因為 HTML 標籤常常不成對(沒有正確關閉),在解析的時候常常會吃苦頭。

後來找到htmlcleaner,可是網站上的Java Usage不能直接拿來用,

所以後來找了很久,終於在Source裡面找到Test範例。記起來,當備忘吧~~

import org.htmlcleaner.*;
import java.util.*;
import java.io.*;

/**
 * Cleans an HTML file in place with HtmlCleaner and rewrites it as
 * pretty-printed XML. Adapted from the test sample shipped with the
 * HtmlCleaner sources.
 *
 * Vladimir Nikic
 * Date: Apr 13, 2007
 * modified by Sam 2009
 */
public class Test {

    /**
     * Parses the file at {@code input} with HtmlCleaner and writes the
     * serialized result back to the same file, replacing its previous content.
     * A confirmation line with the file name is printed to standard output.
     *
     * @param input path of the HTML file to clean; must not be {@code null}
     * @throws NullPointerException  if {@code input} is {@code null}
     * @throws FileNotFoundException if {@code input} does not exist or is a directory
     * @throws Exception             any other failure raised while reading,
     *                               cleaning or writing the file
     */
    public static void clean(String input) throws Exception {
        Objects.requireNonNull(input, "input file path must not be null");

        File file = new File(input);
        // Fail early with the offending path instead of deep inside the library.
        if (!file.exists() || file.isDirectory()) {
            throw new FileNotFoundException("Not an existing file: " + file.getPath());
        }
        String canonicalPath = file.getCanonicalPath();

        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setUseCdataForScriptAndStyle(true); // Optional
        props.setRecognizeUnicodeChars(true);
        props.setUseEmptyElementTags(true);
        props.setAdvancedXmlEscape(true);
        props.setTranslateSpecialEntities(true);
        props.setBooleanAttributeValues("empty");
        // Note from the original author: custom tags can be defined in a
        // configuration file loaded through ConfigFileTagProvider.

        // Read the document from the external file system.
        TagNode node = cleaner.clean(file);

        // Overwrite the original file with the processed document. To inspect
        // the result instead, print PrettyXmlSerializer#getXmlAsString(node).
        new PrettyXmlSerializer(props).writeXmlToFile(node, canonicalPath);

        System.out.println("\"" + file.getName() + "\" processed!");
    }
}

arrow
arrow
    全站熱搜

    geektu 發表在 痞客邦 留言(0) 人氣()