HTMLCleaner－It's about life

因為 HTML Tag 常常不對稱（標籤沒有正確配對或關閉），在解析的時候常常會吃苦頭。

後來找到 HtmlCleaner，可是網站上的 Java Usage 範例不能直接拿來用，

所以找了很久，終於在 Source 裡面找到 Test 範例。記下來，當作備忘吧～

import org.htmlcleaner.*;
import java.util.*;
import java.io.*;

/**
 * Small command-style helper that cleans an HTML file in place using HtmlCleaner.
 *
 * <p>Based on the HtmlCleaner test example by Vladimir Nikic (Apr 13, 2007);
 * modified by Sam, 2009.
 */
public class Test {

    /**
     * Parses the file at {@code input} with {@code HtmlCleaner} and writes the
     * pretty-printed XML serialization of the resulting tree back to the same
     * file (addressed by its canonical path), then prints a confirmation line
     * to standard output.
     *
     * <p>The original file content is overwritten; no backup is made.
     *
     * @param input path of the HTML file to clean; must not be {@code null}
     * @throws NullPointerException if {@code input} is {@code null}
     * @throws FileNotFoundException if {@code input} does not name an existing regular file
     * @throws Exception any other failure raised while resolving the path, parsing
     *         the file, or writing the result
     */
    public static void clean(String input) throws Exception {
        Objects.requireNonNull(input, "input");

        File file = new File(input);
        // Fail before touching anything: the target is overwritten further down.
        if (!file.isFile()) {
            throw new FileNotFoundException("Not an existing regular file: " + input);
        }
        String canonicalPath = file.getCanonicalPath();

        HtmlCleaner cleaner = new HtmlCleaner();

        // Options carried over unchanged from the original example; see the
        // HtmlCleaner CleanerProperties documentation for the exact effect of each.
        CleanerProperties props = cleaner.getProperties();
        props.setUseCdataForScriptAndStyle(true); // marked "optional" in the original example
        props.setRecognizeUnicodeChars(true);
        props.setUseEmptyElementTags(true);
        props.setAdvancedXmlEscape(true);
        props.setTranslateSpecialEntities(true);
        props.setBooleanAttributeValues("empty");

        // Read and parse the file from the local file system.
        TagNode node = cleaner.clean(file);

        // To inspect the result instead of (or before) writing it, the serializer's
        // getXmlAsString(node) can be printed.
        // Custom tag definitions can presumably be supplied from a configuration
        // file via ConfigFileTagProvider - NOTE(review): confirm against the
        // HtmlCleaner version in use.

        // Write the cleaned document back over the original file.
        new PrettyXmlSerializer(props).writeXmlToFile(node, canonicalPath);

        System.out.println("\"" + file.getName() + "\" processed!");
    }
}