因為HTML Tag常常不對稱(沒有正確閉合),在解析的時候常常會吃苦頭,
後來找到htmlcleaner,可是網站上的Java Usage不能直接拿來用,
所以後來找了很久,終於在Source裡面找到Test範例。記下來,當作備忘吧。
import org.htmlcleaner.*; import java.util.*; import java.io.*; /** * Vladimir Nikic * Date: Apr 13, 2007 * modified by Sam 2009 */ public class Test { public static void clean(String input)throws Exception{ HtmlCleaner cleaner = new HtmlCleaner(); String fileFull = "", fileName=""; File file = null; try{ file = new File(input); fileFull = file.getCanonicalPath().toString(); }catch(ArrayIndexOutOfBoundsException e){ System.out.println("using: java fileposition"); } CleanerProperties props = cleaner.getProperties(); props.setUseCdataForScriptAndStyle(true);//Optional props.setRecognizeUnicodeChars(true); props.setUseEmptyElementTags(true); props.setAdvancedXmlEscape(true); props.setTranslateSpecialEntities(true); props.setBooleanAttributeValues("empty"); TagNode node = cleaner.clean(file);//Read the file from external file system //System.out.println( new PrettyXmlSerializer(props).getXmlAsString(node) );//Show the processed html content new PrettyXmlSerializer(props).writeXmlToFile(node, fileFull);//Write back to original file //new ConfigFileTagProvider(new File("//Applications//eclipse//workspace//Ungroup//src//geektu//default.xml")); //Can define the custom tag in configure file fileName = file.getName(); System.out.println("\"" + fileName + "\" processed!"); } }
文章標籤
全站熱搜