因為 HTML Tag 不對稱,解析的時候常常會吃苦頭。後來找到 htmlcleaner,可是網站上的 Java Usage 不能直接拿來用,找了很久,終於在 Source 裡面找到 Test 範例。記下來,當備忘吧。
import org.htmlcleaner.*; import java.util.*; import java.io.*; /** * Vladimir Nikic * Date: Apr 13, 2007 * modified by Sam 2009 */ public class Test { public static void clean(String input)throws Exception{ HtmlCleaner cleaner = new HtmlCleaner(); String fileFull = "", fileName=""; File file = null; try{ file = new File(input); fileFull = file.getCanonicalPath().toString(); }catch(ArrayIndexOutOfBoundsException e){ System.out.println("using: java fileposition"); } CleanerProperties props = cleaner.getProperties(); props.setUseCdataForScriptAndStyle(true);//Optional props.setRecognizeUnicodeChars(true); props.setUseEmptyElementTags(true); props.setAdvancedXmlEscape(true); props.setTranslateSpecialEntities(true); props.setBooleanAttributeValues("empty"); TagNode node = cleaner.clean(file);//Read the file from external file system //System.out.println( new PrettyXmlSerializer(props).getXmlAsString(node) );//Show the processed html content new PrettyXmlSerializer(props).writeXmlToFile(node, fileFull);//Write back to original file //new ConfigFileTagProvider(new File("//Applications//eclipse//workspace//Ungroup//src//geektu//default.xml")); //Can define the custom tag in configure file fileName = file.getName(); System.out.println("\"" + fileName + "\" processed!"); } }
文章標籤
全站熱搜
創作者介紹
創作者 geektu 的頭像
geektu

It's about life

geektu 發表在 痞客邦 留言(0) 人氣(0)