try{ CleanerProperties props = new CleanerProperties(); props.setTranslateSpecialEntities(true); props.setTransResCharsToNCR(true); props.setOmitComments(true);
/**
 * Creates the function and configures the properties of its own {@code HtmlCleaner}:
 * comments are omitted, special entities are translated, reserved characters are
 * turned into numeric character references, and a fixed list of tags is pruned.
 */
public CleanHtmlFunction() {
    this.cleaner = new HtmlCleaner();

    final CleanerProperties properties = this.cleaner.getProperties();
    // Prune every tag whose content is of no interest.
    properties.setPruneTags("style,script,form,object,audio,video");
    properties.setTransResCharsToNCR(true);
    properties.setTranslateSpecialEntities(true);
    properties.setOmitComments(true);
}
/** * htmlcleaner로 html string을 xml string으로 바꿔주는 메소드. * @param source * @return */ private String toXML(String source){ try { CleanerProperties props = new CleanerProperties(); props.setTranslateSpecialEntities(true); props.setOmitComments(true); props.setPruneTags("script,style"); // namespace를 무시한다. props.setNamespacesAware(false); props.setAdvancedXmlEscape(true); props.setTranslateSpecialEntities(true); HtmlCleaner cl = new HtmlCleaner(props); TagNode tagNode = cl.clean(source); source = new PrettyXmlSerializer(props).getXmlAsString(tagNode); } catch (IOException e) { logger.error("",e); } return source; } //test용
// Pass the translateSpecialEntities option through toBoolean(...) and apply the result to props.
// NOTE(review): toBoolean and props are declared outside this snippet — confirm their types.
props.setTranslateSpecialEntities( toBoolean(translateSpecialEntities) );
// Pass the translateSpecialEntities option through toBoolean(...) and apply the result to props.
props.setTranslateSpecialEntities(toBoolean(translateSpecialEntities));
// Forward this object's specialentities, unicodechars and omitunknowntags fields to the matching props setters.
props.setTranslateSpecialEntities(this.specialentities); props.setRecognizeUnicodeChars(this.unicodechars); props.setOmitUnknownTags(this.omitunknowntags);
// Switch special-entity translation off on defaultProperties.
defaultProperties.setTranslateSpecialEntities(false);
/** * Cleans the relevant file and generates a valid XML file ready for processing to Sel 2 java File. * * @param absoluteFilename - name of the file to convert. * @return String - location of the converted file. */ public String convertToXML(String absoluteFilename) throws Exception { FileHandler fromSelIDE = new FileHandler(absoluteFilename); FileHandler toXML = new FileHandler(System.getProperty("java.io.tmpdir") + File.separator + fromSelIDE.getFileName() + ".xml", true); if (fromSelIDE.getFile().isDirectory()) { LOGGER.error("Cannot convert directory {} into a Selenium Test!", fromSelIDE.getFileName()); return null; } //Clean up html so that we can read it as XML properly HtmlCleaner cleaner = new HtmlCleaner(); CleanerProperties XMLPrefs = cleaner.getProperties(); XMLPrefs.setUseEmptyElementTags(true); XMLPrefs.setTranslateSpecialEntities(true); XMLPrefs.setTransResCharsToNCR(true); XMLPrefs.setOmitComments(true); XMLPrefs.setOmitComments(true); XMLPrefs.setOmitDoctypeDeclaration(true); XMLPrefs.setNamespacesAware(false); TagNode tagNode = new HtmlCleaner(XMLPrefs).clean(fromSelIDE.getFile()); new PrettyXmlSerializer(XMLPrefs).writeToStream(tagNode, toXML.getWritableFileOutputStream(), "utf-8"); toXML.close(); return toXML.getAbsoluteFile(); }
// Configure props: use empty-element tags, turn on advanced XML escaping, turn off special-entity
// translation, set the boolean-attribute-values option to "empty", and allow HTML inside attributes.
props.setUseEmptyElementTags(true); props.setAdvancedXmlEscape(true); props.setTranslateSpecialEntities(false); props.setBooleanAttributeValues("empty"); props.setAllowHtmlInsideAttributes(true);
// Configure props: turn off Unicode-character recognition and special-entity translation, turn on
// advanced XML escaping, disable namespace awareness, and allow HTML inside attributes.
props.setRecognizeUnicodeChars(false); props.setAdvancedXmlEscape(true); props.setTranslateSpecialEntities(false); props.setNamespacesAware(false); props.setAllowHtmlInsideAttributes(true);