org.htmlcleaner.CleanerProperties.setOmitComments Java code examples

// NOTE(review): props is declared outside this excerpt — presumably a CleanerProperties instance; confirm.
props.setTranslateSpecialEntities(true);
props.setTransResCharsToNCR(true);
props.setOmitComments(true);
// Parse the HTML file at the hard-coded Windows path with a cleaner built from props.
TagNode tagNode = new HtmlCleaner(props).clean(new File("C:\\Users\\MyComputer\\Desktop\\aspose.html"));
// Serialize the parsed tree to a string, passing "ISO-8859-1" as the charset argument.
String newString=new PrettyHtmlSerializer(props).getAsString(tagNode, "ISO-8859-1");

/**
 * Creates and configures the shared {@code cleaner} on the first call; later calls
 * return immediately because {@code htmlCleanerInitialized} is already set.
 * The method is {@code synchronized}, so the check and the initialization run under
 * the class lock.
 */
private static synchronized void initCleaner() {
  if (htmlCleanerInitialized) {
    return;
  }

  cleaner = new HtmlCleaner();
  CleanerProperties cleanerProps = cleaner.getProperties();
  cleanerProps.setOmitComments(true);
  cleanerProps.setOmitXmlDeclaration(true);

  // Mark as done only after the cleaner has been fully configured.
  htmlCleanerInitialized = true;
}

 /**
  * Parses {@code content} with HtmlCleaner and converts the resulting tree to a W3C DOM document.
  *
  * <p>The omit-comments flag is set on the cleaner's own properties, and that same
  * properties object is passed to the {@code DomSerializer}, so parsing and DOM
  * conversion share one configuration.
  *
  * @param content HTML markup to parse
  * @return the DOM document built from the cleaned markup
  * @throws ParserConfigurationException propagated from {@code DomSerializer.createDOM}
  */
 private Document clean(String content) throws ParserConfigurationException {
  HtmlCleaner cleaner = new HtmlCleaner();
  // Configure the cleaner that actually parses. Previously the flag was set on a
  // separate CleanerProperties instance that only the serializer received, so the
  // parsing cleaner ran with its defaults.
  CleanerProperties properties = cleaner.getProperties();
  properties.setOmitComments(true);
  TagNode rootNode = cleaner.clean(content);

  // convert to DOM
  DomSerializer domSerializer = new DomSerializer(properties);
  return domSerializer.createDOM(rootNode);
}

 /**
  * Parses HTML source text with HtmlCleaner and converts the result to a W3C DOM document.
  *
  * <p>The configured cleaner and its properties are the ones used for both parsing and
  * DOM conversion. Previously a second, unconfigured {@code HtmlCleaner} and a fresh
  * {@code CleanerProperties} were used, so none of the settings below took effect.
  *
  * @param source HTML markup to parse
  * @return the DOM document, or {@code null} when a {@code ParserConfigurationException}
  *         is raised during conversion (the stack trace is printed)
  * @throws InterruptedException kept in the signature for existing callers
  * @throws IOException kept in the signature for existing callers
  */
 public static Document getWebpageDocument_fromSource(String source) throws InterruptedException, IOException {
  try {
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);

    // Parse with the cleaner configured above, not a default-configured one.
    TagNode tagNode = cleaner.clean(source);

    // Hand the same properties to the serializer so both stages agree.
    return new DomSerializer(props).createDOM(tagNode);
  } catch (ParserConfigurationException ex) {
    ex.printStackTrace();
    return null;
  }
}

cp.setAllowMultiWordAttributes(true);
cp.setRecognizeUnicodeChars(true);
cp.setOmitComments(true);

/**
 * Downloads the page at {@code urlString}, cleans it with HtmlCleaner, and returns the
 * value of attribute {@code attrToSnap} on the first node matched by {@code xPath}.
 *
 * @param context    used to look up the request-header name ({@code R.string.cookie_prefix})
 * @param xPath      XPath expression evaluated against the cleaned document root
 * @param attrToSnap name of the attribute to read from the first matching node
 * @param urlString  address of the page to fetch
 * @param cookies    value sent under the cookie request header
 * @return the attribute value as returned by {@code getAttributeByName}; the empty string
 *         when the XPath matches nothing or the first match is not a {@code TagNode}
 * @throws IOException      if the connection or reading the response fails
 * @throws XPatherException if the XPath expression cannot be evaluated
 */
public static String snapFromHtmlWithCookies(Context context, String xPath, String attrToSnap, String urlString,
        String cookies) throws IOException, XPatherException {
    String snap = "";
    // create an instance of HtmlCleaner
    HtmlCleaner cleaner = new HtmlCleaner();
    // take default cleaner properties
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);
    URL url = new URL(urlString);
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    try {
        // NOTE(review): setDoOutput(true) is kept from the original although no request
        // body is written — confirm it is really wanted for this fetch.
        connection.setDoOutput(true);
        // optional cookies
        connection.setRequestProperty(context.getString(R.string.cookie_prefix), cookies);
        connection.connect();
        // use the cleaner to "clean" the HTML and return it as a TagNode object
        TagNode root;
        // Close the response reader once parsing is done instead of leaking it.
        // NOTE(review): the reader still uses the platform default charset, as before.
        try (InputStreamReader reader = new InputStreamReader(connection.getInputStream())) {
            root = cleaner.clean(reader);
        }
        Object[] foundNodes = root.evaluateXPath(xPath);
        // Guard the cast: the XPath result array is Object[], so the first match
        // is not guaranteed to be a TagNode.
        if (foundNodes.length > 0 && foundNodes[0] instanceof TagNode) {
            TagNode foundNode = (TagNode) foundNodes[0];
            snap = foundNode.getAttributeByName(attrToSnap);
        }
    } finally {
        // Release the connection on both the success and the failure path.
        connection.disconnect();
    }
    return snap;
}

props.setAllowMultiWordAttributes(true);
props.setRecognizeUnicodeChars(true);
props.setOmitComments(true);

/**
 * Builds the function with its own {@code HtmlCleaner} and adjusts that cleaner's
 * properties: comment omission, special-entity translation, reserved-character NCR
 * translation, and a list of tags to prune.
 */
public CleanHtmlFunction() {
  this.cleaner = new HtmlCleaner();

  // tags whose content is uninteresting and should be removed entirely
  final String prunedTags = "style,script,form,object,audio,video";

  CleanerProperties cleanerProps = this.cleaner.getProperties();
  cleanerProps.setOmitComments(true);
  cleanerProps.setTranslateSpecialEntities(true);
  cleanerProps.setTransResCharsToNCR(true);
  cleanerProps.setPruneTags(prunedTags);
}

/**
 * Cleans {@code html} with a freshly configured {@code HtmlCleaner} and reports whether
 * the cleaned result is empty according to {@code isEmpty}.
 *
 * @param html markup to validate
 * @return a set containing {@code ValidatorMessages.HTML_IS_EMPTY} when the cleaned
 *         markup is empty; otherwise an empty set
 */
public Set<String> validateNonEmpty(String html) {
  final HtmlCleaner htmlCleaner = new HtmlCleaner();

  final CleanerProperties props = htmlCleaner.getProperties();
  props.setOmitXmlDeclaration(true);
  props.setOmitHtmlEnvelope(true);
  props.setOmitComments(true);
  props.setNamespacesAware(false);
  props.setDeserializeEntities(true);

  final boolean cleanedIsEmpty = isEmpty(htmlCleaner.clean(html));

  final Set<String> messages = new HashSet<>();
  if (cleanedIsEmpty) {
    messages.add(ValidatorMessages.HTML_IS_EMPTY);
  }
  return messages;
}

 // Create the cleaner and adjust its properties before any parsing happens.
 final HtmlCleaner mCleaner = new HtmlCleaner();
 CleanerProperties props = mCleaner.getProperties();
 props.setAllowHtmlInsideAttributes(true);
 props.setAllowMultiWordAttributes(true);
 props.setRecognizeUnicodeChars(true);
 props.setOmitComments(true);

 /* URL from which the data is fetched */
 String mSiteUrl = "http://www.example.com";
 String mXPath = "//div";

 // TagNode for storing the data received from the URL
 final TagNode mGetDataFromUrl;

 // Establish the connection. The "URL" and "Object[]" type names had been swallowed
 // into line-wrapped comments, leaving url and mPageData undeclared.
 URL url = new URL(mSiteUrl);
 final URLConnection mCCon = url.openConnection();
 mGetDataFromUrl = mCleaner.clean(new InputStreamReader(mCCon.getInputStream()));

 // Evaluate the XPath that selects where the data is to be retrieved from
 Object[] mPageData = mGetDataFromUrl.evaluateXPath(mXPath);

 // validate object
 if (mPageData.length > 0) {
    TagNode mXPathParsedData = (TagNode) mPageData[0];
    // all text in div is in mData
    // toString() is needed: getText() does not return a String, so trim() is not
    // available on its result directly.
    String mData = mXPathParsedData.getText().toString().trim();
 }

/**
 * Initializes the three collaborators this object works with: the {@code HtmlCleaner}
 * (with its properties adjusted), a {@code DomSerializer} built from those same
 * properties, and a JAXP {@code DocumentBuilder}.
 *
 * @throws IllegalStateException if the JAXP implementation cannot create a
 *         {@code DocumentBuilder}; previously this was swallowed, leaving
 *         {@code documentBuilder} unset
 */
private void init() {

  // Initialize HTMLCleaner
  cleaner = new HtmlCleaner();
  CleanerProperties props = cleaner.getProperties();
  props.setAllowHtmlInsideAttributes(true);
  props.setAllowMultiWordAttributes(true);
  props.setRecognizeUnicodeChars(true);
  props.setOmitComments(true);
  props.setNamespacesAware(false);

  // Initialize DomSerializer
  domSerializer = new DomSerializer(props);

  // Initialize xml parser
  try {
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilder = documentBuilderFactory.newDocumentBuilder();
  } catch (ParserConfigurationException e) {
    // Not expected with a default factory, but fail loudly with the cause instead of
    // continuing with an unset documentBuilder.
    throw new IllegalStateException("Unable to create a DocumentBuilder from the default factory", e);
  }
}

props.setAllowMultiWordAttributes(true);
props.setRecognizeUnicodeChars(true);
props.setOmitComments(true);
try {
  URL url = new URL(playUrl);

props.setOmitComments( toBoolean(omitComments) );

props.setOmitComments(toBoolean(omitComments));

// Copy this object's configured flags onto props, one setter call per field.
// NOTE(review): props is declared outside this excerpt — presumably a CleanerProperties instance; confirm.
props.setOmitDeprecatedTags(this.omitdeprtags);
props.setTreatDeprecatedTagsAsContent(this.treatdeprtagsascontent);
props.setOmitComments(this.omitcomments);
props.setOmitXmlDeclaration(this.omitxmldecl);
props.setOmitDoctypeDeclaration(this.omitdoctypedecl);

/**
 * Cleans the relevant file and generates a valid XML file ready for processing to Sel 2 java File.
 *
 * <p>The output file is created in the directory named by the {@code java.io.tmpdir}
 * system property and is named after the input file with {@code .xml} appended.
 * The output handler is opened only after the directory check and is closed even
 * when cleaning or serialization fails.
 *
 * @param absoluteFilename - name of the file to convert.
 * @return String - location of the converted file, or {@code null} when
 *         {@code absoluteFilename} refers to a directory.
 * @throws Exception if reading, cleaning or writing the file fails.
 */
public String convertToXML(String absoluteFilename) throws Exception {
  FileHandler fromSelIDE = new FileHandler(absoluteFilename);
  // Reject directories before opening the output handler, so the early return
  // does not leave an opened handler behind.
  if (fromSelIDE.getFile().isDirectory()) {
    LOGGER.error("Cannot convert directory {} into a Selenium Test!", fromSelIDE.getFileName());
    return null;
  }
  FileHandler toXML = new FileHandler(System.getProperty("java.io.tmpdir") + File.separator + fromSelIDE.getFileName() + ".xml", true);
  try {
    //Clean up html so that we can read it as XML properly
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties xmlPrefs = cleaner.getProperties();
    xmlPrefs.setUseEmptyElementTags(true);
    xmlPrefs.setTranslateSpecialEntities(true);
    xmlPrefs.setTransResCharsToNCR(true);
    xmlPrefs.setOmitComments(true);
    xmlPrefs.setOmitDoctypeDeclaration(true);
    xmlPrefs.setNamespacesAware(false);
    // Reuse the cleaner whose properties were just configured instead of building a second one.
    TagNode tagNode = cleaner.clean(fromSelIDE.getFile());
    new PrettyXmlSerializer(xmlPrefs).writeToStream(tagNode, toXML.getWritableFileOutputStream(), "utf-8");
  } finally {
    toXML.close();
  }
  return toXML.getAbsoluteFile();
}

/**
 * Wires up the processor from {@code config}: one {@code CleanerProperties} instance is
 * shared by the parser and the serializer, and the filter is built from the configured
 * whitelist and javascript-protocol flag.
 *
 * @param config processor configuration read for the comment flag, whitelist elements,
 *               javascript-protocol flag and serializer choice
 */
public HtmlProcessorImpl(final HtmlProcessorConfig config) {
  final CleanerProperties cleanerProps = new CleanerProperties();
  cleanerProps.setOmitHtmlEnvelope(true);
  cleanerProps.setOmitXmlDeclaration(true);
  cleanerProps.setOmitComments(config.isOmitComments());

  this.config = config;
  this.parser = new HtmlCleaner(cleanerProps);
  this.filter = new WhitelistHtmlFilter(
      config.getWhitelistElements(),
      config.isOmitJavascriptProtocol());
  this.serializer = HtmlSerializerFactory.create(config.getSerializer(), cleanerProps);
}

/**
 * Converts an HTML string to an XML string using htmlcleaner.
 *
 * <p>If the conversion raises an {@code IOException}, the error is logged and the
 * original {@code source} is returned unchanged.
 *
 * @param source HTML markup to convert
 * @return the pretty-printed XML, or the unmodified input when conversion fails
 */
private String toXML(String source){
  try {
    CleanerProperties props = new CleanerProperties();
    props.setTranslateSpecialEntities(true);
    props.setOmitComments(true);
    props.setPruneTags("script,style");
    // ignore namespaces
    props.setNamespacesAware(false);
    props.setAdvancedXmlEscape(true);
    HtmlCleaner cl = new HtmlCleaner(props);
    TagNode tagNode = cl.clean(source);
    return new PrettyXmlSerializer(props).getXmlAsString(tagNode);
  } catch (IOException e) {
    // Say what failed; the previous log message was an empty string.
    logger.error("Failed to convert HTML to XML",e);
    return source;
  }
}
// for testing

props.setAllowMultiWordAttributes(true);
props.setRecognizeUnicodeChars(true);
props.setOmitComments(true);

// Fetch the properties object from cleaner (declared outside this excerpt) and call its setters.
CleanerProperties props = cleaner.getProperties();
props.setUseCdataForScriptAndStyle(false);
props.setOmitComments(true);
props.setOmitUnknownTags(true);
props.setOmitDoctypeDeclaration(true);

How to use the setOmitComments method in org.htmlcleaner.CleanerProperties

Best Java code snippets using org.htmlcleaner.CleanerProperties.setOmitComments (Showing top 20 results out of 315)

How to use the setOmitComments method in org.htmlcleaner.CleanerProperties