HtmlCleaner htmlCleaner = new HtmlCleaner(); TagNode tagNode = htmlCleaner.clean(text); Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode); Object result; try {
// Cleans a literal HTML fragment whose <html>, <div> and <p> tags are left unclosed, then converts the resulting TagNode tree to an org.w3c.dom.Document using default-constructed CleanerProperties.
TagNode tagNode = new HtmlCleaner().clean("<html><div><p>test"); DomSerializer ser = new DomSerializer(new CleanerProperties()); org.w3c.dom.Document doc = ser.createDOM(tagNode);
element.appendChild(document.createCDATASection(outputCData((CData) item))); boolean specialCase = dontEscape(element); if (escapeXml && !specialCase) { content = Utils.escapeXml(content, props, true); createSubnodes(document, subelement, subTagNode.getAllChildren()); createSubnodes(document, element, sublist);
ContentNode contentNode = (ContentNode) item; String content = contentNode.getContent(); boolean specialCase = dontEscape(element); if (escapeXml && !specialCase) { content = Utils.escapeXml(content, props, true); createSubnodes(document, subelement, subTagNode.getAllChildren()); createSubnodes(document, element, sublist);
private void init() { // Initialize HTMLCleaner cleaner = new HtmlCleaner(); CleanerProperties props = cleaner.getProperties(); props.setAllowHtmlInsideAttributes(true); props.setAllowMultiWordAttributes(true); props.setRecognizeUnicodeChars(true); props.setOmitComments(true); props.setNamespacesAware(false); // Initialize DomSerializer domSerializer = new DomSerializer(props); // Initialize xml parser try { DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilder = documentBuilderFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { // THIS CAN NEVER HAPPEN } }
public Document createDOM(TagNode rootNode) throws ParserConfigurationException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); DOMImplementation impl = builder.getDOMImplementation(); Document document; // // Where a DOCTYPE is supplied in the input, ensure that this is in the output DOM. See issue #27 // if (rootNode.getDocType() != null){ String qualifiedName = rootNode.getDocType().getPart1(); String publicId = rootNode.getDocType().getPublicId(); String systemId = rootNode.getDocType().getSystemId(); DocumentType documentType = impl.createDocumentType(qualifiedName, publicId, systemId); document = impl.createDocument(rootNode.getNamespaceURIOnPath(""), qualifiedName, documentType); } else { document = builder.newDocument(); Element rootElement = document.createElement(rootNode.getName()); document.appendChild(rootElement); } createSubnodes(document, (Element)document.getDocumentElement(), rootNode.getAllChildren()); return document; }
/**
 * Reports whether content for the given element should be encapsulated in
 * {@code <![CDATA[ ]]>} instead of being escaped, as for script and style elements.
 * <p>
 * The result is {@code true} only when {@code props.isUseCdataForScriptAndStyle()}
 * is enabled, {@code isScriptOrStyle(element)} holds, and the element has no child nodes.
 *
 * @param element the DOM element to test
 * @return true if {@code <![CDATA[ ]]>} should be used.
 */
protected boolean dontEscape(Element element) { // make sure <script src=..></script> doesn't get turned into <script src=..><[CDATA[]]></script> // TODO check for blank content as well. return props.isUseCdataForScriptAndStyle() && isScriptOrStyle(element) && !element.hasChildNodes(); } private void createSubnodes(Document document, Element element, List tagChildren) {
// Cleans the HTML into a TagNode tree, then builds a DOM Document from it; the serializer is given the cleaner's own properties object rather than a fresh CleanerProperties.
HtmlCleaner cleaner = new HtmlCleaner(); TagNode node = cleaner.clean(html); DomSerializer ser = new DomSerializer(cleaner.getProperties()); Document myW3cDoc = ser.createDOM(node);
// Passes the document's root element and all children of rootNode to createSubnodes (presumably to populate the root element - confirm against createSubnodes). NOTE(review): the (Element) cast looks redundant if document is an org.w3c.dom.Document - confirm.
createSubnodes(document, (Element) document.getDocumentElement(), rootNode.getAllChildren());
HtmlCleaner htmlCleaner = new HtmlCleaner(); TagNode tagNode = htmlCleaner.clean(text); Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode); Object result; try {
/**
 * Converts an HTML string into a DOM document.
 * <p>
 * The markup is first cleaned into a tag tree by a new {@code HtmlCleaner}; that tree
 * is then serialized to DOM by a {@code DomSerializer} built on default-constructed
 * {@code CleanerProperties}.
 *
 * @param html the HTML markup to convert
 * @return the DOM document produced from the cleaned markup
 * @throws ParserConfigurationException if the serializer cannot create the DOM
 */
public static Document toXhtml(String html) throws ParserConfigurationException {
    TagNode cleanedRoot = new HtmlCleaner().clean(html);
    CleanerProperties serializerProps = new CleanerProperties();
    return new DomSerializer(serializerProps).createDOM(cleanedRoot);
}
// Builds a DOM Document from tagNode with a DomSerializer configured by default-constructed CleanerProperties.
Document dom = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
private Document clean(String content) throws ParserConfigurationException { HtmlCleaner cleaner = new HtmlCleaner(); TagNode rootNode = cleaner.clean(content); // convert to DOM CleanerProperties properties = new CleanerProperties(); properties.setOmitComments(true); DomSerializer domSerializer = new DomSerializer(properties); Document doc = domSerializer.createDOM(rootNode); return doc; }
org.w3c.dom.Document doc = null; try { doc = new DomSerializer(props, false).createDOM(tagNode); } catch (ParserConfigurationException e) { LOGGER.error("conver dom error!", e);
cleanedXmlHtml = domSerializer.createDOM(tagNode); } else if(content.getContentType().contains(new StringBuilder("/xml")) || content.getContentType().contains(new StringBuilder("+xml"))) {