/** * Constructor. * * @param scanDir Scan directory. * @param sockFactory Socket factory. */ public URIContext(URL scanDir, SSLSocketFactory sockFactory) { this.scanDir = scanDir; this.sockFactory = sockFactory; tidy = new Tidy(); tidy.setQuiet(true); tidy.setOnlyErrors(true); tidy.setShowWarnings(false); tidy.setInputEncoding("UTF8"); tidy.setOutputEncoding("UTF8"); }
/**
 * Fetches a page, locates an image reference in it and hands that reference
 * to {@code saveImageNIO}.
 *
 * <p>The page is parsed with Tidy and queried with the {@code IMAGE_PATTERN}
 * XPath expression; the value of the first matching node is used as the image
 * URL.
 *
 * @param pageLink URL of the page to fetch
 * @param targetDir passed through unchanged to {@code saveImageNIO}
 * @throws XPathExpressionException if {@code IMAGE_PATTERN} cannot be evaluated
 * @throws IOException if the page cannot be read, or if no node in the page
 *         matches {@code IMAGE_PATTERN}
 */
public void downloadSinglePage(String pageLink, String targetDir)
        throws XPathExpressionException, IOException {
    URL url = new URL(pageLink);

    Document response;
    // try-with-resources: the network stream is closed even if parsing fails.
    try (BufferedInputStream page = new BufferedInputStream(url.openStream())) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        response = tidy.parseDOM(page, null);
    }

    XPath xPath = XPathFactory.newInstance().newXPath();
    NodeList nodes = (NodeList) xPath.evaluate(IMAGE_PATTERN, response, XPathConstants.NODESET);

    // Fail with a descriptive error instead of a NullPointerException when
    // the page contains nothing that matches the pattern.
    if (nodes == null || nodes.getLength() == 0) {
        throw new IOException("No image found on page " + pageLink);
    }

    String imageURL = nodes.item(0).getNodeValue();
    saveImageNIO(imageURL, targetDir);
}
/**
 * Creates a new <code>tidy</code> HTML parser that reads and writes UTF-8,
 * runs quietly and does not show warnings.
 *
 * @return a newly created <code>tidy</code> HTML parser
 */
public static Tidy getParser() {
    log.debug("Start : getParser1");

    final String utf8 = StandardCharsets.UTF_8.name();
    final Tidy parser = new Tidy();
    parser.setShowWarnings(false);
    parser.setQuiet(true);
    parser.setOutputEncoding(utf8);
    parser.setInputEncoding(utf8);

    // Guarded so the string concatenation only happens when debug is on.
    if (log.isDebugEnabled()) {
        log.debug("getParser1 : tidy parser created - " + parser);
    }
    log.debug("End : getParser1");
    return parser;
}
/**
 * Parses an HTML fragment with Tidy and converts the resulting DOM with
 * {@code DOMReader}.
 *
 * <p>Tidy's diagnostics are captured in an in-memory buffer and logged after
 * parsing, but only when Tidy actually produced some output.
 *
 * @param macroBodyXhtmlCleaned the HTML fragment to parse
 * @return the document produced by {@code DOMReader} from Tidy's DOM
 */
private Document getMacroBodyDocument(String macroBodyXhtmlCleaned) {
    ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
    PrintWriter errorWriter = new PrintWriter(errorStream);
    try {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setErrout(errorWriter);
        // Parse with the configured instance so the quiet flag and the
        // captured error output actually take effect.
        // NOTE(review): getBytes() uses the platform default charset and no
        // input encoding is set on Tidy — confirm this matches the callers' data.
        return new DOMReader().read(
                tidy.parseDOM(new ByteArrayInputStream(macroBodyXhtmlCleaned.getBytes()), null));
    } finally {
        // PrintWriter buffers; flush so the byte buffer holds everything written.
        errorWriter.flush();
        // Only report when Tidy wrote diagnostics, instead of on every call.
        if (errorStream.size() > 0) {
            LOG.error(String.format("There were some warnings parsing the specified HTML fragment\n%s", new String(errorStream.toByteArray())));
        }
    }
}
}
/**
 * Creates a new <code>tidy</code> HTML parser that reads input in the given
 * encoding, writes UTF-8, runs quietly and does not show warnings.
 *
 * @param encoding the input encoding name handed to <code>tidy</code>
 * @return a newly created <code>tidy</code> HTML parser
 */
private static Tidy getTidyParser(String encoding) {
    log.debug("Start : getParser");

    final Tidy parser = new Tidy();
    parser.setShowWarnings(false);
    parser.setQuiet(true);
    parser.setOutputEncoding(StandardCharsets.UTF_8.name());
    parser.setInputEncoding(encoding);

    // Guarded so the string concatenation only happens when debug is on.
    if (log.isDebugEnabled()) {
        log.debug("getParser : tidy parser created - " + parser);
    }
    log.debug("End : getParser");
    return parser;
}
// Quiet Tidy parser that reads its input as UTF-8 and shows no warnings.
Tidy tidy = new Tidy();
tidy.setInputEncoding("UTF-8");
tidy.setShowWarnings(false);
tidy.setQuiet(true);

// NOTE(review): bufferPage is not closed within this fragment — confirm it is
// closed later by the surrounding code.
BufferedInputStream bufferPage =
        new BufferedInputStream(new URL("http://www.yourUrl.com").openStream());

// Parse the fetched page into a DOM and normalize the resulting tree.
Document document = tidy.parseDOM(bufferPage, null);
document.normalize();
/**
 * Parses the content of a stream into a DOM document using a quiet Tidy
 * instance with warnings switched off.
 *
 * @param is the stream to parse
 * @return the document returned by Tidy's {@code parseDOM}
 */
public Document parseInputStream(InputStream is){
    final Tidy parser = new Tidy();
    parser.setShowWarnings(false);
    parser.setQuiet(true);
    return parser.parseDOM(is, null);
}
/**
 * Runs the given HTML through Tidy and wraps the result in an
 * {@code html-summary} element as a CDATA section.
 *
 * <p>Tidy is set to force output, emit XHTML, print the body only, and stay
 * quiet with warnings off.
 *
 * @param html the HTML text to tidy
 * @return an {@code html-summary} element whose content is the tidied HTML
 * @throws JDOMException declared by the original signature
 * @throws IOException declared by the original signature
 */
private Element getHtmlSummaryElement(String html) throws JDOMException, IOException {
    Tidy cleaner = new Tidy();
    cleaner.setXHTML(true);
    cleaner.setPrintBodyOnly(true);
    cleaner.setForceOutput(true);
    cleaner.setShowWarnings(false);
    cleaner.setQuiet(true);

    // Tidy writes the cleaned markup into this in-memory buffer.
    StringWriter buffer = new StringWriter();
    cleaner.parse(new StringReader(html), buffer);

    Element summary = new Element("html-summary");
    summary.addContent(new CDATA(buffer.toString()));
    return summary;
}
}
// Tidy instance set up to be quiet, with warnings off and the shown-error
// limit set to zero.
Tidy tidy = new Tidy();
tidy.setQuiet(true);
tidy.setShowWarnings(false);
tidy.setShowErrors(0);
// Quiet Tidy instance with the shown-error limit set to zero.
Tidy tidy = new Tidy();
tidy.setQuiet(true);
tidy.setShowErrors(0);
// NOTE(review): a null error writer is installed here — confirm Tidy tolerates
// this when it has diagnostics to report.
tidy.setErrout(null);

// Parse the input into a DOM.
doc = tidy.parseDOM(in, null);
/**
 * Builds a Tidy parser from the supplied settings. The parser always uses
 * UTF-8 for input and output and has the make-clean option enabled.
 *
 * @param quiet - value for the Tidy quiet flag
 * @param showWarnings - whether Tidy should show warnings
 * @param isXml - whether the content is to be treated as XML
 * @param stringWriter - if non-null, Tidy's error output is written to it
 * @return the configured Tidy parser
 */
public static Tidy makeTidyParser(boolean quiet, boolean showWarnings, boolean isXml, StringWriter stringWriter) {
    final String utf8 = StandardCharsets.UTF_8.name();

    final Tidy parser = new Tidy();
    parser.setInputEncoding(utf8);
    parser.setOutputEncoding(utf8);
    parser.setMakeClean(true);
    parser.setXmlTags(isXml);
    parser.setShowWarnings(showWarnings);
    parser.setQuiet(quiet);

    // Without a writer, Tidy's error output is left as it is.
    if (stringWriter == null) {
        return parser;
    }
    parser.setErrout(new PrintWriter(stringWriter));
    return parser;
}
/**
 * Default constructor. Obtains the XML transformer and creates a quiet Tidy
 * instance with warnings switched off.
 *
 * @throws PEPException if the transformer cannot be obtained
 */
public ListDatastreams() throws PEPException {
    try {
        xFormer = XmlTransformUtility.getTransformer();
    } catch (Exception e) {
        // NOTE(review): the message names "SearchFilter" although this is
        // ListDatastreams — looks copy-pasted; confirm before changing.
        throw new PEPException("Error initialising SearchFilter", e);
    }

    // Configure on a local reference, then store it in the field.
    Tidy parser = new Tidy();
    parser.setQuiet(true);
    parser.setShowWarnings(false);
    tidy = parser;
}
/**
 * Creates a quiet, UTF-8 Tidy parser. Warning display follows
 * {@code HTMLParserFactory.isParserWarningsEnabled()}.
 *
 * @param url passed to the {@code JTidyPrintWriter} that receives Tidy's error
 *        output when any HTML parser listeners are registered
 * @return the configured Tidy parser
 */
private static Tidy getParser( URL url ) {
    final Tidy parser = new Tidy();
    parser.setCharEncoding( org.w3c.tidy.Configuration.UTF8 );
    parser.setQuiet( true );
    parser.setShowWarnings( HTMLParserFactory.isParserWarningsEnabled() );

    // Error output is only redirected when at least one listener is registered.
    final boolean hasListeners = !HTMLParserFactory.getHTMLParserListeners().isEmpty();
    if (hasListeners) {
        parser.setErrout( new JTidyPrintWriter( url ) );
    }
    return parser;
}
/**
 * Default constructor. Obtains the XML transformer and creates a quiet Tidy
 * instance with warnings switched off.
 *
 * @throws PEPException if the transformer cannot be obtained
 */
public FindObjects() throws PEPException {
    try {
        xFormer = XmlTransformUtility.getTransformer();
    } catch (Exception e) {
        // NOTE(review): the message names "SearchFilter" although this is
        // FindObjects — looks copy-pasted; confirm before changing.
        throw new PEPException("Error initialising SearchFilter", e);
    }

    // Configure on a local reference, then store it in the field.
    Tidy parser = new Tidy();
    parser.setQuiet(true);
    parser.setShowWarnings(false);
    tidy = parser;
}
/**
 * Creates a quiet, UTF-8 Tidy parser. Warning display follows
 * {@code HTMLParserFactory.isParserWarningsEnabled()}.
 *
 * @param url passed to the {@code JTidyPrintWriter} that receives Tidy's error
 *        output when any HTML parser listeners are registered
 * @return the configured Tidy parser
 */
private static Tidy getParser( URL url ) {
    final Tidy parser = new Tidy();
    parser.setCharEncoding( org.w3c.tidy.Configuration.UTF8 );
    parser.setQuiet( true );
    parser.setShowWarnings( HTMLParserFactory.isParserWarningsEnabled() );

    // Error output is only redirected when at least one listener is registered.
    final boolean hasListeners = !HTMLParserFactory.getHTMLParserListeners().isEmpty();
    if (hasListeners) {
        parser.setErrout( new JTidyPrintWriter( url ) );
    }
    return parser;
}
/**
 * Creates a quiet, UTF-8 Tidy parser. Warning display follows
 * {@code HTMLParserFactory.isParserWarningsEnabled()}.
 *
 * @param url passed to the {@code JTidyPrintWriter} that receives Tidy's error
 *        output when any HTML parser listeners are registered
 * @return the configured Tidy parser
 */
private static Tidy getParser( URL url ) {
    final Tidy parser = new Tidy();
    parser.setCharEncoding( org.w3c.tidy.Configuration.UTF8 );
    parser.setQuiet( true );
    parser.setShowWarnings( HTMLParserFactory.isParserWarningsEnabled() );

    // Error output is only redirected when at least one listener is registered.
    final boolean hasListeners = !HTMLParserFactory.getHTMLParserListeners().isEmpty();
    if (hasListeners) {
        parser.setErrout( new JTidyPrintWriter( url ) );
    }
    return parser;
}
public static String prettyPrintHTML(String rawHTML) { Tidy tidy = new Tidy(); tidy.setXHTML(true); tidy.setIndentContent(true); tidy.setPrintBodyOnly(true); tidy.setInputEncoding("UTF-8"); tidy.setOutputEncoding("UTF-8"); tidy.setSmartIndent(true); tidy.setShowWarnings(false); //to hide errors tidy.setQuiet(true); //to hide warning tidy.setTidyMark(false); // HTML to DOM Document htmlDOM = tidy.parseDOM(new ByteArrayInputStream(rawHTML.getBytes()), null); // Pretty Print OutputStream out = new ByteArrayOutputStream(); tidy.pprint(htmlDOM, out); return out.toString(); }
/**
 * Returns the Tidy instance held in the {@code tidy} field, creating and
 * configuring it on the first call.
 *
 * <p>The instance is set to make-clean, XML output, no Tidy mark, quiet, no
 * warnings, and upper-case tags and attributes.
 *
 * @return the configured Tidy instance
 */
private synchronized Tidy configureTidy() {
    // Already created on an earlier call: reuse it.
    if (tidy != null) {
        return tidy;
    }

    tidy = new Tidy();
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    tidy.setTidyMark(false);
    tidy.setMakeClean(true);
    tidy.setXmlOut(true);
    tidy.setUpperCaseAttrs(true);
    tidy.setUpperCaseTags(true);
    return tidy;
}
/**
 * Creates a new HtmlConverter and configures its Tidy instance.<p>
 *
 * Tidy is set to omit the Tidy mark, hide warnings, run quietly and force
 * output; in XML mode the XML-tags and XML-space options are enabled as well.
 *
 * @param htmlImport reference to the htmlimport
 * @param xmlMode switch for setting the import to HTML or XML mode
 */
public CmsHtmlImportConverter(CmsHtmlImport htmlImport, boolean xmlMode) {

    m_tidy.setForceOutput(true);
    m_tidy.setQuiet(true);
    m_tidy.setShowWarnings(false);
    m_tidy.setTidyMark(false);

    // XML mode switches on the XML-specific Tidy options.
    if (xmlMode) {
        m_tidy.setXmlTags(true);
        m_tidy.setXmlSpace(true);
    }

    initialiseTags();
    m_htmlImport = htmlImport;
}
/**
 * Returns the Tidy instance held in the {@code tidy} field, creating and
 * configuring it on the first call.
 *
 * <p>The instance is set to a wrap length of {@code Integer.MAX_VALUE},
 * make-clean, XML output, no Tidy mark, quiet, no warnings, and upper-case
 * tags and attributes.
 *
 * @return the configured Tidy instance
 */
private synchronized Tidy configureTidy() {
    // Already created on an earlier call: reuse it.
    if (tidy != null) {
        return tidy;
    }

    tidy = new Tidy();
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    tidy.setTidyMark(false);
    tidy.setMakeClean(true);
    tidy.setXmlOut(true);
    tidy.setWraplen(Integer.MAX_VALUE);
    tidy.setUpperCaseAttrs(true);
    tidy.setUpperCaseTags(true);
    return tidy;
}