/**
 * Writes this element's closing tag ({@code </tagName>}) to {@code accum}.
 * <p>
 * Nothing is written when the element has no child nodes and its tag is self-closing.
 * When pretty-printing, the closing tag is preceded by indentation if the element has
 * children and either the tag formats as a block, or outline mode is on and the element
 * has more than one child or a single child that is not a {@code TextNode}.
 *
 * @param accum destination the markup is appended to
 * @param depth depth passed through to {@code indent}
 * @param out   output settings consulted for pretty-print and outline mode
 * @throws IOException if appending to {@code accum} fails
 */
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
    if (childNodes.isEmpty() && tag.isSelfClosing()) {
        return; // an empty self-closing element has no separate closing tag
    }

    if (out.prettyPrint() && !childNodes.isEmpty()) {
        boolean breakBeforeClose = tag.formatAsBlock();
        if (!breakBeforeClose && out.outline()) {
            final int childCount = childNodes.size();
            breakBeforeClose = childCount > 1
                    || (childCount == 1 && !(childNodes.get(0) instanceof TextNode));
        }
        if (breakBeforeClose) {
            indent(accum, depth, out);
        }
    }

    accum.append("</").append(tagName()).append('>');
}
/**
 * Writes this node as an HTML comment: {@code <!--}, the value of {@code getData()},
 * then {@code -->}. When pretty-printing is enabled the comment is first indented.
 *
 * @param accum destination the markup is appended to
 * @param depth depth passed through to {@code indent}
 * @param out   output settings consulted for pretty-print mode
 * @throws IOException if appending to {@code accum} fails
 */
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
    if (out.prettyPrint()) {
        indent(accum, depth, out);
    }
    final Appendable afterOpen = accum.append("<!--");
    final Appendable afterData = afterOpen.append(getData());
    afterData.append("-->");
}
/**
 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
 * <p>
 * When the output settings have pretty-printing enabled, the result is trimmed of
 * leading and trailing whitespace.
 *
 * @return String of HTML.
 * @see #outerHtml()
 */
public String html() {
    final StringBuilder buffer = StringUtil.stringBuilder();
    html(buffer);
    if (getOutputSettings().prettyPrint()) {
        return buffer.toString().trim();
    }
    return buffer.toString();
}
/**
 * Parses the given text as an HTML body fragment with Jsoup and decorates the result.
 * <p>
 * The returned document gets fresh output settings with pretty-printing disabled,
 * {@code DOC_STYLE} appended to its head and {@code HIGHLIGHT_JS_SCRIPT} appended to its body.
 *
 * @param htmlText a text to parse.
 * @return a document with parsed text.
 */
private Document getParsedHtmlDocument(String htmlText) {
    final Document parsed = Jsoup.parseBodyFragment(htmlText);

    final Document.OutputSettings compactSettings = new Document.OutputSettings();
    compactSettings.prettyPrint(false);
    parsed.outputSettings(compactSettings);

    parsed.head().append(DOC_STYLE);
    parsed.body().append(HIGHLIGHT_JS_SCRIPT);
    return parsed;
}
@Override public Document process(final Metadata metadata, final Document document) { // If we have 1 node, it a p, it contains only text nodes, then treat it as pre if (document.body().children().size() == 1) { final Elements paragraphs = document.select("body > p"); if (paragraphs.size() == 1 && isAllTextNodes(paragraphs.first())) { paragraphs.first().tagName("pre"); document.body().textNodes().forEach(TextNode::remove); document.outputSettings().prettyPrint(false); } } return document; }
/**
 * Parses {@code content} with Jsoup's XML parser, using an empty base URI, and
 * disables pretty-printing on the resulting document.
 *
 * @param content the markup to parse
 * @return the parsed document
 */
@Override
public Document parse(String content) {
    final Document xmlDocument = Jsoup.parse(content, "", Parser.xmlParser());
    final Document.OutputSettings settings = xmlDocument.outputSettings();
    settings.prettyPrint(false);
    return xmlDocument;
}
// Fetch the page named by the "htmluri" request parameter and print a cleaned-up copy.
// NOTE(review): getParameter may return null when the parameter is absent, and the URL
// is taken straight from the request - confirm the caller validates it before fetching.
String url = request.getParameter("htmluri").trim();
// println does not interpret format specifiers, so build the message by concatenation
// instead of leaving a literal "%s" in the output.
System.out.println("Fetching " + url + "...");
Document doc = Jsoup.connect(url).get();

// Serialize without pretty-printing, using the ASCII charset.
Document.OutputSettings settings = doc.outputSettings();
settings.prettyPrint(false);
settings.charset("ASCII");
String html = doc.html();

html = StringEscapeUtils.unescapeHtml(html);
html = Jsoup.parse(html).html(); // This will take care of any extra closing tags
System.out.println(html);
/**
 * Builds a {@code RichText} from an HTML string.
 * <p>
 * The HTML is parsed with Jsoup, pretty-printing is disabled on the parsed document,
 * and the first {@code body} element is handed to the {@code parse} helper together
 * with a new root created from the empty string.
 *
 * @param html the HTML to convert
 * @return the root {@code RichText} passed to the {@code parse} helper
 */
public static RichText fromHtml(String html) {
    final Document parsedDoc = Jsoup.parse(html);
    final Document.OutputSettings settings = parsedDoc.outputSettings();
    settings.prettyPrint(false);

    final RichText result = new RichText("");
    parse(result, parsedDoc.getElementsByTag("body").get(0));
    return result;
}
/**
 * Parses an XHTML string with Jsoup's XML parser and configures the document for
 * compact XHTML output: pretty-printing off, {@code xhtml} escape mode, UTF-8 charset.
 *
 * @param inputXhtml the XHTML markup to parse
 * @return the parsed document with its output settings adjusted
 */
private static Document parseXhtml(final String inputXhtml) {
    // The second argument of Jsoup.parse(String, String, Parser) is the base URI used
    // to resolve relative links, not a charset; the input is already a decoded String.
    // Pass an empty base URI rather than "utf-8".
    final Document originalDocument = Jsoup.parse(inputXhtml, "", Parser.xmlParser());
    originalDocument.outputSettings().prettyPrint(false);
    originalDocument.outputSettings().escapeMode(xhtml);
    // The output charset is what actually controls the encoding used when serializing.
    originalDocument.outputSettings().charset("UTF-8");
    return originalDocument;
}
/**
 * Returns the inner HTML of the body of {@code text} after parsing it with Jsoup,
 * serialized without pretty-printing.
 *
 * @param text the HTML to parse
 * @return {@code text} itself when {@code StringUtils.isBlank} reports it blank;
 *         otherwise the parsed body's inner HTML
 */
public String getHtmlBody(String text) {
    if (StringUtils.isBlank(text)) {
        return text;
    }

    final org.jsoup.nodes.Document parsed = org.jsoup.Jsoup.parse(text);
    parsed.outputSettings().prettyPrint(false);
    final String bodyMarkup = parsed.body().html();
    return bodyMarkup;
}
/**
 * Parses {@code source} with Jsoup, runs {@code processHtmlDocument} on the parsed
 * document, and serializes it with XML syntax, xhtml escape mode and no pretty-printing.
 *
 * @param source the HTML to process
 * @return the processed document's HTML
 */
@Override
public String processHtml(String source) {
    final org.jsoup.nodes.Document parsed = Jsoup.parse(source);
    processHtmlDocument(parsed);

    final org.jsoup.nodes.Document.OutputSettings settings = parsed.outputSettings();
    settings.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);
    settings.escapeMode(Entities.EscapeMode.xhtml);

    return parsed.html();
}
/**
 * Processes an HTML string and returns it re-serialized.
 * <p>
 * The input is parsed with Jsoup and passed to {@code processHtmlDocument}; the output
 * settings are then switched to XML syntax, pretty-printing off and xhtml escape mode
 * before the document is rendered.
 *
 * @param source the HTML to process
 * @return the rendered HTML of the processed document
 */
@Override
public String processHtml(String source) {
    final org.jsoup.nodes.Document htmlDocument = Jsoup.parse(source);
    processHtmlDocument(htmlDocument);

    // Output settings are applied after processing, immediately before rendering.
    final org.jsoup.nodes.Document.OutputSettings outputSettings = htmlDocument.outputSettings();
    outputSettings.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
    outputSettings.prettyPrint(false);
    outputSettings.escapeMode(Entities.EscapeMode.xhtml);

    final String rendered = htmlDocument.html();
    return rendered;
}
/**
 * Parses {@code html} with Jsoup, disables pretty-printing on the parsed document,
 * and wraps it in a new {@code DocumentElement}.
 *
 * @param html the HTML to parse
 * @return a new {@code DocumentElement} constructed from the parsed document
 */
@Nonnull
static DocumentElement parseHtml(final String html) {
    final Document parsed = Jsoup.parse(html);
    final Document.OutputSettings settings = parsed.outputSettings();
    settings.prettyPrint(false);
    return new DocumentElement(parsed);
}
// Parse a sample paragraph containing non-ASCII punctuation and symbols.
Document doc = Jsoup.parse("<p>THIS — IS A “TEST”. 5 > 4. trademark: ™</p>");

// Serialize without pretty-printing, using the extended escape mode and the ASCII charset.
Document.OutputSettings settings = doc.outputSettings()
        .prettyPrint(false)
        .escapeMode(Entities.EscapeMode.extended);
settings.charset("ASCII");

String modifiedFileHtmlStr = doc.html();
System.out.println(modifiedFileHtmlStr);
/**
 * Writes the given jsoup document to the output stream (in UTF-8).
 * <p>
 * Before serializing, the document's output settings are changed to an indent amount
 * of 4, HTML syntax and pretty-printing enabled.
 *
 * @param doc
 *            the document to write; its output settings are modified
 * @param outputStream
 *            the stream to write to
 * @throws IOException
 *             if writing fails
 */
private static void write(Document doc, OutputStream outputStream) throws IOException {
    doc.outputSettings()
            .indentAmount(4)
            .syntax(Syntax.html)
            .prettyPrint(true);

    final byte[] encoded = doc.html().getBytes(UTF_8);
    outputStream.write(encoded);
}
/**
 * Parses the given content as XML.
 * <p>
 * Uses Jsoup with its XML parser and an empty base URI; pretty-printing is switched
 * off on the returned document's output settings.
 *
 * @param content the markup to parse
 * @return the parsed document
 */
@Override
public Document parse(String content) {
    final Document parsed = Jsoup.parse(content, "", Parser.xmlParser());
    final Document.OutputSettings outputSettings = parsed.outputSettings();
    outputSettings.prettyPrint(false);
    return parsed;
}
/**
 * Fetches {@code page} and extracts its description as plain text.
 * <p>
 * The response cookies are merged into {@code cookies}. The description is taken from
 * the first {@code td[class=alt1][width="70%"]} cell; {@code br} and {@code p} elements
 * are turned into line breaks before all markup is stripped.
 *
 * @param page URL of the page to fetch
 * @return the {@code og:title} meta content, a newline, then the description text;
 *         or {@code null} if fetching fails or no description cell is found
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the response once and reuse the document. Parsing it a second time is
        // redundant, and depending on the jsoup version a repeated parse() can fail
        // because the response body stream has already been consumed.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line breaks with a literal backslash-n placeholder, replaced below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(
                ele.html().replaceAll("\\\\n", System.getProperty("line.separator")),
                "",
                Whitelist.none(),
                new Document.OutputSettings().prettyPrint(false));
        // Overridden saveText takes first line and makes it the file name.
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage;
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}

@Override
void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { if (out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline())) { if (accum instanceof StringBuilder) { if (((StringBuilder) accum).length() > 0) indent(accum, depth, out); } else { indent(accum, depth, out); } } accum.append('<').append(tagName()); if (attributes != null) attributes.html(accum, out); // selfclosing includes unknown tags, isEmpty defines tags that are always empty if (childNodes.isEmpty() && tag.isSelfClosing()) { if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty()) accum.append('>'); else accum.append(" />"); // <img> in html, <img /> in xml } else accum.append('>'); }
/**
 * Writes this text node's escaped value to {@code accum}.
 * <p>
 * When pretty-printing, non-blank text is indented if it is the first sibling inside
 * an {@code Element} whose tag formats as a block, or if outline mode is on and the
 * node has siblings. Whitespace normalisation is requested from
 * {@code Entities.escape} when pretty-printing and the parent is an {@code Element}
 * for which {@code Element.preserveWhitespace} returns false.
 *
 * @param accum destination the text is appended to
 * @param depth depth passed through to {@code indent}
 * @param out   output settings consulted for pretty-print and outline mode
 * @throws IOException if appending to {@code accum} fails
 */
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
    if (out.prettyPrint()) {
        final boolean leadsBlockParent = siblingIndex() == 0
                && parentNode instanceof Element
                && ((Element) parentNode).tag().formatAsBlock()
                && !isBlank();
        if (leadsBlockParent
                || (out.outline() && siblingNodes().size() > 0 && !isBlank())) {
            indent(accum, depth, out);
        }
    }

    boolean collapseWhitespace = false;
    if (out.prettyPrint() && parent() instanceof Element) {
        collapseWhitespace = !Element.preserveWhitespace(parent());
    }
    Entities.escape(accum, coreValue(), out, false, collapseWhitespace, false);
}
/**
 * Renders {@code document} as HTML with pretty-printing disabled and an indent amount of 0.
 * <p>
 * The settings are applied to the document's own output settings, so the passed
 * document is modified.
 *
 * @param document the document to render
 * @return the document's HTML
 */
public static String toCompactString(Document document) {
    final Document.OutputSettings settings = document.outputSettings();
    settings.prettyPrint(false);
    settings.indentAmount(0);
    return document.html();
}
}