/**
 * Tail callback: records whether {@code node} is an element that causes a line break.
 *
 * <p>Once {@code signatureFound} is set, no state is updated. Otherwise a {@code BR} or
 * {@code P} element sets {@code lastElementCausedLineBreak}; a {@code BR} element is also
 * remembered in {@code brElementPrecedingDashes}, while a {@code P} element clears that
 * reference. Any other node resets {@code lastElementCausedLineBreak} and leaves
 * {@code brElementPrecedingDashes} untouched.
 *
 * @param node the node being processed
 * @param depth not used by this method
 * @return always {@link TailFilterDecision#CONTINUE}
 */
@NonNull
@Override
public TailFilterDecision tail(Node node, int depth) {
    if (signatureFound) {
        return TailFilterDecision.CONTINUE;
    }

    // Classify the node first; only elements can be BR or P.
    boolean isBr = false;
    boolean isParagraph = false;
    if (node instanceof Element) {
        Element candidate = (Element) node;
        isBr = candidate.tag().equals(BR);
        isParagraph = !isBr && candidate.tag().equals(P);
    }

    if (isBr || isParagraph) {
        lastElementCausedLineBreak = true;
        // Only a BR is remembered; a P clears any previously remembered BR.
        brElementPrecedingDashes = isBr ? (Element) node : null;
    } else {
        lastElementCausedLineBreak = false;
    }

    return TailFilterDecision.CONTINUE;
}
}
void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { if (out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline())) { if (accum instanceof StringBuilder) { if (((StringBuilder) accum).length() > 0) indent(accum, depth, out); } else { indent(accum, depth, out); } } accum.append('<').append(tagName()); if (attributes != null) attributes.html(accum, out); // selfclosing includes unknown tags, isEmpty defines tags that are always empty if (childNodes.isEmpty() && tag.isSelfClosing()) { if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty()) accum.append('>'); else accum.append(" />"); // <img> in html, <img /> in xml } else accum.append('>'); }
// Skip elements whose tag is not form-submittable.
if (!el.tag().isFormSubmittable()) continue; // contents are form listable, superset of submittable
if (el.hasAttr("disabled")) continue; // skip disabled form inputs
// Read the element's "name" attribute.
String name = el.attr("name");
/**
 * Tests whether {@code element} is the only child of its parent that carries its tag.
 *
 * @param root not used by this evaluator
 * @param element the element to test
 * @return {@code true} if exactly one child of the parent has the same tag as
 *         {@code element}; {@code false} if there is no parent or the parent is a
 *         {@link Document}
 */
@Override
public boolean matches(Element root, Element element) {
    final Element parent = element.parent();
    if (parent == null || parent instanceof Document) {
        return false;
    }

    // Count the parent's children (the element itself included) that share the tag.
    int sameTagCount = 0;
    for (Element sibling : parent.children()) {
        if (!sibling.tag().equals(element.tag())) {
            continue;
        }
        sameTagCount++;
    }
    return sameTagCount == 1;
}

@Override
/**
 * Head callback: detects the signature delimiter and removes nodes once it has been found.
 *
 * <ul>
 *   <li>After {@code signatureFound} is set, every node is removed.</li>
 *   <li>An element resets {@code lastElementCausedLineBreak}; a {@code BLOCKQUOTE} element is
 *       skipped entirely.</li>
 *   <li>A text node is treated as the delimiter when the previous element caused a line
 *       break, its whole text matches {@code DASH_SIGNATURE_HTML}, and its next sibling is a
 *       {@code BR} element. In that case the remembered preceding {@code BR} (if any) is
 *       removed as well.</li>
 * </ul>
 *
 * @param node the node being processed
 * @param depth not used by this method
 * @return the decision for {@code node}
 */
@NonNull
@Override
public HeadFilterDecision head(Node node, int depth) {
    if (signatureFound) {
        return HeadFilterDecision.REMOVE;
    }

    if (node instanceof Element) {
        lastElementCausedLineBreak = false;
        boolean isBlockquote = ((Element) node).tag().equals(BLOCKQUOTE);
        return isBlockquote ? HeadFilterDecision.SKIP_ENTIRELY : HeadFilterDecision.CONTINUE;
    }

    // Only a text node that directly follows a line break can be the delimiter.
    if (!(node instanceof TextNode) || !lastElementCausedLineBreak) {
        return HeadFilterDecision.CONTINUE;
    }

    String wholeText = ((TextNode) node).getWholeText();
    if (!DASH_SIGNATURE_HTML.matcher(wholeText).matches()) {
        return HeadFilterDecision.CONTINUE;
    }

    Node follower = node.nextSibling();
    boolean followedByBr = follower instanceof Element && ((Element) follower).tag().equals(BR);
    if (!followedByBr) {
        return HeadFilterDecision.CONTINUE;
    }

    signatureFound = true;
    if (brElementPrecedingDashes != null) {
        // Drop the BR that preceded the dashes along with the signature.
        brElementPrecedingDashes.remove();
        brElementPrecedingDashes = null;
    }
    return HeadFilterDecision.REMOVE;
}
private void insertNode(Node node) { // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc if (stack.size() == 0) doc.appendChild(node); else if (isFosterInserts()) insertInFosterParent(node); else currentElement().appendChild(node); // connect form controls to their form element if (node instanceof Element && ((Element) node).tag().isFormListed()) { if (formElement != null) formElement.addElement((Element) node); } }
/**
 * Writes this node's escaped text to {@code accum}, indenting first when pretty-printing
 * calls for it.
 *
 * <p>An indent is emitted when pretty-printing is on, the node is not blank, and either it is
 * the first child of an element whose tag formats as a block, or outline mode is on and the
 * node has siblings. Whitespace normalisation is requested from {@code Entities.escape} when
 * pretty-printing is on and the parent is an element that does not preserve whitespace.
 *
 * @param accum destination for the generated markup
 * @param depth depth passed through to {@code indent}
 * @param out output settings consulted for pretty-printing and outline mode
 * @throws IOException if appending to {@code accum} fails
 */
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
    final boolean pretty = out.prettyPrint();

    if (pretty) {
        boolean leadsBlockParent = siblingIndex() == 0
                && parentNode instanceof Element
                && ((Element) parentNode).tag().formatAsBlock()
                && !isBlank();
        if (leadsBlockParent || (out.outline() && siblingNodes().size() > 0 && !isBlank())) {
            indent(accum, depth, out);
        }
    }

    boolean normaliseWhite = pretty
            && parent() instanceof Element
            && !Element.preserveWhitespace(parent());
    Entities.escape(accum, coreValue(), out, false, normaliseWhite, false);
}
// Create a new element with the same tag as formatEl and tb's base URI.
Element adopter = new Element(formatEl.tag(), tb.getBaseUri());
// Copy all of formatEl's attributes onto the new element.
adopter.attributes().addAll(formatEl.attributes());
// Copy furthestBlock's child nodes into an array sized by its child count.
Node[] childNodes = furthestBlock.childNodes().toArray(new Node[furthestBlock.childNodeSize()]);
// Look up the elements with tag name "td".
Elements tdElements = doc.getElementsByTag("td");

for (Element cell : tdElements) {
    // Print the tag of the element
    System.out.println(cell.tag());
}
Field tagName = Tag.class.getDeclaredField("tagName"); // Get the field which contains the tagname tagName.setAccessible(true); // Set accessible to allow changes for( Element element : doc.select("*") ) // Iterate over all tags { Tag tag = element.tag(); // Get the tag of the element String value = tagName.get(tag).toString(); // Get the value (= name) of the tag if( !value.startsWith("#") ) // You can ignore all tags starting with a '#' { tagName.set(tag, value.toUpperCase()); // Set the tagname to the uppercase } } tagName.setAccessible(false); // Revert to false
// Sample markup: a table with a second table nested inside one of its cells.
// NOTE(review): the markup ends with "<body></html>" rather than "</body></html>" —
// presumably a typo in the sample; left unchanged here.
String tags = "<html><head></head><body><table><tr><td>1</td></tr><tr><td><table><tr><td>3</td><td>4</td></tr></table></td></tr></table><body></html>";
Document doc = Jsoup.parse(tags);

// Print the tag of every element selected by "*".
// The closing parenthesis of the for header was missing, so the loop did not compile.
for (Element e : doc.select("*")) { // you can use 'doc.getAllElements()' here too
    System.out.println(e.tag());
}
private static final String PRE_TAG = "pre"; public static void parseHtmlDoc(Document doc) { Elements elementList = doc.getAllElements(); for (Element element : elementList) { //if the tag isn't <pre> add it to new elements collection if(element.tag().toString().compareTo(PRE_TAG) != 0) { element.text(element.text().replaceAll("<br>", "")); } } }
/**
 * Recursively collects {@code element} and its descendants whose tag name is in
 * {@code tagSet}, visiting each element before its children.
 *
 * <p>The tag name is lower-cased with {@link java.util.Locale#ROOT} before the lookup, so the
 * match does not depend on the JVM's default locale (under a Turkish locale, for example,
 * "TITLE" would otherwise not lower-case to "title"). {@code tagSet} must therefore hold
 * lower-case names.
 *
 * @param results list that matching elements are appended to
 * @param element root of the subtree to examine
 * @param tagSet lower-case tag names to match
 */
private static void filterElementsByTag(List<Element> results, Element element, Set<String> tagSet) {
    String tagName = element.tag().getName().toLowerCase(java.util.Locale.ROOT);
    if (tagSet.contains(tagName)) {
        results.add(element);
    }

    for (Element child : element.children()) {
        filterElementsByTag(results, child, tagSet);
    }
}