public void tail(Node node, int depth) { // make sure there is a space between block tags and immediately following text nodes <div>One</div>Two should be "One Two". if (node instanceof Element) { Element element = (Element) node; if (element.isBlock() && (node.nextSibling() instanceof TextNode) && !TextNode.lastCharIsWhitespace(accum)) accum.append(' '); } } }, this);
/**
 * Decides what to do with {@code node} when it is entered.
 *
 * <ul>
 *   <li>Once {@code signatureFound} is set, every node is answered with {@code REMOVE}.</li>
 *   <li>An element resets {@code lastElementCausedLineBreak}; an element whose tag equals
 *       {@code BLOCKQUOTE} is answered with {@code SKIP_ENTIRELY}.</li>
 *   <li>A text node whose whole text matches {@code DASH_SIGNATURE_HTML}, seen while
 *       {@code lastElementCausedLineBreak} is true and directly followed by an element
 *       whose tag equals {@code BR}, sets {@code signatureFound}, removes
 *       {@code brElementPrecedingDashes} if one is recorded, and is answered with
 *       {@code REMOVE}.</li>
 *   <li>Everything else is answered with {@code CONTINUE}.</li>
 * </ul>
 *
 * @param node  the node being entered
 * @param depth the node's depth (not used by this method)
 * @return the decision for {@code node}
 */
@NonNull
@Override
public HeadFilterDecision head(Node node, int depth) {
    if (signatureFound) {
        return HeadFilterDecision.REMOVE;
    }

    if (node instanceof Element) {
        lastElementCausedLineBreak = false;
        boolean isBlockquote = ((Element) node).tag().equals(BLOCKQUOTE);
        return isBlockquote ? HeadFilterDecision.SKIP_ENTIRELY : HeadFilterDecision.CONTINUE;
    }

    if (!(node instanceof TextNode)) {
        return HeadFilterDecision.CONTINUE;
    }

    // The flag is tested first so the pattern is only evaluated when it can matter.
    boolean dashLine = lastElementCausedLineBreak
            && DASH_SIGNATURE_HTML.matcher(((TextNode) node).getWholeText()).matches();
    if (!dashLine) {
        return HeadFilterDecision.CONTINUE;
    }

    Node following = node.nextSibling();
    boolean followedByBr = following instanceof Element && ((Element) following).tag().equals(BR);
    if (!followedByBr) {
        return HeadFilterDecision.CONTINUE;
    }

    signatureFound = true;
    if (brElementPrecedingDashes != null) {
        brElementPrecedingDashes.remove();
        brElementPrecedingDashes = null;
    }
    return HeadFilterDecision.REMOVE;
}
/**
 * Walks {@code root} and all of its descendants depth-first. {@code visitor.head} is
 * called when a node is reached and {@code visitor.tail} once the node and its children
 * are done. The walk never moves past {@code root} to its siblings.
 *
 * @param visitor Node visitor.
 * @param root the root node point to traverse.
 */
public static void traverse(NodeVisitor visitor, Node root) {
    int level = 0;
    Node current = root;
    while (current != null) {
        visitor.head(current, level);

        if (current.childNodeSize() > 0) {
            // Descend into the first child before looking at siblings.
            current = current.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: close every node that has no further sibling, climbing upwards.
        while (current.nextSibling() == null && level > 0) {
            visitor.tail(current, level);
            current = current.parentNode();
            level--;
        }
        visitor.tail(current, level);

        if (current == root) {
            return;
        }
        current = current.nextSibling();
    }
}
while (node.nextSibling() == null && depth > 0) { if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) { tailResult = filter.tail(node, depth); node = node.nextSibling();
while (node.nextSibling() == null && depth > 0) { return result; node = node.nextSibling(); if (result == FilterResult.REMOVE)
// Locate the first Element among the siblings that follow keyNode, stepping over any
// non-element nodes in between. trackDictNode stays null when no such sibling exists.
Element trackDictNode = null;
{
    Node temp = keyNode.nextSibling();
    while (temp != null && !(temp instanceof Element)) {
        temp = temp.nextSibling();
    }
    // The loop only stops on null or on an Element, so this cast cannot fail; it is
    // required because a Node reference is not assignable to an Element variable.
    trackDictNode = (Element) temp;
}
// Sample markup: a paragraph of <strong> labels, each followed by its value as plain text.
String html =
        "<div class=\"newclass \">\n"
        + " <div>\n"
        + " <p> \n"
        + " <strong>Arist:</strong> Picasso Biggie <em>|</em>\n"
        + " <strong>Released:</strong> 3 years ago <em>|</em>\n"
        + " <strong>Album:</strong> Picasso Biggie: The Big OneUp <em>|</em> \n"
        + " <strong>Producer:</strong> Various <em>|</em> \n"
        + " <strong>Featuring:</strong> Mount Kimbie <em>|</em> \n"
        + " </p>\n"
        + " </div>\n"
        + "</div>";
Document doc = Jsoup.parse(html);
Elements labels = doc.select("div p strong");
for (Node label : labels) {
    // Print the node that directly follows each label, with all spaces removed.
    // NOTE(review): nextSibling() is dereferenced without a null check — confirm every
    // label is guaranteed to be followed by another node.
    Node valueNode = label.nextSibling();
    String rendered = valueNode.toString();
    System.out.println(rendered.replace(" ", "").trim());
}
public void tail(Node node, int depth) { // make sure there is a space between block tags and immediately // following text nodes <div>One</div>Two should be "One Two". if (node instanceof Element) { Element element = (Element) node; if (element == excluded) { excluded = null; } if (element.isBlock() && (node.nextSibling() instanceof TextNode) && !lastCharIsWhitespace(accum)) accum.append(' '); } } }, node);
public static String textPlus(Element elem) { List<TextNode> textNodes = elem.textNodes(); if (textNodes.isEmpty()) return ""; StringBuilder result = new StringBuilder(); // start at the first text node Node currentNode = textNodes.get(0); while (currentNode != null) { // append deep text of all subsequent nodes if (currentNode instanceof TextNode) { TextNode currentText = (TextNode) currentNode; result.append(currentText.text()); } else if (currentNode instanceof Element) { Element currentElement = (Element) currentNode; result.append(currentElement.text()); } currentNode = currentNode.nextSibling(); } return result.toString(); }
/**
 * Passes every text node of {@code parent} that sits directly between two elements to
 * {@code normalizeTextBetweenNodes}, together with those two neighbouring elements.
 *
 * @param parent the element whose direct children are inspected
 */
private void normalizeEmptySpaceBetweenNodes(Element parent) {
    List<Node> liveChildren = parent.childNodes();
    if (liveChildren.isEmpty()) {
        return;
    }

    // Iterate over a copy of the child list rather than the list itself
    // (presumably because normalizeTextBetweenNodes may alter the children — confirm).
    List<Node> snapshot = new ArrayList<>(liveChildren);
    for (Node candidate : snapshot) {
        Node before = candidate.previousSibling();
        Node after = candidate.nextSibling();
        boolean textBetweenElements = candidate instanceof TextNode
                && before instanceof Element
                && after instanceof Element;
        if (textBetweenElements) {
            normalizeTextBetweenNodes((TextNode) candidate, (Element) before, (Element) after);
        }
    }
}
currentNode = currentNode.nextSibling();
/**
 * Looks at the siblings following {@code current} and returns the first element among
 * them if its tag equals {@code tag}. Blank text nodes and other non-element nodes are
 * stepped over; a non-blank text node or an element with a different tag ends the search.
 *
 * @param tag     the tag the next element must have
 * @param current the element whose following siblings are scanned
 * @return the matching element, or {@code null} when there is none
 */
private Element nextTag(Tag tag, Element current) {
    Node sibling = current.nextSibling();
    while (sibling != null) {
        if (sibling instanceof Element) {
            // The first element decides the outcome either way.
            Element candidate = (Element) sibling;
            return tag.equals(candidate.tag()) ? candidate : null;
        }
        if (sibling instanceof TextNode && !((TextNode) sibling).isBlank()) {
            return null;
        }
        sibling = sibling.nextSibling();
    }
    return null;
}
paragraphChildren.add(next); } while ((next = next.nextSibling()) != null);
/**
 * Collects {@code next} and its following siblings up to (not including) the first
 * block element, removes them from their parent and returns a new {@code P_TAG}
 * element that holds clones of them.
 *
 * @param next the first node of the run; may be {@code null}
 * @return the new paragraph element, or {@code null} when {@code next} is {@code null}
 *         or the run is empty because {@code next} itself is a block element
 */
private Element inlineTextToParagraph(Node next) {
    List<Node> inlineRun = new ArrayList<Node>();
    // A null start yields an empty run, which is answered with null below.
    for (Node cursor = next; cursor != null; cursor = cursor.nextSibling()) {
        boolean startsBlock = cursor instanceof Element && ((Element) cursor).isBlock();
        if (startsBlock) {
            break;
        }
        inlineRun.add(cursor);
    }

    if (inlineRun.isEmpty()) {
        return null;
    }

    Element paragraph = new Element(P_TAG, "");
    for (Node inlineNode : inlineRun) {
        inlineNode.remove();
        paragraph.appendChild(inlineNode.clone());
    }
    return paragraph;
}
/**
 * Finds the element that follows {@code current} among its siblings, ignoring blank
 * text nodes and other non-element nodes, and returns it only if its tag equals
 * {@code tag}.
 *
 * @param tag     the tag the following element must have
 * @param current the element whose following siblings are scanned
 * @return the following element when its tag matches; {@code null} when a non-blank
 *         text node or a differently tagged element comes first, or when the siblings
 *         run out
 */
private Element nextTag(Tag tag, Element current) {
    for (Node following = current.nextSibling();
            following != null;
            following = following.nextSibling()) {
        if (following instanceof Element) {
            Element found = (Element) following;
            if (tag.equals(found.tag())) {
                return found;
            }
            return null;
        }
        boolean visibleText = following instanceof TextNode && !((TextNode) following).isBlank();
        if (visibleText) {
            return null;
        }
    }
    return null;
}
/**
 * Walks {@code root} and its descendants depth-first, calling {@code head} when a node
 * is reached and {@code tail} once the node and its children are done. A node for which
 * {@code filter} or {@code replace} returns true gets neither callback and is not
 * descended into; the walk continues with that node's next sibling.
 *
 * NOTE(review): when such a skipped node has no next sibling the loop ends right there,
 * so enclosing nodes never receive tail() and later siblings of the parents are not
 * visited — confirm this is intended.
 *
 * @param root the node to start from
 */
public void traverse(Node root) {
    int level = 0;
    Node cursor = root;
    while (cursor != null) {
        boolean skip = filter(cursor) || replace(cursor);
        if (skip) {
            cursor = cursor.nextSibling();
            continue;
        }

        head(cursor, level);

        if (cursor.childNodeSize() > 0) {
            // Descend into the first child before looking at siblings.
            cursor = cursor.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: close every node that has no further sibling, climbing upwards.
        while (cursor.nextSibling() == null && level > 0) {
            tail(cursor, level);
            cursor = cursor.parentNode();
            level--;
        }
        tail(cursor, level);

        if (cursor == root) {
            return;
        }
        cursor = cursor.nextSibling();
    }
}
Node nextSibling = currentNode.nextSibling(); if (nextSibling == null) { break;
if (branch != null && branch.nextSibling() instanceof TextNode) { item.setBranch(((TextNode) branch.nextSibling()).text().trim());
/**
 * Turns a bold figure number that follows {@code element} in a list into a figure
 * reference link, then repeats for the item after it.
 *
 * <p>The node directly after {@code element} must be a separator whose text is
 * {@code ", "}, {@code " and "} or {@code ", and "}. If the node after the separator is
 * a {@code <b>} whose first child is a text node of one or two digits with an optional
 * letter, that {@code <b>} is replaced by a clone of {@code element} that is renamed to
 * {@code <a>}, given class {@code figref}, an {@code id} of {@code "FR-"} plus the
 * number passed through {@code Strings.padStart(…, 4, '0')}, an {@code idref} from
 * {@code ReferenceTagger.createFigId}, and the number as its text. The method then
 * recurses on the new link so that longer lists are handled item by item.
 *
 * <p>Anything that does not fit this shape leaves the document untouched.
 *
 * @param element the figure reference that may be followed by further list items
 */
public void fixFigrefListItem(Element element) {
    Node separator = element.nextSibling();
    String trailingTxt;
    if (separator instanceof TextNode) {
        trailingTxt = ((TextNode) separator).getWholeText();
    } else if (separator instanceof Element) {
        trailingTxt = ((Element) separator).text();
    } else {
        // No sibling at all, or a node type that carries no separator text.
        return;
    }
    if (!trailingTxt.matches("^(, |,? and )")) {
        return;
    }

    // The separator may be the last sibling; there is nothing to convert in that case.
    Node next = separator.nextSibling();
    if (next == null || !next.nodeName().equalsIgnoreCase("b")) {
        return;
    }
    // Only a <b> that starts with plain text can hold a bare figure number.
    if (next.childNodeSize() == 0 || !(next.childNode(0) instanceof TextNode)) {
        return;
    }

    String containedTxt = ((TextNode) next.childNode(0)).getWholeText();
    // [A-Za-z] rather than [A-z]: the latter also admits the punctuation between 'Z' and 'a'.
    if (!containedTxt.matches("[0-9]{1,2}[A-Za-z]?")) {
        return;
    }

    Element newEl = element.clone();
    newEl.attr("id", "FR-" + Strings.padStart(containedTxt, 4, '0'));
    newEl.attr("idref", ReferenceTagger.createFigId(containedTxt));
    newEl.tagName("a");
    newEl.addClass("figref");
    newEl.text(containedTxt);
    next.replaceWith(newEl);

    // Continue with whatever follows the freshly created link.
    fixFigrefListItem(newEl);
}
break; } while ((next = next.nextSibling()) != null); while ((next = next.nextSibling()) != null) { if (!(next instanceof TextNode && ((TextNode) next).isBlank())) {