String htmlString = "<html><div><p><strong>\"notClickable1\"<a rel=\"nofollow\" target=\"_blank\" href=\"example1.com\">clickable</a>\"notClickable2\"<a rel=\"nofollow\" target=\"_blank\" href=\"example2.com\">clickable</a>\"notClickable3\"<a rel=\"nofollow\" target=\"_blank\" href=\"example3.com\">clickable</a></strong></p></div></html>"; Document doc = Jsoup.parse(htmlString); //can be replaced with Jsoup.connect("yourUrl").get(); String parsedHTML = ""; Element container = doc.select("div>p>strong").first(); for (Node node : container.childNodes()) { if(node.nodeName().equals("a") && node.previousSibling().nodeName().equals("#text")){ parsedHTML += node.previousSibling().toString().replaceAll("\"", ""); parsedHTML += "= " + node.attr("href").toString() + " "; } } parsedHTML.trim(); System.out.println(parsedHTML);
public static String readAnnotation(Node node) { if (null == node) return null; Node preceding = node.previousSibling(); //if this is a text node, then match for annotations return readAnnotation(node.outerHtml()); }
public static String readAnnotation(Node node) { if (null == node) return null; Node preceding = node.previousSibling(); //if this is a text node, then match for annotations return readAnnotation(node.outerHtml()); }
public static String readAnnotation(Node node) { if (null == node) return null; Node preceding = node.previousSibling(); //if this is a text node, then match for annotations return readAnnotation(node.outerHtml()); }
/**
 * Scans the direct children of {@code parent} and, for each text node whose
 * immediate neighbours on both sides are elements, passes the text node and
 * its two neighbours to {@code normalizeTextBetweenNodes}.
 *
 * @param parent the element whose direct child nodes are inspected
 */
private void normalizeEmptySpaceBetweenNodes(Element parent) {
    List<Node> liveChildren = parent.childNodes();
    if (liveChildren.isEmpty()) {
        return;
    }

    // Walk a copy of the child list rather than the list returned by the parent
    // (presumably so the helper may alter the parent's children mid-walk — TODO confirm).
    List<Node> snapshot = new ArrayList<>(liveChildren);
    for (Node current : snapshot) {
        if (!(current instanceof TextNode)) {
            continue;
        }
        Node before = current.previousSibling();
        Node after = current.nextSibling();
        // Only text that sits directly between two elements is normalized.
        if (before instanceof Element && after instanceof Element) {
            normalizeTextBetweenNodes((TextNode) current, (Element) before, (Element) after);
        }
    }
}
if (child instanceof TextNode) { TextNode childTextNode = (TextNode) child; Node previousSibling = child.previousSibling(); if (previousSibling instanceof TextNode) { TextNode previousSiblingTextNode = (TextNode) previousSibling;
(previousNode = previousNode.previousSibling()) != null;) { (previous = previous.previousSibling()) != null;) { previous = previous.previousSibling()) {
(previousNode = previousNode.previousSibling()) != null; ) { if (previousNode instanceof Element) { (previous = previous.previousSibling()) != null; ) { if (previous instanceof Element previous = previous.previousSibling()) { beforeChildren.add(previous);