/**
 * Tells whether the given selectors select at least one node
 * when applied through a {@link NodeSelector} created for the given node.
 */
@Override
public boolean match(final Node node, final List<List<CssSelector>> selectors) {
    NodeSelector nodeSelector = new NodeSelector(node);
    boolean nothingSelected = nodeSelector.select(selectors).isEmpty();
    return !nothingSelected;
}
}
protected List<Node> select(final Node rootNode, final List<CssSelector> selectors) { // start with the root node List<Node> nodes = new ArrayList<>(); nodes.add(rootNode); // iterate all selectors for (CssSelector cssSelector : selectors) { // create new set of results for current css selector List<Node> selectedNodes = new ArrayList<>(); for (Node node : nodes) { walk(node, cssSelector, selectedNodes); } // post-processing: filter out the results List<Node> resultNodes = new ArrayList<>(); int index = 0; for (Node node : selectedNodes) { boolean match = filter(selectedNodes, node, cssSelector, index); if (match) { resultNodes.add(node); } index++; } // continue with results nodes = resultNodes; } return nodes; }
walkDescendantsIteratively(nodes, cssSelector, result); break; case CHILD: for (int i = 0; i < childCount; i++) { Node node = rootNode.getChild(i); selectAndAdd(node, cssSelector, result); Node node = rootNode.getNextSiblingElement(); if (node != null) { selectAndAdd(node, cssSelector, result); break; selectAndAdd(node, cssSelector, result);
/**
 * Runs the CSS3 selector query and returns the first selected node,
 * or <code>null</code> when the query selects nothing.
 */
public Node selectFirst(final String query) {
    List<Node> matches = select(query);
    return matches.isEmpty() ? null : matches.get(0);
}
@Test void testTwoHtml() throws IOException { File file = new File(testDataRoot, "two.html"); String htmlContent = FileUtil.readString(file); Document document = new LagartoDOMBuilder().parse(htmlContent); Node html = new NodeSelector(document).select("html").get(0); assertNotNull(html); Node body = new NodeSelector(html).selectFirst("body"); Element h1 = body.getFirstChildElement(); assertEquals("h1", h1.getNodeName()); Node comment1 = body.getFirstChild().getNextSibling(); assertEquals(Node.NodeType.COMMENT, comment1.getNodeType()); Element p = (Element) new NodeSelector(body).selectFirst("p"); assertEquals(h1, p.getPreviousSiblingElement()); assertEquals(h1, comment1.getNextSiblingElement()); assertNull(comment1.getNextSiblingName()); // check if filter works just for sub elements List<Node> p_ems = new NodeSelector(p).select("em"); assertEquals(1, p_ems.size()); Element script = (Element) new NodeSelector(html).selectFirst("script"); assertEquals("text/javascript", script.getAttribute("type")); assertTrue(document.check()); }
/**
 * Factory method: builds a new {@link NodeSelector} for the given node.
 */
protected NodeSelector createNodeSelector(final Node node) {
    NodeSelector selector = new NodeSelector(node);
    return selector;
}
/**
 * Collects all nodes accepted by the given {@link NodeFilter node filter},
 * walking from the root node of this selector.
 */
public List<Node> select(final NodeFilter nodeFilter) {
    final List<Node> accepted = new ArrayList<>();
    walk(rootNode, nodeFilter, accepted);
    return accepted;
}
/** * Walks over the child notes, maintaining the tree order and not using recursion. */ protected void walkDescendantsIteratively(final LinkedList<Node> nodes, final CssSelector cssSelector, final List<Node> result) { while (!nodes.isEmpty()) { Node node = nodes.removeFirst(); selectAndAdd(node, cssSelector, result); // append children in walking order to be processed right after this node int childCount = node.getChildNodesCount(); for (int i = childCount - 1; i >= 0; i--) { nodes.addFirst(node.getChild(i)); } } }
document = new LagartoDOMBuilder().parse(html); NodeSelector nodeSelector = new NodeSelector(document); Element div = (Element) nodeSelector.selectFirst("div.ysites-col"); Element h2 = (Element) div.getFirstChild();
/**
 * Selects nodes using a CSS3 selector query. The query is split on commas;
 * each part is parsed and applied to the root node, and the selected nodes
 * are appended to the result, skipping nodes that are already in it.
 */
public LinkedList<Node> select(String query) {
    LinkedList<Node> results = new LinkedList<>();

    for (String singleQuery : StringUtil.splitc(query, ',')) {
        List<CssSelector> selectors = createCSSelly(singleQuery).parse();

        for (Node candidate : select(rootNode, selectors)) {
            // a node selected by several parts of the query is reported only once
            if (!results.contains(candidate)) {
                results.add(candidate);
            }
        }
    }

    return results;
}
/**
 * Selects nodes using pre-parsed CSS selectors. Every selector list of the
 * collection is processed in the collection's iteration order, so the
 * collection type determines how the results are grouped.
 */
public List<Node> select(final Collection<List<CssSelector>> selectorsCollection) {
    final List<Node> collected = new ArrayList<>();
    selectorsCollection.forEach(selectors -> processSelectors(collected, selectors));
    return collected;
}
/**
 * Applies the {@link NodeFilter node filter} and returns the first accepted node,
 * or <code>null</code> when no node is accepted.
 */
public Node selectFirst(final NodeFilter nodeFilter) {
    List<Node> matches = select(nodeFilter);
    return matches.isEmpty() ? null : matches.get(0);
}
/**
 * Reads <code>one.html</code> from the test data root, parses it and
 * returns a {@link NodeSelector} over the parsed document.
 */
private NodeSelector createNodeFilter() throws IOException {
    String html = FileUtil.readString(new File(testDataRoot, "one.html"));
    LagartoDOMBuilder domBuilder = new LagartoDOMBuilder();
    return new NodeSelector(domBuilder.parse(html));
}
}
/**
 * Recursively visits all descendants of the given node: each child is
 * offered to the filter (and added to the result when accepted) before
 * its own children are visited.
 */
protected void walk(final Node rootNode, final NodeFilter nodeFilter, final List<Node> result) {
    final int total = rootNode.getChildNodesCount();
    int position = 0;

    while (position < total) {
        Node child = rootNode.getChild(position);
        position++;

        if (nodeFilter.accept(child)) {
            result.add(child);
        }

        // descend regardless of whether the child itself was accepted
        walk(child, nodeFilter, result);
    }
}
/** * Walks over the child notes, maintaining the tree order and not using recursion. */ protected void walkDescendantsIteratively(final LinkedList<Node> nodes, final CssSelector cssSelector, final List<Node> result) { while (!nodes.isEmpty()) { Node node = nodes.removeFirst(); selectAndAdd(node, cssSelector, result); // append children in walking order to be processed right after this node int childCount = node.getChildNodesCount(); for (int i = childCount - 1; i >= 0; i--) { nodes.addFirst(node.getChild(i)); } } }
/**
 * Selects nodes using pre-parsed CSS selectors. Every selector list of the
 * collection is processed in the collection's iteration order, so the
 * collection type determines how the results are grouped.
 */
public List<Node> select(final Collection<List<CssSelector>> selectorsCollection) {
    final List<Node> collected = new ArrayList<>();
    selectorsCollection.forEach(selectors -> processSelectors(collected, selectors));
    return collected;
}
/**
 * Applies the selectors to the root node and appends every selected node
 * that is not yet present in the given results list.
 */
protected void processSelectors(final List<Node> results, final List<CssSelector> selectors) {
    for (Node candidate : select(rootNode, selectors)) {
        if (results.contains(candidate)) {
            // already collected earlier
            continue;
        }
        results.add(candidate);
    }
}
/**
 * Selects an anchor through an attribute selector whose quoted value contains
 * a space, and checks that exactly one node is selected and that its
 * <code>href</code> attribute equals the value written in the markup.
 */
@Test
void test250() {
    String html = "<html>\n" +
        " <body>\n" +
        " <a href=\"/go?to=foobar&index=null\" title=\"Choice 1\">link</a>\n" +
        " </body>\n" +
        "</html>";

    LagartoDOMBuilder domBuilder = new LagartoDOMBuilder();
    NodeSelector nodeSelector = new NodeSelector(domBuilder.parse(html));

    List<Node> selectedNodes = nodeSelector.select("a[title='Choice 1']");

    // assert the size first so that a failed selection is reported as an
    // assertion failure and not as an IndexOutOfBoundsException from get(0)
    assertEquals(1, selectedNodes.size());
    assertEquals("/go?to=foobar&index=null", selectedNodes.get(0).getAttribute("href"));
}
protected List<Node> select(final Node rootNode, final List<CssSelector> selectors) { // start with the root node List<Node> nodes = new ArrayList<>(); nodes.add(rootNode); // iterate all selectors for (CssSelector cssSelector : selectors) { // create new set of results for current css selector List<Node> selectedNodes = new ArrayList<>(); for (Node node : nodes) { walk(node, cssSelector, selectedNodes); } // post-processing: filter out the results List<Node> resultNodes = new ArrayList<>(); int index = 0; for (Node node : selectedNodes) { boolean match = filter(selectedNodes, node, cssSelector, index); if (match) { resultNodes.add(node); } index++; } // continue with results nodes = resultNodes; } return nodes; }
walkDescendantsIteratively(nodes, cssSelector, result); break; case CHILD: for (int i = 0; i < childCount; i++) { Node node = rootNode.getChild(i); selectAndAdd(node, cssSelector, result); Node node = rootNode.getNextSiblingElement(); if (node != null) { selectAndAdd(node, cssSelector, result); break; selectAndAdd(node, cssSelector, result);