/**
 * Returns <code>true</code> if provided element is one of the table-related elements.
 * <p>
 * The element's node name is lower-cased and looked up in <code>TABLE_ELEMENTS</code>
 * using <code>StringUtil.equalsOne</code>; any result other than <code>-1</code>
 * is treated as a match.
 *
 * @param element element whose node name is checked
 * @return <code>true</code> if the lower-cased node name is found in <code>TABLE_ELEMENTS</code>
 */
protected boolean isOneOfTableElements(final Element element) {
	// Lower-case with Locale.ROOT so the result does not depend on the JVM default locale
	// (e.g. under a Turkish locale 'I' is lower-cased to a dotless 'i', so "CAPTION"
	// would not become "caption").
	final String elementName = element.getNodeName().toLowerCase(java.util.Locale.ROOT);

	return StringUtil.equalsOne(elementName, TABLE_ELEMENTS) != -1;
}
) { String elementNodeName = element.getNodeName().toLowerCase(); if (elementNodeName.equals("form")) { if (element.getChildNodesCount() > 0) {
/**
 * Parses <code>namespace.xml</code> in XML mode and checks that prefixed node names
 * (<code>cfg:test</code>, <code>cfg:node</code>) are kept as-is, both when reading the
 * DOM directly and when selecting with Jerry using escaped colons.
 */
@Test
void testNamespaces() throws IOException {
	final File xmlFile = new File(testDataRoot, "namespace.xml");

	// --- direct DOM inspection
	final LagartoDOMBuilder domBuilder = new LagartoDOMBuilder();
	domBuilder.enableXmlMode();
	domBuilder.getConfig().setCalculatePosition(true);

	final Document document = domBuilder.parse(FileUtil.readString(xmlFile));
	assertTrue(document.check());

	final Element testElement = (Element) document.getChild(1);
	assertEquals("cfg:test", testElement.getNodeName());

	final Element nodeElement = (Element) testElement.getChild(0);
	assertEquals("cfg:node", nodeElement.getNodeName());

	// --- the same content through Jerry, also in XML mode
	final Jerry.JerryParser parser = new Jerry.JerryParser();
	final LagartoDOMBuilder jerryDomBuilder = (LagartoDOMBuilder) parser.getDOMBuilder();
	jerryDomBuilder.enableXmlMode();

	final Jerry root = parser.parse(FileUtil.readString(xmlFile));

	// collect the text of every cfg:node found below each cfg:test match
	final StringBuilder collected = new StringBuilder();
	root.$("cfg\\:test").each((match, position) -> {
		collected.append(match.$("cfg\\:node").text());
		return true;
	});

	assertEquals("This is a text", collected.toString());
}
while (true) { String parentNodeName = parentNode.getNodeName(); if (!implRules.implicitlyCloseParentTagOnNewTag(parentNodeName, node.getNodeName())) { break; log.debug("Implicitly closed tag <" + node.getNodeName() + "> ");
@Test void testWhitespaces() throws IOException { String xmlContent = "<foo> <!--c--> <bar> </bar> <x/> </foo>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableXmlMode(); lagartoDOMBuilder.getConfig().setSelfCloseVoidTags(true); Document doc = lagartoDOMBuilder.parse(xmlContent); assertEquals(1, doc.getChildNodesCount()); Element foo = (Element) doc.getChild(0); assertEquals("foo", foo.getNodeName()); assertEquals(3, foo.getChildNodesCount()); Element bar = (Element) foo.getChild(1); assertEquals("bar", bar.getNodeName()); assertEquals(1, bar.getChildNodesCount()); // must be 1 as whitespaces are between open/closed tag assertEquals("<foo><!--c--><bar> </bar><x/></foo>", doc.getHtml()); assertTrue(doc.check()); }
/**
 * Parses an XML declaration followed by an element whose tag name ends with
 * <code>?</code> and checks the names and attribute counts of both nodes.
 */
@Test
void testXmlDec() {
	final String source = "<?xml version=\"1.0\"?><div?></div>";

	final LagartoDOMBuilder domBuilder = new LagartoDOMBuilder();
	domBuilder.enableXmlMode();
	final Document parsed = domBuilder.parse(source);

	// first node: the XML declaration
	final XmlDeclaration declaration = (XmlDeclaration) parsed.getFirstChild();
	assertEquals(0, declaration.getAttributesCount());
	assertEquals("xml", declaration.getNodeName());

	// its sibling: the element, with the question mark kept in the name
	final Element divElement = (Element) declaration.getNextSibling();
	assertEquals(0, divElement.getAttributesCount());
	assertEquals("div?", divElement.getNodeName());

	assertTrue(parsed.check());
}
/**
 * Parses <code>kelkoo.html</code> with Jerry and checks how the attributes of the
 * first <code>script</code> element are split: six attributes with the exact names
 * asserted below.
 * <p>
 * A parsing exception is reported as a test failure, with the original exception
 * attached as the cause so its stack trace is not lost.
 *
 * @throws Exception declared so that the caught exception can be rethrown
 */
@Test
void testKelkoo() throws Exception {
	File file = new File(testDataRoot, "kelkoo.html");

	Jerry jerry;
	try {
		jerry = Jerry.jerry().parse(FileUtil.readString(file));
	} catch (Exception ex) {
		// Pass the exception as the cause; fail(String) alone would drop its stack trace.
		fail(ex.toString(), ex);
		// fail() throws, so this is not expected to run; it lets the compiler
		// see 'jerry' as definitely assigned after the try block.
		throw ex;
	}

	Element script = (Element) jerry.$("script").get(0);

	assertEquals("script", script.getNodeName());
	assertEquals(6, script.getAttributesCount());

	assertEquals("src", script.getAttribute(0).getName());
	assertEquals("data-config", script.getAttribute(1).getName());
	assertEquals("ext\\u00e9rieur|barbecue,", script.getAttribute(2).getName());
	assertEquals("planchaaccessoires\":\"http:\\", script.getAttribute(3).getName());
	assertEquals("www.kelkoo.fr\"}'", script.getAttribute(4).getName());
	assertEquals("data-adsense-append", script.getAttribute(5).getName());
}
/**
 * Parses a tag containing value-less attributes and a stray quoted token, and checks
 * that all four are exposed as attributes of the <code>div</code> element.
 */
@Test
void testOrphanAttribute() {
	final Document parsed = new LagartoDOMBuilder().parse("<div qwe '8989' foo zoo='123'/>");

	final Element divElement = (Element) parsed.getFirstChild();
	assertEquals("div", divElement.getNodeName());
	assertEquals(4, divElement.getAttributesCount());

	// the quoted token '8989' is expected to show up as an attribute name, quotes included
	final String[] expectedNames = {"qwe", "foo", "zoo", "'8989'"};
	for (final String expectedName : expectedNames) {
		assertTrue(divElement.hasAttribute(expectedName));
	}

	assertTrue(parsed.check());
}
@Test void testTwoHtml() throws IOException { File file = new File(testDataRoot, "two.html"); String htmlContent = FileUtil.readString(file); Document document = new LagartoDOMBuilder().parse(htmlContent); Node html = new NodeSelector(document).select("html").get(0); assertNotNull(html); Node body = new NodeSelector(html).selectFirst("body"); Element h1 = body.getFirstChildElement(); assertEquals("h1", h1.getNodeName()); Node comment1 = body.getFirstChild().getNextSibling(); assertEquals(Node.NodeType.COMMENT, comment1.getNodeType()); Element p = (Element) new NodeSelector(body).selectFirst("p"); assertEquals(h1, p.getPreviousSiblingElement()); assertEquals(h1, comment1.getNextSiblingElement()); assertNull(comment1.getNextSiblingName()); // check if filter works just for sub elements List<Node> p_ems = new NodeSelector(p).select("em"); assertEquals(1, p_ems.size()); Element script = (Element) new NodeSelector(html).selectFirst("script"); assertEquals("text/javascript", script.getAttribute("type")); assertTrue(document.check()); }
Document document = (Document) doc.get(0); Element divNode = (Element) document.getChild(0); assertEquals("div", divNode.getNodeName()); assertNotNull(divNode.getAttribute("myattr")); assertNotNull(divNode.getAttribute("myAttr")); document = (Document) doc.get(0); divNode = (Element) document.getChild(0); assertEquals("dIV", divNode.getNodeName()); assertNull(divNode.getAttribute("myattr"));
/**
 * Replaces the inner HTML of <code>#x</code> through Jerry and checks that the newly
 * inserted nodes report the same owner document as the pre-existing element.
 */
@Test
void testHtmlNodesOwner() {
	final Jerry jerry = Jerry.jerry().parse("<div>1<div id='x'>2</div>3</div>");

	jerry.$("#x").html("<span>wow</span>");
	assertEquals("<div>1<div id=\"x\"><span>wow</span></div>3</div>", jerry.html());

	// first node of the Jerry set; the assertions below treat it as the owner document
	final Node root = jerry.get(0);

	final Element innerDiv = root.getChildElement(0).getChildElement(0);
	assertSame(root, innerDiv.getOwnerDocument());

	// the freshly inserted <span> and its own first child must share that owner
	final Element insertedSpan = innerDiv.getChildElement(0);
	assertEquals("span", insertedSpan.getNodeName());
	assertSame(root, insertedSpan.getOwnerDocument());
	assertSame(root, insertedSpan.getChild(0).getOwnerDocument());
}
assertEquals(Node.NodeType.ELEMENT, html.getNodeType()); assertNotNull(html); assertEquals("html", html.getNodeName()); assertEquals(1, html.getChildNodesCount()); assertEquals(0, html.getAttributesCount()); assertEquals(Node.NodeType.ELEMENT, body.getNodeType()); assertNotNull(body); assertEquals("body", body.getNodeName()); assertEquals(1, body.getChildNodesCount()); assertNull(body.getAttribute("id")); assertEquals("p", p.getNodeName()); assertEquals(3, p.getChildNodesCount()); assertEquals("w173", p.getAttribute("id"));
/**
 * Returns <code>true</code> if provided element is one of the table-related elements.
 * <p>
 * The check is done on the lower-cased node name, which is searched for in
 * <code>TABLE_ELEMENTS</code> via <code>StringUtil.equalsOne</code>; a result of
 * <code>-1</code> means the name was not found.
 *
 * @param element element to test
 * @return <code>true</code> when the element's lower-cased name is listed in <code>TABLE_ELEMENTS</code>
 */
protected boolean isOneOfTableElements(final Element element) {
	final String nodeName = element.getNodeName();

	// Use Locale.ROOT: the no-arg toLowerCase() follows the JVM default locale, and
	// locales such as Turkish map 'I' to a dotless 'i', which would break the lookup
	// for upper-case names containing 'I'.
	final String elementName = nodeName.toLowerCase(java.util.Locale.ROOT);

	return StringUtil.equalsOne(elementName, TABLE_ELEMENTS) != -1;
}
/**
 * Returns <code>true</code> if provided element is one of the table-related elements.
 * <p>
 * The node name is lower-cased before being matched against <code>TABLE_ELEMENTS</code>
 * with <code>StringUtil.equalsOne</code>; every result except <code>-1</code> counts
 * as a match.
 *
 * @param element element to inspect
 * @return <code>true</code> if the lower-cased node name is present in <code>TABLE_ELEMENTS</code>
 */
protected boolean isOneOfTableElements(Element element) {
	// Locale.ROOT makes the lower-casing locale-independent; with the default-locale
	// variant an upper-case 'I' becomes a dotless 'i' under e.g. a Turkish locale.
	String elementName = element.getNodeName().toLowerCase(java.util.Locale.ROOT);

	return StringUtil.equalsOne(elementName, TABLE_ELEMENTS) != -1;
}
) { String elementNodeName = element.getNodeName().toLowerCase(); if (elementNodeName.equals("form")) { if (element.getChildNodesCount() > 0) {
) { String elementNodeName = element.getNodeName().toLowerCase(); if (elementNodeName.equals("form")) { if (element.getChildNodesCount() > 0) {
while (true) { String parentNodeName = parentNode.getNodeName(); if (!implRules.implicitlyCloseParentTagOnNewTag(parentNodeName, node.getNodeName())) { break; log.debug("Implicitly closed tag <" + node.getNodeName() + "> ");
while (true) { String parentNodeName = parentNode.getNodeName(); if (!implRules.implicitlyCloseParentTagOnNewTag(parentNodeName, node.getNodeName())) { break; log.debug("Implicitly closed tag <" + node.getNodeName() + "> ");
while (true) { String parentNodeName = parentNode.getNodeName(); if (!implRules.implicitlyCloseParentTagOnNewTag(parentNodeName, node.getNodeName())) { break; positionString = parentNode.position.toString(); log.debug("Implicitly closed tag <" + node.getNodeName() + "> " + positionString);