/**
 * Smoke test: runs {@code HtmlParser} over every entry of {@code src/test/resources/xml}.
 * There are no assertions; the test fails only if listing the directory or parsing throws.
 *
 * @throws IOException if the resource directory cannot be listed, or propagated from parsing
 */
@Test
public void html_1() throws IOException {
    File dir = new File("src/test/resources/xml");
    File[] files = dir.listFiles();
    // listFiles() returns null (not an empty array) when the path is not a directory
    // or an I/O error occurs; fail with a clear message instead of a bare NPE.
    if (files == null) {
        throw new IOException("cannot list directory: " + dir.getAbsolutePath());
    }
    for (File f : files) {
        System.out.println("parsing: " + f);
        new HtmlParser().parse(f);
    }
}
/**
 * Parses the trimmed text {@code s} into one or more top-level nodes and appends them to
 * {@code xml}, which is given the name {@code "document"}.
 *
 * <p>Each top-level node is parsed with its own context obtained from {@code createContext()}.
 * The value returned by {@code context.parse(...)} plus one is used as the start offset
 * ({@code context.i}) of the next context; parsing stops once that offset reaches the end
 * of the trimmed text. A parsed node that has no name and exactly one child is replaced by
 * that child before being added.
 *
 * @param xml the node that receives the parsed top-level nodes
 * @param s   the text to parse; leading and trailing whitespace is trimmed first
 * @return the same {@code xml} instance
 */
public Node parse(Node xml, String s) {
    s = s.trim();
    // Convert once: the text does not change between iterations.
    // NOTE(review): assumes context.parse does not modify the array - confirm.
    final char[] chars = s.toCharArray();
    final int length = s.length();

    T context = createContext();
    xml.name("document");

    Node child = new Node();
    int next = context.parse(chars, child) + 1;
    while (true) {
        // Unwrap an anonymous wrapper that holds a single child.
        if (child.name == null && child.children.size() == 1) {
            child = child.children.get(0);
        }
        xml.add(child);

        if (next >= length) {
            break;
        }

        // More input remains: parse the next top-level node with a fresh context
        // positioned at the offset just computed.
        child = new Node();
        context = createContext();
        context.i = next;
        next = context.parse(chars, child) + 1;
    }
    return xml;
}
/**
 * Reports the {@code %@} tag as self-closed and defers to the superclass for every other name.
 *
 * @param name the tag name to check
 * @return {@code true} if {@code name} is {@code "%@"} or the superclass treats it as self-closed
 */
@Override
protected boolean isSelfClosedTag(String name) {
    return name.equals("%@") || super.isSelfClosedTag(name);
}
/**
 * Checks that a {@code <script>} element nested in a {@code <div>} is the div's only child
 * and that its {@code text()} matches the expected script source.
 */
@Test
public void html_3() throws IOException {
    String markup = "<div><script>\n"
            + " var a\n"
            + " for(var i=0; i<10;i++) {\n"
            + " a+=i;"
            + " }\n"
            + "</script></div>";
    String expectedScriptText = "var a\n"
            + " for(var i=0; i<10;i++) {\n"
            + " a+=i;"
            + " }\n";

    Node document = new HtmlParser().parse(markup);
    Node divNode = document.children().get(0);
    Assert.assertEquals(1, divNode.children().size());

    Node scriptNode = divNode.children.get(0);
    Assert.assertEquals(expectedScriptText, scriptNode.text());
}
/**
 * Verifies unquoted attribute values on an empty {@code <div>}: first a single
 * {@code width} attribute, then {@code width} together with {@code height}.
 */
@Test
public void html_2() throws IOException {
    // Scenario 1: a single unquoted attribute.
    Node firstDocument = new HtmlParser().parse("<div width=100px></div>");
    Node oneAttrDiv = firstDocument.children().get(0);
    Assert.assertEquals(1, oneAttrDiv.attributes().size());
    Assert.assertEquals(0, oneAttrDiv.children().size());
    Assert.assertEquals("100px", oneAttrDiv.attr("width"));

    // Scenario 2: two unquoted attributes separated by whitespace.
    Node secondDocument = new HtmlParser().parse("<div width=100px height=50px></div>");
    Node twoAttrDiv = secondDocument.children().get(0);
    Assert.assertEquals(2, twoAttrDiv.attributes().size());
    Assert.assertEquals(0, twoAttrDiv.children().size());
    Assert.assertEquals("100px", twoAttrDiv.attr("width"));
    Assert.assertEquals("50px", twoAttrDiv.attr("height"));
}
/**
 * Checks that {@code <!DOCTYPE html>} parses to a single node named {@code !DOCTYPE}
 * whose only attribute is the key {@code html} with a null value.
 */
@Test
public void html_doctype() throws IOException {
    String s = "<!DOCTYPE html>";
    Node doc = new HtmlParser().parse(s);
    Assert.assertEquals(1, doc.children().size());

    Node doctype = doc.get(0);
    Assert.assertEquals("!DOCTYPE", doctype.name());
    Assert.assertEquals(1, doctype.attributes().size());
    // The token after the tag name is expected as a key that is present but has no value.
    Assert.assertTrue(doctype.attributes().containsKey("html"));
    Assert.assertNull(doctype.attributes().get("html"));
}
/**
 * Checks that an HTML 4.01 strict doctype declaration parses to a single node named
 * {@code !DOCTYPE} with four attributes. Each token after the tag name, including the
 * quoted public identifier and system URL with their quotes, is expected as a key that
 * is present with a null value.
 */
@Test
public void html_doctype2() throws IOException {
    String s = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n";
    Node doc = new HtmlParser().parse(s);
    Assert.assertEquals(1, doc.children().size());

    Node doctype = doc.get(0);
    Assert.assertEquals("!DOCTYPE", doctype.name());
    Assert.assertEquals(4, doctype.attributes().size());

    String[] expectedKeys = {
        "HTML",
        "PUBLIC",
        "\"-//W3C//DTD HTML 4.01//EN\"",
        "\"http://www.w3.org/TR/html4/strict.dtd\""
    };
    for (String key : expectedKeys) {
        // Present as a key, but carrying no value.
        Assert.assertTrue(doctype.attributes().containsKey(key));
        Assert.assertNull(doctype.attributes().get(key));
    }
}