protected List<URI> findXmlUrlReferences(final String directoryContents, final URL url) throws URISyntaxException { final List<URI> results = new ArrayList<URI>(); final List<String> urlFragments = extractHrefs(directoryContents); for (final String u : urlFragments) { if (u.endsWith(XML_EXTENSION)) { //points to xml if (u.startsWith("/")) { //absolute path need to add the protocol results.add(new URI(url.getProtocol() + ":" + u)); } else if (u.startsWith("http:")) { // full url results.add(new URI(u)); } else { // relative url stick the name on the end results.add(appendToURI(url, u)); } } } return results; }
/**
 * Feeds a canned Apache-style directory index to {@code loader.extractHrefs} and checks
 * that all eight {@code href} values are returned, starting with the parent-directory
 * link followed by {@code dg.xml}.
 */
@Test(groups = "fast")
public void testExtractHrefs() {
    // Directory index fixture: seven list entries plus the mailto link in the footer.
    final StringBuilder html = new StringBuilder();
    html.append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">");
    html.append("<html>");
    html.append(" <head>");
    html.append(" <title>Index of /config/trunk/xno</title>");
    html.append(" </head>");
    html.append(" <body>");
    html.append("<h1>Index of /config/trunk/xno</h1>");
    html.append("<ul><li><a href=\"/config/trunk/\"> Parent Directory</a></li>");
    html.append("<li><a href=\"dg.xml\"> dg.xml</a></li>");
    html.append("<li><a href=\"replica.foo\"> replica/</a></li>");
    html.append("<li><a href=\"replica2/\"> replica2/</a></li>");
    html.append("<li><a href=\"replica_dyson/\"> replica_dyson/</a></li>");
    html.append("<li><a href=\"snv1/\"> snv1/</a></li>");
    html.append("<li><a href=\"viking.xml\"> viking.xml</a></li>");
    html.append("</ul>");
    html.append("<address>Apache/2.2.3 (CentOS) Server at <a href=\"mailto:kate@ning.com\">gepo.ningops.net</a> Port 80</address>");
    html.append("</body></html>");

    final List<String> links = loader.extractHrefs(html.toString());

    Assert.assertEquals(links.size(), 8);
    Assert.assertEquals(links.get(0), "/config/trunk/");
    Assert.assertEquals(links.get(1), "dg.xml");
}
protected List<URI> findXmlUrlReferences(final String directoryContents, final URL url) throws URISyntaxException { final List<URI> results = new ArrayList<URI>(); final List<String> urlFragments = extractHrefs(directoryContents); for (final String u : urlFragments) { if (u.endsWith(XML_EXTENSION)) { //points to xml if (u.startsWith("/")) { //absolute path need to add the protocol results.add(new URI(url.getProtocol() + ":" + u)); } else if (u.startsWith("http:")) { // full url results.add(new URI(u)); } else { // relative url stick the name on the end results.add(appendToURI(url, u)); } } } return results; }
/**
 * Feeds a canned Apache-style directory index to {@code loader.extractHrefs} and checks
 * that all eight {@code href} values are returned, starting with the parent-directory
 * link followed by {@code dg.xml}.
 */
@Test(groups = "fast")
public void testExtractHrefs() {
    // Directory index fixture: seven list entries plus the mailto link in the footer.
    final StringBuilder html = new StringBuilder();
    html.append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">");
    html.append("<html>");
    html.append(" <head>");
    html.append(" <title>Index of /config/trunk/xno</title>");
    html.append(" </head>");
    html.append(" <body>");
    html.append("<h1>Index of /config/trunk/xno</h1>");
    html.append("<ul><li><a href=\"/config/trunk/\"> Parent Directory</a></li>");
    html.append("<li><a href=\"dg.xml\"> dg.xml</a></li>");
    html.append("<li><a href=\"replica.foo\"> replica/</a></li>");
    html.append("<li><a href=\"replica2/\"> replica2/</a></li>");
    html.append("<li><a href=\"replica_dyson/\"> replica_dyson/</a></li>");
    html.append("<li><a href=\"snv1/\"> snv1/</a></li>");
    html.append("<li><a href=\"viking.xml\"> viking.xml</a></li>");
    html.append("</ul>");
    html.append("<address>Apache/2.2.3 (CentOS) Server at <a href=\"mailto:kate@ning.com\">gepo.ningops.net</a> Port 80</address>");
    html.append("</body></html>");

    final List<String> links = loader.extractHrefs(html.toString());

    Assert.assertEquals(links.size(), 8);
    Assert.assertEquals(links.get(0), "/config/trunk/");
    Assert.assertEquals(links.get(1), "dg.xml");
}