/**
 * Reads the named attribute from the given element, passes its value
 * through {@code normalizeUrlValue} and, when that yields a non-null
 * result, registers it in {@code urls} together with the current base URL.
 *
 * @param tag
 *            element whose attribute is read
 * @param attributeName
 *            name of the attribute holding the candidate URL
 */
private void extractAttribute(Element tag, String attributeName) {
    final String candidate = normalizeUrlValue(tag.attr(attributeName));
    if (candidate == null) {
        // Normalization rejected the value: nothing to record.
        return;
    }
    urls.addURL(candidate, baseUrl.url);
}
/**
 * Returns the URLs of every resource a browser would fetch automatically
 * after downloading the given HTML content: images, stylesheets,
 * javascript files, applets, and so on.
 * <p>
 * N.B. The returned Iterator yields URLs, whereas the supplied Collection
 * ends up holding objects of class URLString.
 * <p>
 * This overload wraps the supplied collection in a {@code URLCollection}
 * and delegates to the overload that accepts one.
 *
 * @param userAgent
 *            User Agent
 * @param html
 *            HTML code
 * @param baseUrl
 *            Base URL from which the HTML code was obtained
 * @param coll
 *            Collection - will contain URLString objects, not URLs
 * @param encoding
 *            Charset
 * @return an Iterator for the resource URLs
 * @throws HTMLParseException
 *             when parsing the <code>html</code> fails
 */
public Iterator<URL> getEmbeddedResourceURLs(
        String userAgent,
        byte[] html,
        URL baseUrl,
        Collection<URLString> coll,
        String encoding) throws HTMLParseException {
    final URLCollection wrappedUrls = new URLCollection(coll);
    return getEmbeddedResourceURLs(userAgent, html, baseUrl, wrappedUrls, encoding);
}
return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(col),encoding);
/**
 * Looks up the named attribute on the given tag, passes its value through
 * {@code normalizeUrlValue} and, when that yields a non-null result,
 * registers it in {@code urls} together with the current base URL.
 *
 * @param tag
 *            tag whose attribute is read
 * @param attributeName
 *            name of the attribute holding the candidate URL
 */
private void extractAttribute(Tag tag, String attributeName) {
    final CharSequence rawValue = tag.getAttributeValue(attributeName);
    final String candidate = normalizeUrlValue(rawValue);
    if (candidate == null) {
        // Normalization rejected the value: nothing to record.
        return;
    }
    urls.addURL(candidate, baseUrl.url);
}