/**
 * Parses the response body as HTML and gathers the document title together with the
 * {@code href} value of every anchor matched by {@code a[href]}.
 *
 * @param responseBody the response body whose string content is parsed
 * @return a {@code Page} built from the document title and an unmodifiable list of link targets
 * @throws IOException propagated from reading the response body as a string
 */
@Override
public Page convert(ResponseBody responseBody) throws IOException {
    final String html = responseBody.string();
    final Document parsed = Jsoup.parse(html);

    final List<String> hrefs = new ArrayList<>();
    for (Element anchor : parsed.select("a[href]")) {
        final String target = anchor.attr("href");
        hrefs.add(target);
    }

    final List<String> readOnlyHrefs = Collections.unmodifiableList(hrefs);
    return new Page(parsed.title(), readOnlyHrefs);
}
} // closes the enclosing type, whose declaration starts above this excerpt
/**
 * Returns the value of the given attribute from the first matched element that carries it.
 *
 * @param attributeKey the attribute key to look up
 * @return the attribute value of the first matched element that has the attribute; an empty
 *         string if no elements were matched or none of them has the attribute
 * @see #hasAttr(String)
 */
public String attr(String attributeKey) {
    for (Element candidate : this) {
        if (!candidate.hasAttr(attributeKey)) {
            continue;
        }
        return candidate.attr(attributeKey);
    }
    return "";
}
public List<String> getAllVideoUrls(Document doc) { // div.6u h3 a.click-trigger List<String> result = new ArrayList<>(); Elements videoLinks = doc.select("div.6u h3 a.click-trigger"); for (Element e : videoLinks) { if (e.hasAttr("href")) { result.add(VIDEO_URL_PREFIX + e.attr("href")); } } return result; }
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Fetches a page and prints the {@code href} attribute of every element matched by
 * the selector {@code td.right td a}, one per line.
 */
public class Sample {

    /** Address of the page that is fetched. */
    private static final String PAGE_URL = "http://howto.unixdev.net";

    /** CSS selector for the anchors whose targets are printed. */
    private static final String LINK_SELECTOR = "td.right td a";

    /**
     * Entry point.
     *
     * @param args unused
     * @throws IOException propagated from fetching the page
     */
    public static void main(String[] args) throws IOException {
        Document page = Jsoup.connect(PAGE_URL).get();
        for (Element anchor : page.select(LINK_SELECTOR)) {
            String href = anchor.attr("href");
            System.out.println(href);
        }
    }
}
// Sample markup: a <select> holding three options with values 0, 1 and 2.
String demo = "<select id='list'><option value='0'>First value</option><option value='1'>Second value</option><option value='2'>Third value</option></select>";
Document document = Jsoup.parse(demo);
Elements options = document.select("select > option");

// Print the value attribute of every option whose text equals "second value", ignoring case.
for (Element option : options) {
    boolean isWanted = option.text().equalsIgnoreCase("second value");
    if (!isWanted) {
        continue;
    }
    String value = option.attr("value");
    System.out.println(value);
}
/**
 * Collects image URLs from the thumbnails matched by {@code #galleryImages > a > img}.
 *
 * <p>Thumbnails without a {@code src} attribute are skipped. For the rest, every
 * {@code "/thumbs/"} occurrence in the {@code src} value is replaced with {@code "/"}
 * (presumably turning the thumbnail address into the full-size image address).
 * The number of URLs found is printed to standard output.
 *
 * @param page the gallery page to scan
 * @return the rewritten image URLs in document order; empty if nothing matched
 */
@Override
protected List<String> getURLsFromPage(Document page) {
    List<String> imageURLs = new ArrayList<>();
    for (Element thumb : page.select("#galleryImages > a > img")) {
        if (!thumb.hasAttr("src")) {
            continue;
        }
        String imageURL = thumb.attr("src").replace("/thumbs/", "/");
        imageURLs.add(imageURL);
    }
    // Fixed: the original concatenated "Found" directly with the count ("Found3 image urls").
    System.out.println("Found " + imageURLs.size() + " image urls");
    return imageURLs;
}
/**
 * Fetches the search page for the given keyword and prints the {@code href} of the first
 * grandchild element of every element with class {@code b_title}.
 *
 * <p>NOTE(review): nothing is ever added to the returned set, so callers always receive an
 * empty set; the URLs found are only printed. Confirm whether entities should be built here.
 *
 * @param keyword the search term, appended as-is to {@code SEARCH_URL}
 * @return the result set (currently always empty)
 */
@Override
@NotNull
public Set<SearchEntity> search(@NotNull String keyword) {
    Set<SearchEntity> result = new HashSet<>();
    try {
        Document doc = Jsoup.connect(SEARCH_URL + keyword).get();
        Elements searchItems = doc.getElementsByClass("b_title");
        for (Element element : searchItems) {
            // child(int) indexes *element* children only, whereas childNodeSize() also counts
            // text and other nodes. Guarding with childNodeSize() let child(0) throw
            // IndexOutOfBoundsException on items holding only text, so check children() instead.
            Elements firstLevel = element.children();
            if (firstLevel.isEmpty()) {
                continue;
            }
            Elements secondLevel = firstLevel.get(0).children();
            if (secondLevel.isEmpty()) {
                continue;
            }
            Element nodeA = secondLevel.get(0);
            String url = nodeA.attr("href");
            if (!Utils.isEmpty(url)) {
                System.out.println(url);
            }
        }
    } catch (IOException e) {
        // Best effort: a failed fetch is reported and the (empty) result is returned.
        e.printStackTrace();
    }
    return result;
}
/**
 * Loads the page that follows the given one.
 *
 * <p>The first {@code a.link3} element whose text contains {@code "next"} supplies the
 * {@code href} that is appended to the gallery base address. After a call to
 * {@code sleep(1000)} the resulting address is fetched.
 *
 * @param doc the current page
 * @return the document fetched from the next-page address
 * @throws IOException if no matching link exists, or propagated from the fetch
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    String nextURL = null;
    for (Element candidate : doc.select("a.link3")) {
        if (!candidate.text().contains("next")) {
            continue;
        }
        String href = candidate.attr("href");
        nextURL = "http://imagefap.com/gallery.php" + href;
        break;
    }

    if (nextURL != null) {
        sleep(1000);
        return Http.url(nextURL).get();
    }
    throw new IOException("No next page found");
}
Document doc = Jsoup.connect("http://example.com") Element link = doc.select("a").first(); String text = doc.body().text(); // "An example link" String linkHref = link.attr("href"); // "http://example.com/" String linkText = link.text(); // "example"" String linkOuterH = link.outerHtml(); // "<a href="http://example.com"><b>example</b></a>" String linkInnerH = link.html(); // "<b>example</b>"
// NOTE(review): fragmentary excerpt - the statements below are not contiguous in their original
// file and the braces are unbalanced as shown, so this line is documented but left untouched.
// What the visible statements do:
//  - copy an attribute onto `html` only when `html` does not already have that key;
//  - take `stack.get(1)` as `body` and copy each start-tag attribute onto it under the same rule;
//  - call tb.reconstructFormattingElements(), insert the start tag via tb.insertEmpty(), and call
//    tb.framesetOk(false) unless the inserted element's "type" attribute equals "hidden"
//    (case-insensitive);
//  - in the branch guarded by StringUtil.inSorted(name, Constants.InBodyStartMedia): when the
//    start tag has an "action" attribute, set it on the element returned by tb.getFormElement().
if (!html.hasAttr(attribute.getKey())) html.attributes().put(attribute); Element body = stack.get(1); for (Attribute attribute : startTag.getAttributes()) { if (!body.hasAttr(attribute.getKey())) body.attributes().put(attribute); tb.reconstructFormattingElements(); Element el = tb.insertEmpty(startTag); if (!el.attr("type").equalsIgnoreCase("hidden")) tb.framesetOk(false); } else if (StringUtil.inSorted(name, Constants.InBodyStartMedia)) { if (startTag.attributes.hasKey("action")) { Element form = tb.getFormElement(); form.attr("action", startTag.attributes.get("action"));
public static void main(String[] args) throws Exception { URI uri = URI.create("http://www.amerisourcebergendrug.com"); Document d = Jsoup.connect(uri.toString()).get(); for (Element refresh : d.select("html head meta[http-equiv=refresh]")) { Matcher m = Pattern.compile("(?si)\\d+;\\s+url=(.+)|\\d+") .matcher(refresh.attr("content")); // find the first one that is valid if (m.matches()) { if (m.group(1) != null) d = Jsoup.connect(uri.resolve(m.group(1)).toString()).get(); break; } } }
// Fetch the news listing and print each entry's title followed by its link.
String url = "http://www.portal.pwr.wroc.pl/box_main_page_news,241.dhtml?limit=10";
String baseURL = "http://www.portal.pwr.wroc.pl/";

Document doc = Jsoup.connect(url).get();
Elements links = doc.select(".title_1 > a");

for (int i = 0; i < links.size(); i++) {
    Element link = links.get(i);
    String title = link.text();
    // The href is appended to the base address as-is.
    String target = baseURL + link.attr("href");
    System.out.println("Title - " + title);
    System.out.println(target);
}
/**
 * Loads the page that follows the given one, based on its pagination links.
 *
 * <p>The last element matched by {@code ul.pagination a[href]} is treated as the next-page
 * link when its text contains {@code "Next"}; its {@code href} is appended to
 * {@code VIDEO_URL_PREFIX} and fetched.
 *
 * @param doc the current page
 * @return the document fetched from the next-page address
 * @throws IOException if there is no such link, or propagated from the fetch
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    Elements pageNumbers = doc.select("ul.pagination a[href]");

    boolean hasNext = !pageNumbers.isEmpty() && pageNumbers.last().text().contains("Next");
    if (!hasNext) {
        throw new IOException("No next page found.");
    }

    String nextHref = pageNumbers.last().attr("href");
    return Http.url(VIDEO_URL_PREFIX + nextHref).get();
}
private List<String> getMediaFromPage(Document doc) { List<String> results = new ArrayList<>(); for (Element el : doc.select("img.img-front")) { if (el.hasAttr("src")) { if (el.attr("src").startsWith("https:")) { results.add(el.attr("src")); } else { results.add("https:" + el.attr("src")); } else if (el.hasAttr("data-src")) { results.add(el.attr("data-src")); for (Element el : doc.select("source[label=HD]")) { if (el.attr("src").startsWith("https:")) { results.add(el.attr("src")); results.add("https:" + el.attr("src")); for (Element el : doc.select("source[label=SD]")) { if (el.attr("src").startsWith("https:")) { results.add(el.attr("src")); results.add("https:" + el.attr("src"));