/**
 * Returns the first element that {@code element.getElementsByTag(tagName)} reports.
 *
 * @param element the element whose matches are queried
 * @param tagName the tag name to look for
 * @return the first match, or {@code null} when the lookup yields nothing
 */
@Nullable
public static Element getElementByTag(Element element, String tagName) {
    Elements matches = element.getElementsByTag(tagName);
    boolean nothingFound = matches == null || matches.isEmpty();
    return nothingFound ? null : matches.get(0);
}
}
File input = new File("/tmp/input.html"); Document doc = Jsoup.parse(input, "UTF-8", "http://example.com/"); //http://jsoup.org/cookbook/input/load-document-from-url //Document doc = Jsoup.connect("http://example.com/").get(); Element content = doc.getElementById("content"); Elements links = content.getElementsByTag("a"); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); }
/**
 * Downloads the page at {@code url} and renders its single HTML table as CSV text.
 *
 * <p>Each {@code <tr>} becomes one CSV row; the texts of its {@code <th>} cells come
 * first, followed by the texts of its {@code <td>} cells.
 *
 * @param url address of the page to fetch
 * @return the CSV representation of the table
 * @throws IOException if the page cannot be fetched
 * @throws IllegalStateException if the page does not contain exactly one table
 */
public String tableToCsv(String url) throws IOException {
    Document doc = Jsoup.connect(url).get();
    Elements tables = doc.select("table");
    if (tables.size() != 1) {
        throw new IllegalStateException(
            "Reading html to table currently works if there is exactly 1 html table on the page. "
                + " The URL you passed has " + tables.size()
                + ". You may file a feature request with the URL if you'd like your page to be supported");
    }
    Element table = tables.get(0);

    CsvWriterSettings settings = new CsvWriterSettings();
    StringWriter stringWriter = new StringWriter();
    CsvWriter csvWriter = new CsvWriter(stringWriter, settings);
    for (Element row : table.select("tr")) {
        Elements headerCells = row.getElementsByTag("th");
        Elements cells = row.getElementsByTag("td");
        String[] nextLine = Stream.concat(headerCells.stream(), cells.stream())
            .map(Element::text)
            .toArray(String[]::new);
        csvWriter.writeRow(nextLine);
    }
    // Close the writer so any rows it may still be buffering reach the StringWriter
    // before its content is read; closing a StringWriter itself has no effect.
    csvWriter.close();
    return stringWriter.toString();
}
}
/**
 * Queries the timeanddate.com world clock for {@code query} and collects the
 * location/time pairs found in the result page.
 *
 * <p>Every even-indexed {@code <td>} is treated as a location cell (its anchor text is
 * the location) and its next sibling element as the matching time cell.
 *
 * @param query search term appended to the world clock URL as-is
 *              (NOTE(review): it is not URL-encoded here; confirm callers pass a safe value)
 * @return a thought whose data is a JSON array of {@code {"location", "time"}} objects;
 *         the array is empty when the page could not be fetched
 */
public static SusiThought locationWiseTime(String query) {
    JSONArray arr = new JSONArray();
    SusiThought json = new SusiThought();

    Document html;
    try {
        html = Jsoup.connect("http://www.timeanddate.com/worldclock/results.html?query=" + query).get();
    } catch (IOException e) {
        DAO.severe(e);
        // Without a document there is nothing to scrape; return the empty result
        // instead of dereferencing a null document below.
        json.setData(arr);
        return json;
    }

    Elements locations = html.select("td");
    int i = 0;
    for (Element cell : locations) {
        if (i % 2 == 0) {
            Element timeCell = cell.nextElementSibling();
            // A location cell without a following sibling has no time to report.
            if (timeCell != null) {
                JSONObject obj = new JSONObject();
                obj.put("location", cell.getElementsByTag("a").text());
                obj.put("time", timeCell.text());
                arr.put(obj);
            }
        }
        i++;
    }

    json.setData(arr);
    return json;
}
// Fragment (braces are not closed within this excerpt): for every element of `vids` that
// carries the class "album-video", reads the "src" attribute of its first <source> element
// and adds it to `URLs`; the same steps are then repeated for `profile_vids`.
// NOTE(review): source.first() is dereferenced without a check — confirm every matching
// video is guaranteed to contain a <source> element.
for (Element vid : vids) { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); URLs.add(videoURL); for (Element vid : profile_vids) { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); URLs.add(videoURL);
public static List<URL> getURLs(URL url) throws IOException{ Response resp = Http.url(url) .ignoreContentType() .response(); Document doc = resp.parse(); List<URL> URLs = new ArrayList<>(); //Pictures Elements imgs = doc.getElementsByTag("img"); for (Element img : imgs) { if (img.hasClass("album-image")) { String imageURL = img.attr("src"); URLs.add(new URL(imageURL)); } } //Videos Elements vids = doc.getElementsByTag("video"); for (Element vid : vids) { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); URLs.add(new URL(videoURL)); } } return URLs; } }
// Stores imageLink under the key "group_imageLink", then collects the <a> elements found
// under the element with id "topic-box-2012" and the <p> elements found under the element
// with id "recentMeetups".
// NOTE(review): both getElementById results are dereferenced without a null check —
// confirm the page always contains these ids.
result.put("group_imageLink", imageLink); topicList = meetupHTML.getElementById("topic-box-2012").getElementsByTag("a"); recentMeetupsSection = meetupHTML.getElementById("recentMeetups").getElementsByTag("p");
// Parse the markup and gather every <p> reported under the element with id "someid".
Document doc = Jsoup.parse(html);
Element content = doc.getElementById("someid");
Elements p = content.getElementsByTag("p");

// Join the paragraph texts with a StringBuilder instead of repeated String
// concatenation, which copies the accumulated text on every iteration.
StringBuilder joined = new StringBuilder();
for (Element x : p) {
    joined.append(x.text());
}
String pConcatenated = joined.toString();
System.out.println(pConcatenated);//sometext another p tag
/**
 * Reads the design via the superclass, then applies every {@code <parameter>} element
 * by passing its {@code name} and {@code value} attributes to {@code setParameter}.
 */
@Override
public void readDesign(Element design, DesignContext designContext) {
    super.readDesign(design, designContext);

    for (Element parameter : design.getElementsByTag("parameter")) {
        String name = parameter.attr("name");
        String value = parameter.attr("value");
        setParameter(name, value);
    }
}
/**
 * Fetches the search page for {@code keyword} and prints the first anchor of every
 * element with class {@code g} to standard output.
 *
 * @param keyword term appended to {@code SEARCH_URL}
 * @return the result set; this method never adds entries to it
 */
@NotNull
@Override
public Set<SearchEntity> search(@NotNull String keyword) {
    Set<SearchEntity> result = new HashSet<>();
    try {
        Elements searchItems = Jsoup.connect(SEARCH_URL + keyword)
                .timeout(3000)
                .get()
                .getElementsByClass("g");
        // Iterating an empty collection is a no-op, so no separate emptiness check is needed.
        for (Element item : searchItems) {
            System.out.println(item.getElementsByTag("a").first());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
@Override public void readDesign(Element design, DesignContext designContext) { Elements elems = design.getElementsByTag("poster"); for (Element poster : elems) { if (getPoster() == null && poster.hasAttr("href")) { setPoster(DesignAttributeHandler.readAttribute("href", poster.attributes(), Resource.class)); } poster.remove(); } // Poster is extracted so AbstractMedia does not include it in alt text super.readDesign(design, designContext); }
// Fragment (the throw statement and the loop body are cut off in this excerpt): requires
// exactly one <colgroup> under `table` — any other count leads to a DesignException — and
// then reads the "column-id" attribute of each <col> inside that colgroup as a String.
Elements colgroups = table.getElementsByTag("colgroup"); if (colgroups.size() != 1) { throw new DesignException( for (Element col : colgroups.get(0).getElementsByTag("col")) { String id = DesignAttributeHandler.readAttribute("column-id", col.attributes(), null, String.class);
// select() returns an Elements collection, so take its first match to obtain a single
// Element; first() yields null when no table with class "table" exists.
Element table = doc.select("table.table").first();
if (table != null) {
    // Print "text, href" for every anchor found under the table.
    Elements links = table.getElementsByTag("a");
    for (Element link : links) {
        String url = link.attr("href");
        String text = link.text();
        System.out.println(text + ", " + url);
    }
}
// Walk every <h3> of the document and print the href of each anchor found under it.
Elements headlinesCat1 = doc.getElementsByTag("h3");
for (int h = 0; h < headlinesCat1.size(); h++) {
    Element headline = headlinesCat1.get(h);
    for (Element link : headline.getElementsByTag("a")) {
        String linkHref = link.attr("href");
        String linkText = link.text(); //THIS IS THE TEXT I WANTED...
        System.out.println(linkHref);
    }
}
Document doc; //comes as parameter Elements divs = doc.getElementsByTag("div"); for(Element div: divs){ if(div.getElementsByTag("div").size() == 1){ //is a div with no divs inside it } }
private void removeWhitespaceImmeditatelyPrecedingBrTags(Element body) { for (Element element : body.getElementsByTag("br")) { //$NON-NLS-1$ removeWhitespaceBefore(element); } }
// All <td> cells of the first element with class "jedalny_listok_tabulka".
// NOTE(review): first() is dereferenced unchecked — confirm the page always has that element.
Elements myElements = doc.getElementsByClass("jedalny_listok_tabulka")
        .first().getElementsByTag("td");
for (Element element : myElements) {
    // Only cells whose class attribute contains "jedlo" are printed.
    if (element.className().contains("jedlo")) {
        String foodContent = element.ownText();
        // Collect the anchor texts with a StringBuilder rather than String += in a loop;
        // each text is prefixed with a single space, exactly as before.
        StringBuilder foodAllergen = new StringBuilder();
        for (Element href : element.getElementsByTag("a")) {
            foodAllergen.append(' ').append(href.text());
        }
        System.out.println(foodContent + " : " + foodAllergen);
    }
}
// Only one line to parse an external content Document doc = Jsoup.connect("http://jsoup.org").get(); // "Javascript-like" syntax Element content = doc.getElementById("content"); Elements links = content.getElementsByTag("a"); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); } // "Jquery/Css-like" syntax Elements resultLinks = doc.select("h3.r > a"); Elements pngs = doc.select("img[src$=.png]");
// Parse the input as UTF-8 and look at the <div> elements reported under
// the element with id "content".
Document doc = Jsoup.parse(input, "UTF-8", "http://www.mhpa.co.uk/notice-to-mariners/");
Element content = doc.getElementById("content");
Elements divs = content.getElementsByTag("div");

// Count the divs that carry the class "news_main".
int ntmAmount = 0;
for (Element candidate : divs) {
    ntmAmount += candidate.hasClass("news_main") ? 1 : 0;
}
/**
 * Converts three lines of text with the hardbreaks attribute enabled and expects the
 * rendered paragraph to contain two {@code <br>} elements.
 */
@Test
public void should_add_a_hardbreak_at_end_of_each_line_when_hardbreaks_option_is_set() throws IOException {
    Attributes attributes = attributes().hardbreaks(true).get();

    String content = asciidoctor.convert(
            "read\nmy\nlips",
            OptionsBuilder.options().attributes(attributes));

    // NOTE(review): the second argument of Jsoup.parse(String, String) is a base URI,
    // not a charset — "UTF-8" is kept as-is to preserve the existing call.
    Element paragraph = Jsoup.parse(content, "UTF-8")
            .getElementsByAttributeValue("class", "paragraph")
            .first();

    int lineBreakCount = paragraph.getElementsByTag("br").size();
    assertThat(lineBreakCount, is(2));
}