Refine search
// Download the NSE "nifty sparks" page (3000 ms timeout) and print the text of
// the four cells that follow the leading image cell in the "niftyd" table.
URL pageUrl = new URL("http://www.nseindia.com/content/equities/niftysparks.htm");
Document page = Jsoup.parse(pageUrl, 3000);

Element niftyTable = page.select("table[class=niftyd]").first();
Iterator<Element> cells = niftyTable.select("td[width=65]").iterator();

cells.next(); // first one is image, skip it

// Prints "Value 1: ..." through "Value 4: ...", consuming one cell per line.
for (int n = 1; n <= 4; n++) {
    System.out.println("Value " + n + ": " + cells.next().text());
}
/**
 * Parses the listing page obtained from {@code getUrlDoc(url)} into a list of
 * {@link TaobaoModel} entries, one per
 * {@code div[class=item-block item-idle sh-roundbox]} element.
 *
 * <p>Each listing is parsed independently: if one listing is missing an
 * expected node (or any other error occurs while reading it), the stack trace
 * is printed and that listing is skipped, but the remaining listings are still
 * processed. Previously a single malformed listing discarded every listing
 * that came after it.
 *
 * @param url address handed to {@code getUrlDoc}
 * @return the successfully parsed listings; an empty list when
 *         {@code getUrlDoc} returns {@code null} or nothing could be parsed.
 *         Never {@code null}.
 */
public List<TaobaoModel> getTaobao(String url) {
    List<TaobaoModel> list = new ArrayList<>();

    Document doc = getUrlDoc(url);
    if (doc == null) {
        return list;
    }

    Elements items = doc.select("div[class=item-block item-idle sh-roundbox]");
    for (Element item : items) {
        try {
            TaobaoModel taobaoModel = new TaobaoModel();

            // Seller name is taken from the title attribute of the avatar link.
            Element user = item.select("div[class=seller-avatar]").get(0);
            taobaoModel.user = user.select("a").attr("title");

            Element info = item.select("div[class=item-info]").get(0);

            // The lazy-load attribute value is prefixed with "http:"
            // (presumably it holds a protocol-relative URL -- confirm).
            Elements img = info.select("div[class=item-pic]").get(0).select("img");
            taobaoModel.image = "http:" + img.attr("data-ks-lazyload-custom");

            Element attributes = info.select("div[class=item-attributes]").get(0);
            taobaoModel.region = attributes.select("div[class=item-location]").get(0).text();
            taobaoModel.price = MathUtil.getDouble(attributes.select("em").get(0).text());

            taobaoModel.title = info.select("div[class=item-brief-desc]").get(0).text();
            taobaoModel.time = info.select("span[class=item-pub-time]").get(0).text();
            taobaoModel.sign = taobaoModel.getSign();

            list.add(taobaoModel);
        } catch (Exception e) {
            // Kept broad on purpose: get(0) throws when a selector matches
            // nothing, and MathUtil.getDouble / getSign are opaque here.
            // Only this listing is dropped; the loop carries on.
            e.printStackTrace();
        }
    }
    return list;
}
// Fetch the page and read three text fields out of its #articleHeader element.
Document document = Jsoup.connect(url).get();

// Same element that select("#articleHeader").first() yields (null if absent).
Element articleHeader = document.getElementById("articleHeader");

// Each lookup gathers every matching node at or under the header and joins their text.
String headline = articleHeader.getElementsByClass("headline").text();
String subheadline = articleHeader.getElementsByClass("subheadline").text();
String us_details = articleHeader.getElementsByClass("us_details").text();
// ...
// Print the title and Metascore of every "div.main_stats" block in the page.
Elements parents = doc.select("div.main_stats");
for (Element child : parents) {
    Element label = child.select("h3.product_title").first();
    Element score = child.select("span.metascore_w.medium.game").first();

    // first() yields null when the selector matched nothing; skip blocks that
    // lack a title or a score instead of aborting the whole loop with an NPE.
    if (label == null || score == null) {
        continue;
    }

    System.out.println("Game **" + label.text()+ "** has a Metascore of ->> " + score.text());
}
try { Document doc = Jsoup.connect(html).get(); Elements tableElements = doc.select("table"); System.out.println("headers"); for (int i = 0; i < tableHeaderEles.size(); i++) { System.out.println(tableHeaderEles.get(i).text()); System.out.println(); Element row = tableRowElements.get(i); System.out.println("row"); Elements rowItems = row.select("td"); for (int j = 0; j < rowItems.size(); j++) { System.out.println(rowItems.get(j).text());
// Print "<day> : <start time>" for every table row that has a start-time cell.
String pageUrl = "http://pastebin.com/raw/Sa2MRCTQ";
Document doc = Jsoup.connect(pageUrl).get();

// The most recently seen weekday label; rows without their own weekday cell
// are printed under it. Stays null until the first weekday cell is met.
String currentDay = null;

for (Element row : doc.select("tr:has(td.liste-startzeit)")) {
    Element dayCell = row.select("td.liste-wochentag").first();
    currentDay = (dayCell == null) ? currentDay : dayCell.text();

    String startTime = row.select("td.liste-startzeit").first().text();
    System.out.println(currentDay +" : "+startTime);
}
// Print "<link text> - <href>" for each article inside div#latest-article.
Element latestArticle = doc.select("div#latest-article").first();

// first() yields null when the container is absent; treat that as "no articles"
// rather than failing with a NullPointerException.
Elements articles = (latestArticle == null)
        ? new Elements()
        : latestArticle.select("article");

for (Element article : articles) {
    // get the value from href attribute
    Element link = article.select("div.post-text > h3.title > a").first();
    if (link == null) {
        // This article has no title link in the expected position; skip it.
        continue;
    }

    String linkHref = link.attr("href");
    String linkText = link.text();
    System.out.println(linkText + " - " + linkHref);
}
/**
 * Parses the listing page obtained from {@code getUrlDoc(url)} into a list of
 * {@link TaobaoModel} entries, one per
 * {@code div[class=item-block item-idle sh-roundbox]} element.
 *
 * <p>Each listing is parsed independently: if one listing is missing an
 * expected node (or any other error occurs while reading it), the stack trace
 * is printed and that listing is skipped, but the remaining listings are still
 * processed. Previously a single malformed listing discarded every listing
 * that came after it.
 *
 * @param url address handed to {@code getUrlDoc}
 * @return the successfully parsed listings; an empty list when
 *         {@code getUrlDoc} returns {@code null} or nothing could be parsed.
 *         Never {@code null}.
 */
public List<TaobaoModel> getTaobao(String url) {
    List<TaobaoModel> list = new ArrayList<>();

    Document doc = getUrlDoc(url);
    if (doc == null) {
        return list;
    }

    Elements items = doc.select("div[class=item-block item-idle sh-roundbox]");
    for (Element item : items) {
        try {
            TaobaoModel taobaoModel = new TaobaoModel();

            // Seller name is taken from the title attribute of the avatar link.
            Element user = item.select("div[class=seller-avatar]").get(0);
            taobaoModel.user = user.select("a").attr("title");

            Element info = item.select("div[class=item-info]").get(0);

            // The lazy-load attribute value is prefixed with "http:"
            // (presumably it holds a protocol-relative URL -- confirm).
            Elements img = info.select("div[class=item-pic]").get(0).select("img");
            taobaoModel.image = "http:" + img.attr("data-ks-lazyload-custom");

            Element attributes = info.select("div[class=item-attributes]").get(0);
            taobaoModel.region = attributes.select("div[class=item-location]").get(0).text();
            taobaoModel.price = MathUtil.getDouble(attributes.select("em").get(0).text());

            taobaoModel.title = info.select("div[class=item-brief-desc]").get(0).text();
            taobaoModel.time = info.select("span[class=item-pub-time]").get(0).text();
            taobaoModel.sign = taobaoModel.getSign();

            list.add(taobaoModel);
        } catch (Exception e) {
            // Kept broad on purpose: get(0) throws when a selector matches
            // nothing, and MathUtil.getDouble / getSign are opaque here.
            // Only this listing is dropped; the loop carries on.
            e.printStackTrace();
        }
    }
    return list;
}
private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) { Document parsedDocument = Jsoup.parse(unparsedHtml); List<Chapter> chapterList = new ArrayList<>(); for (Element a : parsedDocument.select("div.post-cnt a")) { Chapter newChapter = DefaultFactory.Chapter.constructDefault(); newChapter.setUrl("http://www.pecintakomik.com" + a.attr("href")); newChapter.setName(a.text()); newChapter.setNew(!a.select("img[src=/images/new.gif]").isEmpty()); chapterList.add(newChapter); } // .....
String xml = "<CREDENTIALS>...</CREDENTIALS>"; Document doc = Jsoup.parse(xml, "", Parser.xmlParser()); //MEMBER_BENEFITS Element memberBenefits = doc.select("MEMBER_BENEFITS").first(); memberBenefits.select("USERNAME").text("newusername"); memberBenefits.select("PASSWORD").text("newpassword"); //ARTICLE_DOWNLOAD Element articleDownload = doc.select("ARTICLE_DOWNLOAD").first(); articleDownload.select("USERNAME").text("newusername"); articleDownload.select("PASSWORD").text("newpassword");
/**
 * Runs one fixed Google advanced-search query and prints the title and body
 * snippet of every {@code li[class=g]} result element to standard output.
 */
public class ScanWebSO {

    /** The advanced-search request issued by {@link #main}. */
    private static final String SEARCH_URL =
            "https://www.google.com/search?as_q=&as_epq=%22Yorkshire+Capital%22+&as_oq=fraud+OR+allegations+OR+scam&as_eq=&as_nlo=&as_nhi=&lr=lang_en&cr=countryCA&as_qdr=all&as_sitesearch=&as_occt=any&safe=images&tbs=&as_filetype=&as_rights=";

    /**
     * Fetches the search page and prints each result. An {@link IOException}
     * raised while fetching is reported via its stack trace and otherwise ignored.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // HTTP error statuses are not treated as failures, and the timeout
            // is set to 0.
            Document page = Jsoup.connect(SEARCH_URL)
                    .userAgent("Mozilla")
                    .ignoreHttpErrors(true)
                    .timeout(0)
                    .get();

            for (Element hit : page.select("li[class=g]")) {
                String title = hit.select("h3[class=r]").text();
                String body = hit.select("span[class=st]").text();

                System.out.println("Title: " + title);
                System.out.println("Body: " + body + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
// Walk every cell of every row of every "table.table" and print the href and
// text of each anchor that carries an href attribute.
Document doc = Jsoup.connect(url).get();

for (Element tbl : doc.select("table.table")) {
    for (Element tr : tbl.select("tr")) {
        for (Element cell : tr.select("td")) {
            for (Element anchor : cell.select("a[href]")) {
                System.out.println("link : " + anchor.attr("href"));
                System.out.println("text : " + anchor.text());
            }
        }
    }
}
// Parse a small two-row table and print "<href>, <label>" for every row that
// is not marked as a header row (class "header left").
//
// Fix: the first anchor in the sample markup was missing the closing '>' of
// its tag ("/</td>"); it now matches the second row ("/></td>").
Element result = Jsoup.parse("<html><body><table><tr><td><a href=\"http://a.com\" /></td><td>Label1</td></tr><tr><td><a href=\"http://b.com\" /></td><td>Label2</td></tr></table></body></html>");

for (Element element : result.select("tr")) {
    if (element.select("tr.header.left").isEmpty()) {
        Elements tds = element.select("td");
        if (tds.size() < 2) {
            // A row needs a link cell and a label cell; skip anything shorter
            // rather than failing on tds.get(1).
            continue;
        }

        String link = tds.get(0).getElementsByTag("a").attr("href");
        String position = tds.get(1).text();
        System.out.println(link + ", " + position);
    }
}
@Override public String getAlbumTitle(URL url) throws MalformedURLException { try { // Attempt to use album title as GID Element titleElement = getFirstPage().select("h1.title").first(); String title = titleElement.text(); Element authorSpan = getFirstPage().select("span[class=creator]").first(); String author = authorSpan.select("a").first().text(); LOGGER.debug("Author: " + author); return getHost() + "_" + author + "_" + title.trim(); } catch (IOException e) { // Fall back to default album naming convention LOGGER.info("Unable to find title at " + url); } return super.getAlbumTitle(url); }
// For every <content> element, re-parse its text as HTML and read the anchor
// text of each <li> found in that embedded markup.
if (doc != null) {
    for (Element contentNode : doc.select("content")) {
        Document embedded = Jsoup.parse(contentNode.text());
        for (Element listItem : embedded.select("li")) {
            // Combined text of the anchors in this list item; not used further
            // within this snippet.
            String strValue = listItem.select("a").text();
        }
    }
}