Document doc = Jsoup.connect("http://www.marca.com").get(); Elements allNodes = doc.getAllElements()
/**
 * Builds an instance from raw HTML markup.
 *
 * <p>The markup is stored as-is and parsed immediately; the elements of the
 * resulting DOM are kept in {@code allElements}.
 *
 * @param document the HTML source text to parse
 */
public Html(String document) {
    this.document = document;
    // The field was just assigned from the parameter, so parsing the parameter is equivalent.
    this.allElements = Jsoup.parse(document).getAllElements();
}
// Sample markup: a table row fragment containing a checked radio input.
String s = "<TR><TD><INPUT TYPE=\"RADIO\" NAME=\"lccp_trndtl\" VALUE=\"12708NZM KCG YNNYNYNYA\" "
        + "ONCLICK=\"return farefill('12708NZM KCG YNNYNYNYA','12708','NZM ',0,0,1,0,1,0,1,0,0,0,0)\" "
        + "CHECKED>12708</TD>";

Document doc = Jsoup.parse(s.trim());
Elements links = doc.getAllElements();

// Log either the element count plus the text of the first element, or a notice that nothing was found.
if (links == null || links.isEmpty()) {
    Log.v("output", "Size of links is 0");
} else {
    Log.v("output", links.size() + " " + links.get(0).text());
}
// Iterates over every element of doc; for each one a fresh HtmlElements holder and a
// Properties object named attributes are created. The rest of the loop body is elided
// in this excerpt, so what is done with them is not visible here.
// NOTE(review): the urls parameter is unused in the visible portion - confirm its role.
public static void getInputElements(Document doc, String urls) { Elements elements = doc.getAllElements(); for (Element element : elements) { HtmlElements htmlElements = new HtmlElements(); Properties attributes = new Properties(); // ...
private static final String PRE_TAG = "pre"; public static void parseHtmlDoc(Document doc) { Elements elementList = doc.getAllElements(); for (Element element : elementList) { //if the tag isn't <pre> add it to new elements collection if(element.tag().toString().compareTo(PRE_TAG) != 0) { element.text(element.text().replaceAll("<br>", "")); } } }
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/** Minimal demo: parses a small HTML string and prints the node name of every element. */
public class JSoup {

    /**
     * Parses a fixed HTML skeleton and writes each element's node name to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<html>" + "<head></head>" + "<body></body>" + "</html>";

        // Use the single-argument overload: the second argument of parse(String, String)
        // is the base URI for resolving relative links, not a character set. The input is
        // already a decoded String, so no charset is involved.
        Document dc = Jsoup.parse(html);

        Elements elements = dc.getAllElements();
        elements.forEach(element -> System.out.println(element.nodeName()));
    }
}
// Collect the distinct tag names used on the page; the set discards duplicates.
HashSet<String> allTags = new HashSet<>();
Document doc = Jsoup.connect("http://seenyc.co/").get();
Elements elements = doc.getAllElements();
for (Element ele : elements) {
    // Only the tag name is needed; the previously fetched attributes were never used.
    allTags.add(ele.tagName());
}
// here your hashset will have all distinct tag names from website
// Gather the value of every attribute whose key is "alt" (compared case-insensitively)
// from all elements of the parsed markup into names.
Document doc = Jsoup.parse(html_contents);
for (Element node : doc.getAllElements()) {
    for (Attribute attr : node.attributes()) {
        if (!attr.getKey().equalsIgnoreCase("alt")) {
            continue;
        }
        names.add(attr.getValue());
    }
}
/**
 * Extracts the visible text of an HTML file as a single string.
 *
 * <p>Every non-blank text node that is a direct child of any element is appended,
 * each followed by one space (so a non-empty result ends with a space).
 *
 * @param file the HTML file to parse; the charset argument passed to jsoup is {@code null}
 * @return the concatenated text, or an empty string when no non-blank text node exists
 * @throws IOException if the file cannot be read or parsed
 */
public static String extractText(File file) throws IOException {
    Document document = Jsoup.parse(file, null);
    StringBuilder collected = new StringBuilder();
    for (Element element : document.getAllElements()) {
        for (TextNode textNode : element.textNodes()) {
            String content = textNode.text();
            if (StringUtils.isNotBlank(content)) {
                collected.append(content).append(' ');
            }
        }
    }
    return collected.toString();
}
/**
 * Hands the comment nodes of the document to {@code deleteNodes}, one element at a time.
 *
 * <p>For each element, the direct child nodes that are {@link Comment} instances are
 * gathered into a list first and passed on only after the scan of that element's
 * children has finished. {@code deleteNodes} is called once per element, even when
 * the list is empty.
 */
private void removeComments() {
    for (Element parent : document.getAllElements()) {
        List<Node> found = new ArrayList<>();
        List<Node> children = parent.childNodes();
        for (int i = 0; i < children.size(); i++) {
            Node child = children.get(i);
            if (child instanceof Comment) {
                found.add(child);
            }
        }
        deleteNodes(found);
    }
}
String url = "http://www.testthisblog.com";

// Matched with find(), so no surrounding wildcards are needed. The previous pattern
// ".*Posted by*" applied '*' to the letter 'y' only and therefore matched any text
// containing "Posted b".
Pattern pattern = Pattern.compile("Posted by");

Document doc = Jsoup.connect(url).get();
Elements els = doc.getAllElements();

// Report each element whose own text (excluding descendants' text) contains "Posted by".
for (Element element : els) {
    String txt = element.ownText();
    if (pattern.matcher(txt).find()) {
        System.out.println(txt);
        System.out.println(element.tagName());
        System.out.println(element.className());
    }
}
Document document = ...;
Elements elements = document.getAllElements();
Element comment = null;

// Scan the elements in list order and stop at the first one that has a direct child
// comment node whose trimmed content is exactly "BEGIN TOPICS".
search:
for (Element candidate : elements) {
    for (Node child : candidate.childNodes()) {
        if (!(child instanceof Comment)) {
            continue;
        }
        String text = ((Comment) child).getData().trim();
        if ("BEGIN TOPICS".equals(text)) {
            // Despite its name, comment holds the element that contains the comment node.
            comment = candidate;
            break search;
        }
    }
}

// Did we find <!-- BEGIN TOPICS -->?
if (comment != null) {
    // You can now select from the siblingElements of comment
    // and only get stuff "after" that comment:
    // e.g. Elements e = comment.siblingElements().select("a");
} else {
    // Oh snap.
}
// Scans every attribute of every element under linkClick, looking for one whose value
// equals "#30x30_bullhorn" (case-insensitively). The action taken on a match is cut off
// in this excerpt.
for(Element element : linkClick.getAllElements()) { for(Attribute attribute : element.attributes()) { if (attribute.getValue().equalsIgnoreCase("#30x30_bullhorn")) {
public static void main(String[] args) { Document doc; try { doc = Jsoup.connect("https://developer.mozilla.org/en-US/docs/Web/HTML/Element/br").get(); //Create a new empty elements collection Elements allElementsInDom = new Elements(); //send to a method that recurses over all child elements and adds them to the new collection recurseOverElements(doc.getAllElements(), allElementsInDom); //Iterate over all the elements and their child elements from the DOM for (Element element : allElementsInDom) { System.out.println(element.tagName()); } } catch (IOException e) { e.printStackTrace(); } }
@Override public Collection<? extends Node> getDescendentNodes(final Node node) { Elements descendents; if (node instanceof Document) descendents = ((Document)node).getAllElements(); else descendents = ((Element)node).getAllElements(); descendents.remove(node); // Jsoup includes the target of getAllElements() in the result... return descendents; }
/**
 * Removes all comment nodes from the document, except comments that are direct children
 * of {@code <style>} or {@code <script>} elements.
 *
 * <p>Comments are collected first and removed afterwards, so no child list is modified
 * while it is being traversed.
 *
 * @param doc the document to strip comments from; modified in place
 */
public void stripComments(Document doc) {
    List<Node> comments = new ArrayList<>();
    doc.getAllElements().forEach(elem -> {
        String tag = elem.tagName();
        // Compare the tag name: comparing the element itself with a String is always false,
        // which previously meant script elements were never skipped.
        boolean skipped = tag.equals("style") || tag.equals("script");
        if (!skipped) {
            elem.childNodes().forEach(child -> {
                if (child instanceof Comment) {
                    comments.add(child);
                }
            });
        }
    });
    comments.forEach(Node::remove);
}
String htmlString = "<div><ul><li>some menu item</li><li>some menu item</li><li>some menu item</li></ul></div><div><h3>Tile of some text</h3><p></p><p>some text</p><ul><li>some other text</li><li>some other text</li><li>some other text</li></ul></div>";
Document doc = Jsoup.parse(htmlString);

// Print an element when it is a <p> without any child nodes, or when its first child
// node is a text node.
for (Element element : doc.getAllElements()) {
    int childCount = element.childNodes().size();
    boolean emptyParagraph = element.nodeName().equals("p") && childCount == 0;
    boolean startsWithText = childCount > 0 && element.childNode(0).nodeName().equals("#text");
    if (emptyParagraph || startsWithText) {
        System.out.println(element.toString());
    }
}
// Gather the elements found under doc into a single Elements list.
// NOTE(review): jsoup's getAllElements() is understood to include the receiver itself -
// confirm whether doc should be excluded by the code that consumes this list.
Elements elements = doc.getAllElements();
Document doc = Jsoup.parse("<html>\n" +
        " <head></head>\n" +
        " <body>\n" +
        "<table><div class=\"wrapper\">\n" +
        "<h1 value=\"something\" class=header>Header</h1>\n" +
        "<div id=\"article1\" class=\"article\" name=\"something\" >\n" +
        "<img clsas=\"mistake\" src=\"picture.jpg\" id=\"pict1\" class=\"image_article\" alt=\"picture\" />\n" +
        "<p class=\"article_text\" >Lorem ipsum dolor sit amet, consectetur adipiscing. </p>\n" +
        "<a href=\"article.html\" title=\"More\">Více</a>\n" +
        "</div></body></html>"
);

// Strip every attribute that is not on the whitelist (class, id, alt, src, name, href).
for (Element element : doc.getAllElements()) {
    // Collect first, remove afterwards: calling removeAttr while iterating
    // element.attributes() modifies the collection being traversed, which - depending on
    // the jsoup version - skips attributes or throws ConcurrentModificationException.
    java.util.List<String> disallowedKeys = new java.util.ArrayList<>();
    for (Attribute attribute : element.attributes()) {
        switch (attribute.getKey()) {
            case "class":
            case "id":
            case "alt":
            case "src":
            case "name":
            case "href":
                break;
            default:
                disallowedKeys.add(attribute.getKey());
        }
    }
    for (String key : disallowedKeys) {
        element.removeAttr(key);
    }
}

System.out.println(doc);
// Start the recursive traversal at the first entry of doc.getAllElements(), passing ROOT
// as the second argument.
// NOTE(review): presumably that first entry is doc itself - confirm, and confirm that the
// list can never be empty here (first() would then yield no element).
traverseRecursivly(doc.getAllElements().first(), ROOT);