// Copy Open Graph metadata from the meetup page into result. Each lookup selects the
// elements whose "property" attribute has the given og:* value and reads their "content"
// attribute.
meetupGroupName = meetupHTML.getElementsByAttributeValue("property", "og:title").attr("content");
result.put("group_name", meetupGroupName);

meetupType = meetupHTML.getElementsByAttributeValue("property", "og:type").attr("content");
result.put("meetup_type", meetupType);

// groupDescription is stored as-is; its assignment is not visible in this excerpt.
result.put("group_description", groupDescription);

groupLocality = meetupHTML.getElementsByAttributeValue("property", "og:locality").attr("content");
result.put("group_locality", groupLocality);

// NOTE(review): the og:country-name value is stored under the key "group_country_code" —
// confirm the page really supplies a country code here rather than a full name.
groupCountry = meetupHTML.getElementsByAttributeValue("property", "og:country-name").attr("content");
result.put("group_country_code", groupCountry);

// Latitude and longitude are stored exactly as the attribute text returned by attr("content").
latitude = meetupHTML.getElementsByAttributeValue("property", "og:latitude").attr("content");
result.put("group_latitude", latitude);

longitude = meetupHTML.getElementsByAttributeValue("property", "og:longitude").attr("content");
result.put("group_longitude", longitude);

imageLink = meetupHTML.getElementsByAttributeValue("property", "og:image").attr("content");
result.put("group_imageLink", imageLink);
githubProfile.put("starred_data", starredData); String starred = html.getElementsByAttributeValue("class", "Counter").get(1).text(); githubProfile.put("starred", starred); githubProfile.put("followers_data", followersData); String followers = html.getElementsByAttributeValue("class", "Counter").get(2).text(); githubProfile.put("followers", followers); githubProfile.put("following_data", followingData); String following = html.getElementsByAttributeValue("class", "Counter").get(3).text(); githubProfile.put("following", following); Elements orgs = html.getElementsByAttributeValue("itemprop", "follows"); for (Element e : orgs) { JSONObject obj = new JSONObject();
Document doc = Jsoup.connect(url).get(); Elements infos; infos=doc.getElementsByAttributeValue("class", "li_1 clearfix");
else if (html.getElementsByAttributeValue("class", "orgnav").size() != 0) { this.scrapeGithubOrg(profile, githubProfile, html);
temp = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); if (temp.length() >= 20) { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content").substring(0,19); } else { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); temp = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); if (temp.length() >= 20) { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content").substring(0,19); } else { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); tagSection = htmlPage.getElementsByAttributeValue("data-automation", "ListingsBreadcrumbs"); tagSpan = tagSection.select("span"); topic = ""; temp = htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content"); if (temp.length() > 0) { latitude = Float .valueOf(htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content")); temp = htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content"); if (temp.length() > 0) { longitude = Float .valueOf(htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content")); .getElementsByAttributeValue("class", "js-follow js-follow-target follow-me fx--fade-in is-hidden") .attr("href"); organizerContactInfo = url + "#lightbox_contact";
/**
 * Scrapes a GitHub organization page and stores the extracted fields in {@code githubProfile}.
 *
 * <p>Fields written: {@code user}, {@code short_description}, {@code location} and, for every
 * element whose class attribute is {@code orgnav}, {@code organization_respositories_link},
 * {@code organization_people_link} and {@code organization_people_number}.
 *
 * @param profile the profile name being scraped; stored under {@code user}
 * @param githubProfile the Post that receives the scraped data
 * @param html the parsed organization page to read from
 * @throws IndexOutOfBoundsException if an expected element or child is missing from the page
 *         (raised by the unchecked {@code get}/{@code child} index lookups)
 */
private void scrapeGithubOrg(String profile, Post githubProfile, Document html) {
    githubProfile.put("user", profile);

    String shortDescription = html
            .getElementsByAttributeValueContaining("class", "TableObject-item TableObject-item--primary")
            .get(0)
            .child(2)
            .text();
    githubProfile.put("short_description", shortDescription);

    String homeLocation = html.getElementsByAttributeValueContaining("itemprop", "location").attr("title");
    githubProfile.put("location", homeLocation);

    Elements navigation = html.getElementsByAttributeValue("class", "orgnav");
    for (Element e : navigation) {
        // The children are read directly. Element.tagName(String) is a setter that renames the
        // element's tag rather than a filter, so calling it here would rewrite the parsed
        // document as a side effect without changing any of the values read below.
        String orgRepositoriesLink = e.child(0).attr("href");
        // The key's spelling ("respositories") is kept as-is because consumers may rely on it.
        githubProfile.put("organization_respositories_link", "https://github.com" + orgRepositoriesLink);

        Element peopleEntry = e.child(1);
        String orgPeopleLink = peopleEntry.attr("href");
        githubProfile.put("organization_people_link", "https://github.com" + orgPeopleLink);

        String orgPeopleNumber = peopleEntry.child(1).text();
        githubProfile.put("organization_people_number", orgPeopleNumber);
    }
}
// Take the text of the first element whose itemprop attribute is "softwareVersion".
// NOTE(review): first() presumably yields null when nothing matches, in which case text()
// would throw a NullPointerException — confirm and guard if the page layout can vary.
latestVersion = doc.getElementsByAttributeValue ("itemprop","softwareVersion").first().text();
// Elements of the existing document whose data-z attribute is "user-managed".
Elements elementsUserManaged =
        loadExistingDoc.getElementsByAttributeValue("data-z", "user-managed");
// String-to-string map declared alongside them; it starts out empty.
Map<String, String> structureUserManaged = new HashMap<String, String>();
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Small jsoup demonstration: parses a fixed HTML string and prints the {@code ownText()} of the
 * first element whose class attribute is "artist".
 */
public class Main {

    /** The fixed markup parsed by {@link #main(String[])}. */
    private static final String MARKUP =
            "<html><head/><body><a href=\"#\" class=\"artist\">Soulive<span class=\"create-play\">Play</span></a></body></html>";

    /**
     * Parses {@link #MARKUP} and writes the selected element's own text to standard output.
     *
     * @param args ignored
     * @throws Exception declared for convenience; nothing is caught here
     */
    public static void main(String[] args) throws Exception {
        Document page = Jsoup.parse(MARKUP);
        Element artistLink = page.getElementsByAttributeValue("class", "artist").first();
        String ownText = artistLink.ownText();
        System.out.println(ownText);
    }
}
/**
 * Looks up the Content-Type declaration carried by a {@code <meta http-equiv>} element.
 *
 * <p>Note that the whole {@code content} attribute is returned unmodified; this method does not
 * extract a charset token from it.
 *
 * @return the {@code content} attribute of the first {@code meta} element whose
 *         {@code http-equiv} attribute is "Content-Type", or {@code null} when there is none
 */
private String getMetaCharset() {
    final Elements contentTypeNodes =
            this.html5Document.getElementsByAttributeValue("http-equiv", "Content-Type");
    for (final Element candidate : contentTypeNodes) {
        // Other tags may carry the same attribute; only <meta> counts.
        if (!"meta".equals(candidate.tagName())) {
            continue;
        }
        return candidate.attr("content");
    }
    return null;
}
// Parse the markup ("your html" is a placeholder) and read the value of the selected option
// among the children of the first element whose name attribute is "Category".
Document doc = Jsoup.parse("your html");
String selectedVal = null;
// get(0) throws if no element named "Category" exists in the document.
Elements options = doc.getElementsByAttributeValue("name", "Category").get(0).children();
for (Element option : options) {
    // No early exit: if several children carry "selected", the last one wins.
    if (option.hasAttr("selected")) {
        selectedVal = option.val();
    }
}
/**
 * Registers a tree processor by class name, converts the terminal-command sample and asserts
 * on the text of the first and last elements whose class attribute is "command".
 */
@Test
public void a_treeprocessor_as_string_should_be_executed_in_document() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.treeprocessor("org.asciidoctor.extension.TerminalCommandTreeprocessor");

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-terminal-command.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "command").first();
    assertThat(contentElement.text(), is("echo \"Hello, World!\""));

    contentElement = doc.getElementsByAttributeValue("class", "command").last();
    assertThat(contentElement.text(), is("gem install asciidoctor"));
}
/**
 * Converts three lines of text with the hardbreaks attribute enabled and asserts that the
 * first element whose class attribute is "paragraph" contains two {@code br} tags.
 */
@Test
public void should_add_a_hardbreak_at_end_of_each_line_when_hardbreaks_option_is_set() throws IOException {
    Attributes attributes = attributes().hardbreaks(true).get();

    String content = asciidoctor.convert("read\nmy\nlips", OptionsBuilder.options().attributes(attributes));

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    Document doc = Jsoup.parse(content);
    Element paragraph = doc.getElementsByAttributeValue("class", "paragraph").first();

    assertThat(paragraph.getElementsByTag("br").size(), is(2));
}
/**
 * Serves asciidoctorclass.rb through the test HTTP server, registers a
 * {@code RubyIncludeSource} include processor, converts the URI-include sample and asserts
 * that the first element whose class attribute is "language-ruby" starts with the expected
 * prefix.
 */
@Test
public void an_inner_class_should_be_registered() {
    TestHttpServer.start(Collections.singletonMap(
            "http://example.com/asciidoctorclass.rb",
            classpath.getResource("org/asciidoctor/internal/asciidoctorclass.rb")));

    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.includeProcessor(new RubyIncludeSource(new HashMap<>()));

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-uri-include.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "language-ruby").first();
    assertThat(contentElement.text(), startsWith(ASCIIDOCTORCLASS_PREFIX));
}
/**
 * Serves asciidoctorclass.rb through the test HTTP server, registers a
 * {@code UriIncludeProcessor} instance, converts the URI-include sample and asserts that the
 * first element whose class attribute is "language-ruby" starts with the expected prefix.
 */
@Test
public void a_include_instance_processor_should_be_executed_when_include_macro_is_found() {
    TestHttpServer.start(Collections.singletonMap(
            "http://example.com/asciidoctorclass.rb",
            classpath.getResource("org/asciidoctor/internal/asciidoctorclass.rb")));

    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.includeProcessor(new UriIncludeProcessor(new HashMap<>()));

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-uri-include.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "language-ruby").first();
    assertThat(contentElement.text(), startsWith(ASCIIDOCTORCLASS_PREFIX));
}
/**
 * Converts math.asciidoc into the temporary test folder with the math attribute set to
 * "asciimath", then asserts that the generated math.html contains exactly one element whose
 * type attribute is "text/x-mathjax-config".
 */
@Test
public void should_add_AsciiMath_delimiters_around_math_block_content_if_math_attribute_not_latexmath() throws IOException {
    Attributes mathAttributes = attributes().math("asciimath").get();
    Options conversionOptions = options()
            .inPlace(false)
            .safe(SafeMode.UNSAFE)
            .toDir(testFolder.getRoot())
            .attributes(mathAttributes)
            .get();

    asciidoctor.convertFile(classpath.getResource("math.asciidoc"), conversionOptions);

    // The output is read back from disk, so the file is parsed with an explicit charset.
    File renderedFile = new File(testFolder.getRoot(), "math.html");
    Document rendered = Jsoup.parse(renderedFile, "UTF-8");
    int configElementCount =
            rendered.getElementsByAttributeValue("type", "text/x-mathjax-config").size();

    assertThat(configElementCount, is(1));
}
/**
 * Registers {@code PositionalAttrsIncludeProcessor} through the extension registry, converts
 * the positional-attributes sample and asserts that the first element whose class attribute
 * is "paragraph IncludeBlock" starts with the expected text.
 */
@Test
public void a_include_processor_can_handle_positional_attrs() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.includeProcessor(PositionalAttrsIncludeProcessor.class);

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-include-pos-attrs.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "paragraph IncludeBlock").first();
    assertThat(contentElement.text(), startsWith("My,Positional,Attribute List"));
}
/**
 * Registers {@code UriIncludeProcessor} through the extension registry, converts the include
 * sample and asserts that the first element whose class attribute is "bare" starts with
 * "sample-book.adoc".
 */
@Test
public void a_include_processor_should_only_handle_its_handles() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.includeProcessor(UriIncludeProcessor.class);

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-include.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "bare").first();
    assertThat(contentElement.text(), startsWith("sample-book.adoc"));
}
/**
 * Registers {@code UriIncludeProcessor} through an extension group, converts the include
 * sample and asserts that the first element whose class attribute is "bare" starts with
 * "sample-book.adoc".
 */
@Test
public void a_include_processor_should_only_handle_its_handles() {
    this.asciidoctor.createGroup()
            .includeProcessor(UriIncludeProcessor.class)
            .register();

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-include.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "bare").first();
    assertThat(contentElement.text(), startsWith("sample-book.adoc"));
}
/**
 * Registers {@code PositionalAttrsIncludeProcessor} through an extension group, converts the
 * positional-attributes sample and asserts that the first element whose class attribute is
 * "paragraph IncludeBlock" starts with the expected text.
 */
@Test
public void a_include_processor_can_handle_positional_attrs() {
    this.asciidoctor.createGroup()
            .includeProcessor(PositionalAttrsIncludeProcessor.class)
            .register();

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-include-pos-attrs.ad"),
            options().toFile(false).get());

    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset; the
    // content is already a decoded String, so the single-argument overload is used.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);

    Element contentElement = doc.getElementsByAttributeValue("class", "paragraph IncludeBlock").first();
    assertThat(contentElement.text(), startsWith("My,Positional,Attribute List"));
}