/**
 * Builds a debugging representation of this tag.
 *
 * @return the literal prefix {@code "TITLE: "} followed by whatever
 *         {@code getTitle()} returns.
 */
public String toString() {
    return new StringBuilder("TITLE: ").append(getTitle()).toString();
}
}
/**
 * Describes this tag for debugging purposes.
 *
 * @return {@code "TITLE: "} with the result of {@code getTitle()} appended.
 */
public String toString() {
    StringBuilder description = new StringBuilder("TITLE: ");
    description.append(getTitle());
    return description.toString();
}
}
import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.filters.HasAttributeFilter; import org.htmlparser.filters.IsEqualFilter; import org.htmlparser.tags.MetaTag; import org.htmlparser.tags.TitleTag; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; public class MM { public static void main(String[] args) { Parser parser=new Parser(); try { parser.setResource("http://www.google.com"); TitleTag title=new TitleTag(); String tagtext=title.getTitle(); System.out.println(tagtext); } }catch (ParserException e) { } } }
/**
 * Inspects a visited tag and records what is of interest:
 * <ul>
 *   <li>for a {@code LinkTag}, the link is added to {@code linksToVisit}
 *       when it starts with {@code baseUrl} and {@code isProbablyHtml}
 *       accepts it; otherwise it is only logged as skipped;</li>
 *   <li>for a {@code TitleTag}, its title is stored in {@code title};</li>
 *   <li>for a {@code BodyTag}, its plain text is stored in {@code content}.</li>
 * </ul>
 * Any other tag is ignored.
 *
 * @param tag the tag being visited.
 */
@Override
public void visitTag(Tag tag) {
    if (tag instanceof LinkTag) {
        LinkTag anchor = (LinkTag) tag;
        boolean follow = anchor.getLink().startsWith(baseUrl)
                && isProbablyHtml(anchor.getLink());
        if (!follow) {
            logger.debug("Skipping link pointing to {}", anchor.getLink());
            return;
        }
        logger.debug("Using link pointing to {}", anchor.getLink());
        linksToVisit.add(anchor.getLink());
        return;
    }

    if (tag instanceof TitleTag) {
        title = ((TitleTag) tag).getTitle();
        return;
    }

    if (tag instanceof BodyTag) {
        content = ((BodyTag) tag).toPlainTextString();
    }
}
/**
 * Extracts the title from the given HTML.
 *
 * <p>Every node matching {@code TITLE_FILTER} is examined in turn; when
 * several match, the trimmed title of the last one is returned.
 *
 * @param html the input handed to the {@code Parser} constructor.
 * @return the trimmed title text, or an empty string when no node matches;
 *         never null.
 * @throws ParserException declared by this method; the parser calls made
 *         here are the only visible source.
 */
public static String extractTitle(String html) throws ParserException {
    Parser htmlParser = new Parser(html);
    NodeList titleNodes = htmlParser.extractAllNodesThatMatch(TITLE_FILTER);

    String result = "";
    for (SimpleNodeIterator cursor = titleNodes.elements(); cursor.hasMoreNodes(); ) {
        TitleTag current = (TitleTag) cursor.nextNode();
        result = current.getTitle().trim();
    }
    return result;
}
/**
 * Records a visited tag according to the first helper predicate that
 * accepts it, checked in this order: a table tag is appended to
 * {@code tables}; a body tag has its children stored in
 * {@code nodesInBody}; a title tag has its title stored in {@code title}.
 * Tags matching none of the predicates are ignored.
 *
 * @param tag the tag being visited.
 */
public void visitTag(Tag tag) {
    if (isTable(tag)) {
        tables.add(tag);
        return;
    }
    if (isBodyTag(tag)) {
        nodesInBody = tag.getChildren();
        return;
    }
    if (isTitleTag(tag)) {
        title = ((TitleTag) tag).getTitle();
    }
}
/**
 * Sorts a visited tag into the collector's state. Exactly one branch runs,
 * chosen by the first predicate that returns true.
 *
 * @param tag the tag being visited.
 */
public void visitTag(Tag tag) {
    if (isTable(tag)) {
        // Table tags are accumulated.
        tables.add(tag);
    } else if (isBodyTag(tag)) {
        // Keep the body's child nodes.
        nodesInBody = tag.getChildren();
    } else if (isTitleTag(tag)) {
        // Remember the title text.
        TitleTag titleTag = (TitleTag) tag;
        title = titleTag.getTitle();
    }
}