org.htmlparser.tags.LinkTag.getLink java code examples

/**
 * Reports whether this link holds javascript code.
 * <p>
 * <code>getLink()</code> is called first purely for its side effect: the
 * flag is only evaluated when the link itself is evaluated.
 *
 * @return the <code>javascriptLink</code> flag as it stands after the link has been evaluated
 */
public boolean isJavascriptLink() {
  // Result deliberately discarded; the call is what brings the flag up to date.
  getLink();
  return javascriptLink;
}

  /**
   * Smoke test: visits every node of the page at http://synyx.de, collects the
   * LinkTag instances and prints each link's text and target.
   * Only asserts that at least one link was found.
   *
   * @throws ParserException propagated from the parser
   */
  @Test
  public void testLinkExtraction() throws ParserException {
    Parser pageParser = new Parser("http://synyx.de");
    ObjectFindingVisitor linkCollector = new ObjectFindingVisitor(LinkTag.class);
    pageParser.visitAllNodesWith(linkCollector);
    Node[] found = linkCollector.getTags();
    // TODO this could use some more meaningful assertions
    assertTrue(found.length > 0);
    for (Node node : found) {
      LinkTag link = (LinkTag) node;
      // Printed as: "<link text>" => <link target>
      System.out.print("\"" + link.getLinkText() + "\" => ");
      System.out.println(link.getLink());
    }
  }
}

/**
 * Reports whether this link is a mail address.
 * <p>
 * <code>getLink()</code> is called first purely for its side effect: the
 * flag is only evaluated when the link itself is evaluated.
 *
 * @return the <code>mailLink</code> flag as it stands after the link has been evaluated
 */
public boolean isMailLink() {
  // Result deliberately discarded; the call is what brings the flag up to date.
  getLink();
  return mailLink;
}

/**
 * Reports whether this link is a mail address.
 * <p>
 * <code>getLink()</code> is called first purely for its side effect: the
 * flag is only evaluated when the link itself is evaluated.
 *
 * @return the <code>mailLink</code> flag as it stands after the link has been evaluated
 */
public boolean isMailLink() {
  // Result deliberately discarded; the call is what brings the flag up to date.
  getLink();
  return mailLink;
}

/**
 * Reports whether this link holds javascript code.
 * <p>
 * <code>getLink()</code> is called first purely for its side effect: the
 * flag is only evaluated when the link itself is evaluated.
 *
 * @return the <code>javascriptLink</code> flag as it stands after the link has been evaluated
 */
public boolean isJavascriptLink() {
  // Result deliberately discarded; the call is what brings the flag up to date.
  getLink();
  return javascriptLink;
}

/**
 * Tests if the link is an FTP link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"ftp://"</code>.
 * The comparison is case-sensitive.
 *
 * @return flag indicating if this link is an FTP link
 */
public boolean isFTPLink() {
  // startsWith only inspects the prefix; indexOf(..)==0 would scan the
  // whole link before reporting a miss.
  return getLink ().startsWith("ftp://");
}

/**
 * Tests if the link is an HTTPS link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"https://"</code>.
 * The comparison is case-sensitive.
 *
 * @return flag indicating if this link is an HTTPS link
 */
public boolean isHTTPSLink() {
    // startsWith only inspects the prefix; indexOf(..)==0 would scan the
    // whole link before reporting a miss.
    return getLink ().startsWith("https://");
}

/**
 * Tests if the link is an HTTPS link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"https://"</code>.
 * The comparison is case-sensitive.
 *
 * @return flag indicating if this link is an HTTPS link
 */
public boolean isHTTPSLink() {
    // startsWith only inspects the prefix; indexOf(..)==0 would scan the
    // whole link before reporting a miss.
    return getLink ().startsWith("https://");
}

/**
 * Tests if the link is an IRC link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"irc://"</code>.
 * The comparison is case-sensitive.
 * @return flag indicating if this link is an IRC link
 */
public boolean isIRCLink() {
  // startsWith only inspects the prefix; indexOf(..)==0 would scan the
  // whole link before reporting a miss.
  return getLink ().startsWith("irc://");
}

/**
 * Tests if the link is an FTP link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"ftp://"</code>.
 * The comparison is case-sensitive.
 *
 * @return flag indicating if this link is an FTP link
 */
public boolean isFTPLink() {
  // startsWith only inspects the prefix; indexOf(..)==0 would scan the
  // whole link before reporting a miss.
  return getLink ().startsWith("ftp://");
}

/**
 * Tests if the link is an IRC link, i.e. whether the string returned by
 * <code>getLink()</code> begins with <code>"irc://"</code>.
 * The comparison is case-sensitive.
 * @return flag indicating if this link is an IRC link
 */
public boolean isIRCLink() {
  // startsWith only inspects the prefix; indexOf(..)==0 would scan the
  // whole link before reporting a miss.
  return getLink ().startsWith("irc://");
}

  /**
   * Accepts a node only when it is a LinkTag and <code>matches()</code>
   * succeeds for <code>pattern</code> against the tag's link.
   *
   * @param node the node to check
   * @return <code>true</code> for a LinkTag whose link satisfies the pattern,
   *         <code>false</code> for any other node or a non-matching link
   */
  @Override
  public boolean accept(Node node) {
    if (node instanceof LinkTag) {
      final String url = ((LinkTag) node).getLink();
      return pattern.matcher(url).matches();
    }
    return false;
  }
}

  /**
   * Accept nodes whose class is LinkTag (or a subclass of it) and for whose
   * URL <code>find()</code> on the <code>mRegex</code> matcher succeeds.
   * @param node The node to check.
   * @return <code>true</code> if the node is a link with the pattern,
   * <code>false</code> otherwise.
   */
  public boolean accept (Node node)
  {
    // Anything that is not a LinkTag is rejected before the regex is consulted.
    if (!LinkTag.class.isAssignableFrom (node.getClass ()))
      return (false);

    return (mRegex.matcher (((LinkTag)node).getLink ()).find ());
  }
}

/**
 * Records information from a visited tag, depending on its type:
 * <ul>
 * <li>LinkTag: the link is added to <code>linksToVisit</code> when it starts
 *     with <code>baseUrl</code> and <code>isProbablyHtml</code> accepts it;
 *     either outcome is logged at debug level.</li>
 * <li>TitleTag: its title is stored in <code>title</code>.</li>
 * <li>BodyTag: its plain-text rendering is stored in <code>content</code>.</li>
 * </ul>
 * Any other tag is ignored.
 *
 * @param tag the tag being visited
 */
@Override
public void visitTag(Tag tag) {
  if (tag instanceof LinkTag) {
    final String target = ((LinkTag) tag).getLink();
    final boolean follow = target.startsWith(baseUrl) && isProbablyHtml(target);
    if (follow) {
      logger.debug("Using link pointing to {}", target);
      linksToVisit.add(target);
    } else {
      logger.debug("Skipping link pointing to {}", target);
    }
    return;
  }
  if (tag instanceof TitleTag) {
    title = ((TitleTag) tag).getTitle();
    return;
  }
  if (tag instanceof BodyTag) {
    content = ((BodyTag) tag).toPlainTextString();
  }
}

/**
 * Looks up the first link tag accepted by a LinkPatternFilter built from
 * the given pattern and returns that tag's link.
 *
 * @param pattern
 *            the pattern handed to the LinkPatternFilter
 * @return the link of the first tag produced by the filter, or
 *         <code>null</code> when the filter produces none
 */
public String findLink(final Pattern pattern) {
  String firstLink = null;
  for (final LinkTag candidate : filter(LinkTag.class, new LinkPatternFilter(
      pattern))) {
    // Only the first hit is of interest.
    firstLink = candidate.getLink();
    break;
  }
  return firstLink;
}

/**
 * Parses the resource at the given URL and returns the link of every
 * LinkTag found, in the order the parser reports them.
 *
 * @param url resource handed to the Parser constructor
 * @return a list with one entry per LinkTag
 * @throws ParserException propagated from the parser
 */
public static List<String> getLinks(String url) throws ParserException {
  Parser pageParser = new Parser(url);
  List<String> collected = new LinkedList<String>();
  NodeList linkNodes = pageParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
  int index = 0;
  while (index < linkNodes.size()) {
    // The class filter above is what makes this cast safe.
    collected.add(((LinkTag) linkNodes.elementAt(index)).getLink());
    index++;
  }
  return collected;
}

 /**
  * Parses the resource at the given URL and returns the link of every
  * LinkTag found. This is best-effort: a ParserException is not propagated;
  * its stack trace is printed and whatever was collected up to that point
  * (possibly nothing) is returned.
  *
  * @param url resource handed to the Parser constructor
  * @return a list with one entry per LinkTag extracted; never <code>null</code>
  */
 public static List<String> getLinksOnPage(final String url) {
  final List<String> result = new LinkedList<String>();

  try {
    // The parser is built inside the try so that a ParserException raised by
    // the constructor takes the same best-effort path as one raised during
    // extraction.
    // NOTE(review): the constructor is believed to declare ParserException
    // in htmlparser 1.6+ - confirm against the version in use.
    final Parser htmlParser = new Parser(url);
    final NodeList tagNodeList = htmlParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    for (int j = 0; j < tagNodeList.size(); j++) {
      final LinkTag loopLink = (LinkTag) tagNodeList.elementAt(j);
      result.add(loopLink.getLink());
    }
  } catch (ParserException e) {
    e.printStackTrace(); // TODO handle error
  }

  return result;
}

 /**
  * Parses the resource at the given URL and returns the link of every
  * LinkTag found. This is best-effort: a ParserException is not propagated;
  * its stack trace is printed and whatever was collected up to that point
  * (possibly nothing) is returned.
  *
  * @param url resource handed to the Parser constructor
  * @return a list with one entry per LinkTag extracted; never <code>null</code>
  */
 public static List<String> getLinksOnPage(final String url) {
  final List<String> result = new LinkedList<String>();

  try {
    // The parser is built inside the try so that a ParserException raised by
    // the constructor takes the same best-effort path as one raised during
    // extraction.
    // NOTE(review): the constructor is believed to declare ParserException
    // in htmlparser 1.6+ - confirm against the version in use.
    final Parser htmlParser = new Parser(url);
    final NodeList tagNodeList = htmlParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    for (int j = 0; j < tagNodeList.size(); j++) {
      final LinkTag loopLink = (LinkTag) tagNodeList.elementAt(j);
      result.add(loopLink.getLink());
    }
  } catch (ParserException e) {
    e.printStackTrace(); // TODO handle error
  }

  return result;
}

/**
 * Prepends <code>linkPrefix</code> to the link of a LinkTag or to the image
 * URL of an ImageTag. Afterwards the tag's HTML is appended to
 * <code>modifiedResult</code>, but only when the tag has no parent and is
 * not a CompositeTag that has an end tag.
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag)
{
  if (tag instanceof LinkTag)
  {
    LinkTag link = (LinkTag)tag;
    link.setLink(linkPrefix + link.getLink());
  }
  else if (tag instanceof ImageTag)
  {
    ImageTag image = (ImageTag)tag;
    image.setImageURL(linkPrefix + image.getImageURL());
  }

  // process only those nodes that won't be processed by an end tag,
  // nodes without parents or parents without an end tag, since
  // the complete processing of all children should happen before
  // we turn this node back into html text
  if (null != tag.getParent ())
    return;

  boolean compositeWithEndTag = tag instanceof CompositeTag
    && null != ((CompositeTag)tag).getEndTag ();
  if (!compositeWithEndTag)
    modifiedResult.append(tag.toHtml());
}

/**
 * Prepends <code>linkPrefix</code> to the link of a LinkTag or to the image
 * URL of an ImageTag. Afterwards the tag's HTML is appended to
 * <code>modifiedResult</code>, but only when the tag has no parent and is
 * not a CompositeTag that has an end tag.
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag)
{
  if (tag instanceof LinkTag)
  {
    LinkTag link = (LinkTag)tag;
    link.setLink(linkPrefix + link.getLink());
  }
  else if (tag instanceof ImageTag)
  {
    ImageTag image = (ImageTag)tag;
    image.setImageURL(linkPrefix + image.getImageURL());
  }

  // process only those nodes that won't be processed by an end tag,
  // nodes without parents or parents without an end tag, since
  // the complete processing of all children should happen before
  // we turn this node back into html text
  if (null != tag.getParent ())
    return;

  boolean compositeWithEndTag = tag instanceof CompositeTag
    && null != ((CompositeTag)tag).getEndTag ();
  if (!compositeWithEndTag)
    modifiedResult.append(tag.toHtml());
}

Javadoc

Returns the url as a string, to which this link points. This string has had the "mailto:" and "javascript:" protocol stripped off the front (if those predicates return true) but not for other protocols. Don't ask me why, it's a legacy thing.

Popular in Java

Parsing JSON documents to java classes using gson
getResourceAsStream (ClassLoader)
onRequestPermissionsResult (Fragment)
requestLocationUpdates (LocationManager)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
AtomicInteger (java.util.concurrent.atomic)
An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
IsNull (org.hamcrest.core)
Is the value null?
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
JTable (javax.swing)
Top Sublime Text plugins

How to use the getLink method in org.htmlparser.tags.LinkTag

Best Java code snippets using org.htmlparser.tags.LinkTag.getLink (Showing top 20 results out of 315)

How to use
getLink
method
in
org.htmlparser.tags.LinkTag