/**
 * Creates a new {@code ExtractedUrlAnchorPair} for the given link, copies every
 * attribute of the element onto it, makes it the current URL ({@code curUrl})
 * and appends it to {@code outgoingUrls}.
 *
 * @param href       the link target to store on the pair
 * @param tag        the tag name to store on the pair
 * @param attributes the SAX attributes of the element; each one is copied
 *                   onto the pair keyed by its local name
 */
private void addToOutgoingUrls(String href, String tag, Attributes attributes) {
    curUrl = new ExtractedUrlAnchorPair();
    curUrl.setHref(href);
    curUrl.setTag(tag);

    final int attributeCount = attributes.getLength();
    for (int index = 0; index < attributeCount; index++) {
        String attrName = attributes.getLocalName(index);
        // Read the value by index. Attributes.getValue(String) looks an attribute up
        // by its *qualified* name, so passing the local name returned null for
        // prefixed attributes (and repeated a linear search for every attribute).
        String attrVal = attributes.getValue(index);
        curUrl.setAttribute(attrName, attrVal);
    }

    outgoingUrls.add(curUrl);
}
/**
 * Registers a link that carries no attributes: builds a fresh
 * {@code ExtractedUrlAnchorPair}, makes it the current URL ({@code curUrl}),
 * fills in its href and tag, and appends it to {@code outgoingUrls}.
 *
 * @param href the link target to store on the pair
 * @param tag  the tag name to store on the pair
 */
private void addToOutgoingUrls(String href, String tag) {
    ExtractedUrlAnchorPair pair = new ExtractedUrlAnchorPair();
    // Publish as the current URL before populating it, matching the original ordering.
    curUrl = pair;
    pair.setHref(href);
    pair.setTag(tag);
    outgoingUrls.add(pair);
}
/**
 * Handles the closing of an element.
 *
 * <p>For {@code BODY}, clears {@code isWithinBodyElement}. For {@code A},
 * {@code AREA} and {@code LINK}, clears {@code anchorFlag} and, when a current
 * URL is pending, turns the accumulated anchor text into a single line
 * (newlines and tabs become spaces, surrounding whitespace trimmed). A
 * non-empty result is cut to {@code MAX_ANCHOR_LENGTH} characters followed by
 * {@code "..."} when longer, and is stored on the current URL together with the
 * tag name. The anchor buffer is then emptied and the current URL reset to
 * {@code null}. Any other element is ignored.
 *
 * @param uri       namespace URI of the element (unused)
 * @param localName local name of the element; used to resolve the element kind
 * @param qName     qualified name of the element (unused)
 * @throws SAXException declared by the overridden handler method
 */
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
    Element closed = HtmlFactory.getElement(localName);

    if (closed == Element.BODY) {
        isWithinBodyElement = false;
        return;
    }

    boolean closesLink =
        (closed == Element.A) || (closed == Element.AREA) || (closed == Element.LINK);
    if (!closesLink) {
        return;
    }

    anchorFlag = false;
    if (curUrl != null) {
        String rawText = anchorText.toString();
        String anchor = rawText.replace('\n', ' ').replace('\t', ' ').trim();
        if (!anchor.isEmpty()) {
            String stored = (anchor.length() > MAX_ANCHOR_LENGTH)
                ? anchor.substring(0, MAX_ANCHOR_LENGTH) + "..."
                : anchor;
            curUrl.setTag(localName);
            curUrl.setAnchor(stored);
        }
        // The buffer is only emptied when a current URL was pending.
        anchorText.delete(0, anchorText.length());
    }
    curUrl = null;
}
/**
 * Registers a link that carries no attributes: builds a fresh
 * {@code ExtractedUrlAnchorPair}, makes it the current URL ({@code curUrl}),
 * fills in its href and tag, and appends it to {@code outgoingUrls}.
 *
 * @param href the link target to store on the pair
 * @param tag  the tag name to store on the pair
 */
private void addToOutgoingUrls(String href, String tag) {
    ExtractedUrlAnchorPair pair = new ExtractedUrlAnchorPair();
    // Publish as the current URL before populating it, matching the original ordering.
    curUrl = pair;
    pair.setHref(href);
    pair.setTag(tag);
    outgoingUrls.add(pair);
}
/**
 * Creates a new {@code ExtractedUrlAnchorPair} for the given link, copies every
 * attribute of the element onto it, makes it the current URL ({@code curUrl})
 * and appends it to {@code outgoingUrls}.
 *
 * @param href       the link target to store on the pair
 * @param tag        the tag name to store on the pair
 * @param attributes the SAX attributes of the element; each one is copied
 *                   onto the pair keyed by its local name
 */
private void addToOutgoingUrls(String href, String tag, Attributes attributes) {
    curUrl = new ExtractedUrlAnchorPair();
    curUrl.setHref(href);
    curUrl.setTag(tag);

    final int attributeCount = attributes.getLength();
    for (int index = 0; index < attributeCount; index++) {
        String attrName = attributes.getLocalName(index);
        // Read the value by index. Attributes.getValue(String) looks an attribute up
        // by its *qualified* name, so passing the local name returned null for
        // prefixed attributes (and repeated a linear search for every attribute).
        String attrVal = attributes.getValue(index);
        curUrl.setAttribute(attrName, attrVal);
    }

    outgoingUrls.add(curUrl);
}
/**
 * Handles the closing of an element.
 *
 * <p>For {@code BODY}, clears {@code isWithinBodyElement}. For {@code A},
 * {@code AREA} and {@code LINK}, clears {@code anchorFlag} and, when a current
 * URL is pending, turns the accumulated anchor text into a single line
 * (newlines and tabs become spaces, surrounding whitespace trimmed). A
 * non-empty result is cut to {@code MAX_ANCHOR_LENGTH} characters followed by
 * {@code "..."} when longer, and is stored on the current URL together with the
 * tag name. The anchor buffer is then emptied and the current URL reset to
 * {@code null}. Any other element is ignored.
 *
 * @param uri       namespace URI of the element (unused)
 * @param localName local name of the element; used to resolve the element kind
 * @param qName     qualified name of the element (unused)
 * @throws SAXException declared by the overridden handler method
 */
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
    Element closed = HtmlFactory.getElement(localName);

    if (closed == Element.BODY) {
        isWithinBodyElement = false;
        return;
    }

    boolean closesLink =
        (closed == Element.A) || (closed == Element.AREA) || (closed == Element.LINK);
    if (!closesLink) {
        return;
    }

    anchorFlag = false;
    if (curUrl != null) {
        String rawText = anchorText.toString();
        String anchor = rawText.replace('\n', ' ').replace('\t', ' ').trim();
        if (!anchor.isEmpty()) {
            String stored = (anchor.length() > MAX_ANCHOR_LENGTH)
                ? anchor.substring(0, MAX_ANCHOR_LENGTH) + "..."
                : anchor;
            curUrl.setTag(localName);
            curUrl.setAnchor(stored);
        }
        // The buffer is only emptied when a current URL was pending.
        anchorText.delete(0, anchorText.length());
    }
    curUrl = null;
}