Refine search
/**
 * Recursively copies a list of nodes together with their descendants.
 *
 * Each node is copied with {@code cloneOnlyNode}, receiving {@code clonedParent}
 * as its second argument; the children of a node are copied by a recursive call
 * that passes the fresh copy as the parent.
 *
 * @param tree the nodes to copy
 * @param clonedParent the value handed to {@code cloneOnlyNode} for every node of {@code tree}
 * @return a new list holding the copies in the same order as {@code tree}
 */
private static NodeList deepClone(NodeList tree, Node clonedParent) {
    NodeList copies = new NodeList();
    for (int index = 0; index < tree.size(); index++) {
        Node original = tree.elementAt(index);
        Node copy = cloneOnlyNode(original, clonedParent);
        copies.add(copy);

        NodeList originalChildren = original.getChildren();
        if (originalChildren == null) {
            continue; // leaf: nothing below this node to copy
        }
        copy.setChildren(deepClone(originalChildren, copy));
    }
    return copies;
}
/**
 * Collects the {@code tr} nodes found among the children of the first table in the list.
 *
 * @param tables the candidate table nodes; only the first element is inspected
 * @return the matching row nodes, or an empty list when {@code tables} is null or
 *         empty, or when the first table has no children
 */
private NodeList getRows(NodeList tables) {
    // Guard the lookup of element 0: an empty list has no first table to inspect.
    if (tables == null || tables.size() == 0) {
        return new NodeList();
    }
    Node table = tables.elementAt(0);
    NodeList children = (table == null) ? null : table.getChildren();
    if (children == null) {
        return new NodeList();
    }
    return children.extractAllNodesThatMatch(new TagNameFilter("tr"));
}
/**
 * Removes the given node from this list, if it is present.
 * @param node The node to look up via {@code indexOf} and remove.
 * @return True if the node was found and removed, false if {@code indexOf} reported -1.
 */
public boolean remove (Node node)
{
    int position = indexOf (node);
    if (position == -1)
    {
        return false;
    }
    remove (position);
    return true;
}
/**
 * Looks up an attribute on the first node accepted by the given filter.
 *
 * The list is searched with {@code extractAllNodesThatMatch(filter, true)}; the first
 * match is cast to {@code TagNode} and asked for the attribute.
 *
 * @param nodeList the nodes to search
 * @param filter the filter selecting candidate nodes
 * @param attributeName the name of the attribute to read
 * @return the value returned by {@code getAttribute} for the first match,
 *         or {@code null} when nothing matches
 * @throws ParserException declared for the underlying HTML Parser calls
 */
public static String extractAttribute(NodeList nodeList, NodeFilter filter, String attributeName)
        throws ParserException {
    NodeList accepted = nodeList.extractAllNodesThatMatch(filter, true);
    if (accepted.size() < 1) {
        return null;
    }
    TagNode first = (TagNode) accepted.elementAt(0);
    return first.getAttribute(attributeName);
}
/**
 * Inserts {@code childRow}'s node into the table's child list directly after
 * {@code existingRow}'s node.
 *
 * The list is only appended to and removed from by index here, so every node
 * behind the anchor is peeled off the tail onto a stack, the new node is
 * appended, and the parked nodes are appended again in their original order.
 *
 * @param existingRow the row whose node marks the insertion point
 * @param childRow the row whose node is inserted
 */
private void insertRowAfter(Row existingRow, Row childRow) {
    NodeList rows = tableNode.getChildren();
    int anchor = rows.indexOf(existingRow.rowNode);

    Stack<Node> trailing = new Stack<>();
    for (int last = rows.size() - 1; last > anchor; last = rows.size() - 1) {
        Node tail = rows.elementAt(last);
        rows.remove(last);
        trailing.push(tail);
    }

    rows.add(childRow.rowNode);

    // Popping reverses the tail-first removal, restoring the original order.
    while (!trailing.isEmpty()) {
        rows.add(trailing.pop());
    }
}
// Fragment: gathers the nodes matching `filter` into `links` (the initial
// `new NodeList ()` is overwritten by the parser result), then compares pairs (j, k)
// of entries using each tag's start position and the end position of its end tag.
// When entry k starts after entry j starts and ends before entry j ends (and k != j),
// entry k is removed from `links`.
// NOTE(review): the loop/if braces are missing from this snippet, so the scope of the
// trailing `k--; j--;` is unclear - presumably they compensate for the removal shifting
// later entries down; confirm against the full source before relying on this.
NodeList links = new NodeList (); parser = createParserParsingAnInputString(output); links = parser.extractAllNodesThatMatch(filter); for (int j=0; j<links.size(); j++) CompositeTag jStartTag = (CompositeTag)links.elementAt(j); Tag jEndTag = jStartTag.getEndTag(); int jStartTagBegin = jStartTag.getStartPosition (); int jEndTagEnd = jEndTag.getEndPosition (); for (int k=0; k<links.size(); k++) CompositeTag kStartTag = (CompositeTag)links.elementAt(k); Tag kEndTag = kStartTag.getEndTag(); int kStartTagBegin = kStartTag.getStartPosition (); int kEndTagEnd = kEndTag.getEndPosition (); if ((k!=j) && (kStartTagBegin>jStartTagBegin) && (kEndTagEnd<jEndTagEnd)) links.remove(k); k--; j--;
/**
 * Returns the HTML of the single tag carrying the given attribute/value pair.
 *
 * @param attribute the attribute name passed to {@code HasAttributeFilter}
 * @param value the attribute value passed to {@code HasAttributeFilter}
 * @return the {@code toHtml()} text of the match when exactly one tag matches,
 *         otherwise a message stating how many matches were found
 * @throws Exception declared for the underlying tag lookup
 */
private String getValueOfTagWithAttributeValue(String attribute, String value) throws Exception {
    NodeList found = getMatchingTags(new HasAttributeFilter(attribute, value));
    int count = found.size();
    if (count == 1) {
        return found.elementAt(0).toHtml();
    }
    return String.format("There are %d matches, there should be 1.", count);
}
// Fragment of tag-stack handling. The braces of the enclosing loops/branches are not
// present in this snippet, so which statements are alternatives of one another cannot
// be determined from here - NOTE(review): confirm nesting against the full source.
// Visible steps, in textual order:
//  - `ret` is appended to `stack` and `next` becomes the current `ret`;
//  - the stack is scanned from its last entry downward for a Tag whose getTagName()
//    equals `name`, recording its position in `index`;
//  - `ret` is attached via addChild to the last entry of the stack;
//  - entries above `index` are removed, passed to finishTag and attached via addChild
//    to the entry just below them, after which the entry at `index` is removed into `ret`;
//  - the last remaining stack entry (if any) is read into `node` and removed when it is a Tag;
//  - finishTag (ret, lexer) and addChild (precursor, ret) are called.
stack.add (ret); ret = next; for (int i = stack.size () - 1; (-1 == index) && (i >= 0); i--) Tag boffo = (Tag)stack.elementAt (i); if (name.equals (boffo.getTagName ())) index = i; addChild ((Tag)stack.elementAt (stack.size () - 1), ret); for (int i = stack.size () - 1; i > index; i--) Tag fred = (Tag)stack.remove (i); finishTag (fred, lexer); addChild ((Tag)stack.elementAt (i - 1), fred); ret = (Tag)stack.remove (index); node = null; int depth = stack.size (); if (0 != depth) node = stack.elementAt (depth - 1); if (node instanceof Tag) stack.remove (depth - 1); finishTag (ret, lexer); addChild (precursor, ret);
/**
 * Extracts the {@code href} values of all nodes accepted by {@code nodeFilter}.
 *
 * Each value is trimmed and has the HTML entity {@code &amp;} decoded to a plain
 * ampersand. Tags without an {@code href} attribute are skipped.
 *
 * @param pageData the page whose parsed nodes are searched
 * @return the extracted link targets in document order; an empty list when the
 *         page has no parsed nodes
 */
public List<String> extractLinks(PageData pageData) {
    NodeList nodes = pageData.getNodes();
    if (nodes == null) {
        return Collections.emptyList();
    }

    NodeList linkNodes = nodes.extractAllNodesThatMatch(nodeFilter, true);
    List<String> links = new ArrayList<String>(linkNodes.size());
    SimpleNodeIterator it = linkNodes.elements();
    while (it.hasMoreNodes()) {
        Tag tag = (Tag) it.nextNode();
        String href = tag.getAttribute("href");
        if (href == null) {
            // No href on this tag: nothing to record (previously this threw an NPE).
            continue;
        }
        // Literal replace (no regex needed) of the escaped ampersand; the former
        // replaceAll("&", "&") replaced the text with itself and changed nothing.
        links.add(href.trim().replace("&amp;", "&"));
    }
    return links;
}
/**
 * Adds the cells of a row to the map when the row has exactly two {@code td} matches.
 *
 * Rows without children, or with any other number of matching cells, are ignored.
 *
 * @param map the map handed to {@code addColsToMap}
 * @param row the row node whose children are inspected
 */
private void extractColumns(Map<String, String> map, Node row) {
    NodeList children = row.getChildren();
    if (children == null) {
        return;
    }
    NodeList cells = children.extractAllNodesThatMatch(new TagNameFilter("td"));
    if (cells.size() == 2) {
        addColsToMap(map, cells);
    }
}
// Fragment of tag-stack handling. The braces of the enclosing loops/branches are not
// present in this snippet, so which statements are alternatives of one another cannot
// be determined from here - NOTE(review): confirm nesting against the full source.
// Visible steps, in textual order:
//  - `ret` is appended to `stack` and `next` becomes the current `ret`;
//  - the stack is scanned from its last entry downward for a Tag whose getTagName()
//    equals `name`, recording its position in `index`;
//  - `ret` is attached via addChild to the last entry of the stack;
//  - entries above `index` are removed, passed to finishTag and attached via addChild
//    to the entry just below them, after which the entry at `index` is removed into `ret`;
//  - the last remaining stack entry (if any) is read into `node` and removed when it is a Tag;
//  - finishTag (ret, lexer) and addChild (precursor, ret) are called.
stack.add (ret); ret = next; for (int i = stack.size () - 1; (-1 == index) && (i >= 0); i--) Tag boffo = (Tag)stack.elementAt (i); if (name.equals (boffo.getTagName ())) index = i; addChild ((Tag)stack.elementAt (stack.size () - 1), ret); for (int i = stack.size () - 1; i > index; i--) Tag fred = (Tag)stack.remove (i); finishTag (fred, lexer); addChild ((Tag)stack.elementAt (i - 1), fred); ret = (Tag)stack.remove (index); node = null; int depth = stack.size (); if (0 != depth) node = stack.elementAt (depth - 1); if (node instanceof Tag) stack.remove (depth - 1); finishTag (ret, lexer); addChild (precursor, ret);
/**
 * Appends a node to a tag's children and records the tag as the node's parent.
 * A child list is installed on the parent first if it does not have one yet.
 * @param parent The tag receiving the child.
 * @param child The node to append.
 */
protected void addChild (Tag parent, Node child)
{
    boolean hasNoChildList = (parent.getChildren () == null);
    if (hasNoChildList)
    {
        parent.setChildren (new NodeList ());
    }
    child.setParent (parent);
    // Fetch the list from the parent again rather than reusing the instance
    // passed to setChildren: the parent decides which list object it keeps.
    NodeList siblings = parent.getChildren ();
    siblings.add (child);
}
/**
 * Copies the nodes of a {@code NodeList} into a typed {@code java.util.List}.
 *
 * The cast to {@code T} is unchecked, so the caller is responsible for the
 * list actually containing nodes of the requested type.
 *
 * @param list the nodes to copy
 * @param <T> the node type the caller expects
 * @return a new list with the nodes in iteration order
 * @throws ParserException if iterating the node list fails
 */
@SuppressWarnings("unchecked")
private <T extends Node> List<T> list(final NodeList list) throws ParserException {
    final List<T> result = new ArrayList<>();
    for (final NodeIterator nodes = list.elements(); nodes.hasMoreNodes();) {
        final Node current = nodes.nextNode();
        result.add((T) current);
    }
    return result;
}
/** * @see org.htmlparser.Tag#accept(org.htmlparser.visitors.NodeVisitor) */ public void accept(NodeVisitor visitor) { // be invisible but show the children (if they like visits) NodeList children = m_decorated.getChildren(); if (children == null) { return; } SimpleNodeIterator itChildren = children.elements(); while (itChildren.hasMoreNodes()) { itChildren.nextNode().accept(visitor); } }
/** * Set the children of this node. * @param children The new list of children this node contains. */ /* See bug: https://sourceforge.net/tracker/?func=detail&aid=1755537&group_id=24399&atid=381399 * A check needs to be performed to see that a tag cannot be its own parent * or child and if it is the case then just ignore it */ public void setChildren (NodeList children) { /* Always Initialize the children field as in the constructor its being * initialized to null */ this.children = new NodeList(); /* Do nothing if the children node list contains the node * (i.e. the node whose children is being set) itself */ for(SimpleNodeIterator it = children.elements(); it.hasMoreNodes();){ Node nodetoadd = it.nextNode(); if(this != nodetoadd){ this.children.add(nodetoadd); } } //this.children = children; }
/**
 * Stores the {@code table} matches found in {@code nodes} in the {@code tables}
 * field and reports whether there is exactly one.
 *
 * @return {@code true} when exactly one node matched the {@code table} filter
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    return 1 == tables.size();
}