/**
 * Collapses every run of neighbouring text nodes below {@code parent} into a single node, treating CDATA
 * sections as text nodes too. The whole subtree is processed recursively, and each merged node keeps the type
 * of the first node of its run.
 *
 * <p>XPath assumes that a tree never contains adjacent text nodes, so skipping this step can have undesirable
 * side effects: Xalan queries such as {@code text()} return only the first node of a run of matching adjacent
 * text nodes, whereas Jaxen returns all of them, as one would intuitively expect.
 *
 * @param parent the node whose subtree is to be normalized
 *
 * @see #simplify
 */
public static void mergeAdjacentText(Node parent) {
    // Scratch buffer handed to the recursive overload, which uses it to collect the merged text.
    final StringBuilder scratch = new StringBuilder(0);
    mergeAdjacentText(parent, scratch);
}
static private void mergeAdjacentText(Node parent, StringBuilder collectorBuf) { Node child = parent.getFirstChild(); while (child != null) { Node next = child.getNextSibling(); if (child instanceof Text) { boolean atFirstText = true; while (next instanceof Text) { // if (atFirstText) { collectorBuf.setLength(0); collectorBuf.ensureCapacity(child.getNodeValue().length() + next.getNodeValue().length()); collectorBuf.append(child.getNodeValue()); atFirstText = false; } collectorBuf.append(next.getNodeValue()); parent.removeChild(next); next = child.getNextSibling(); } if (!atFirstText && collectorBuf.length() != 0) { ((CharacterData) child).setData(collectorBuf.toString()); } } else { mergeAdjacentText(child, collectorBuf); } child = next; } }
/**
 * File-based variant of {@link #parse(InputSource, boolean, boolean)}: the document is read from a {@link File}
 * instead of an input source. The security warnings documented on that method apply here as well.
 */
public static NodeModel parse(File f, boolean removeComments, boolean removePIs)
        throws SAXException, IOException, ParserConfigurationException {
    final DocumentBuilder docBuilder = getDocumentBuilderFactory().newDocumentBuilder();
    final ErrorHandler handler = getErrorHandler();
    if (handler != null) {
        docBuilder.setErrorHandler(handler);
    }
    final Document parsed = docBuilder.parse(f);

    if (removeComments && removePIs) {
        // Both removals requested: delegate to simplify instead of running the separate passes below.
        simplify(parsed);
        return wrap(parsed);
    }

    if (removeComments) {
        removeComments(parsed);
    }
    if (removePIs) {
        removePIs(parsed);
    }
    // Text nodes are merged even when neither removal was requested.
    mergeAdjacentText(parsed);
    return wrap(parsed);
}
removePIs(doc); mergeAdjacentText(doc);
/**
 * Collapses every run of neighbouring text nodes below {@code parent} into a single node, treating CDATA
 * sections as text nodes too. The whole subtree is processed recursively, and each merged node keeps the type
 * of the first node of its run.
 *
 * <p>XPath assumes that a tree never contains adjacent text nodes, so skipping this step can have undesirable
 * side effects: Xalan queries such as {@code text()} return only the first node of a run of matching adjacent
 * text nodes, whereas Jaxen returns all of them, as one would intuitively expect.
 *
 * @param parent the node whose subtree is to be normalized
 *
 * @see #simplify
 */
public static void mergeAdjacentText(Node parent) {
    // Scratch buffer handed to the recursive overload, which uses it to collect the merged text.
    final StringBuilder scratch = new StringBuilder(0);
    mergeAdjacentText(parent, scratch);
}
/**
 * Collapses every run of neighbouring text nodes below {@code parent} into a single node, treating CDATA
 * sections as text nodes too. The whole subtree is processed recursively, and each merged node keeps the type
 * of the first node of its run.
 *
 * <p>XPath assumes that a tree never contains adjacent text nodes, so skipping this step can have undesirable
 * side effects: Xalan queries such as {@code text()} return only the first node of a run of matching adjacent
 * text nodes, whereas Jaxen returns all of them, as one would intuitively expect.
 *
 * @param parent the node whose subtree is to be normalized
 *
 * @see #simplify
 */
public static void mergeAdjacentText(Node parent) {
    // Scratch buffer handed to the recursive overload, which uses it to collect the merged text.
    final StringBuilder scratch = new StringBuilder(0);
    mergeAdjacentText(parent, scratch);
}
/**
 * Merges adjacent text/CDATA nodes so that no two text/CDATA nodes remain next to each other. Operates
 * recursively on the entire subtree. Every run of adjacent nodes, however long, is folded into the first
 * node of the run, so the boundaries of any CDATA sections inside a run are lost.
 *
 * @param node the node whose subtree is to be normalized
 *
 * @see #simplify
 */
public static void mergeAdjacentText(Node node) {
    Node child = node.getFirstChild();
    while (child != null) {
        // CDATASection extends Text, so this single check covers both node kinds.
        if (child instanceof Text) {
            Node next = child.getNextSibling();
            if (next instanceof Text) {
                StringBuilder merged = new StringBuilder().append(child.getNodeValue());
                // Consume the whole run; merging only one sibling would leave runs of 3+ nodes partly unmerged.
                do {
                    merged.append(next.getNodeValue());
                    node.removeChild(next);
                    // "next" was just detached, so re-read the sibling from the node that stays in the tree.
                    next = child.getNextSibling();
                } while (next instanceof Text);
                ((CharacterData) child).setData(merged.toString());
            }
        } else {
            mergeAdjacentText(child);
        }
        child = child.getNextSibling();
    }
}
static private void mergeAdjacentText(Node parent, StringBuilder collectorBuf) { Node child = parent.getFirstChild(); while (child != null) { Node next = child.getNextSibling(); if (child instanceof Text) { boolean atFirstText = true; while (next instanceof Text) { // if (atFirstText) { collectorBuf.setLength(0); collectorBuf.ensureCapacity(child.getNodeValue().length() + next.getNodeValue().length()); collectorBuf.append(child.getNodeValue()); atFirstText = false; } collectorBuf.append(next.getNodeValue()); parent.removeChild(next); next = child.getNextSibling(); } if (!atFirstText && collectorBuf.length() != 0) { ((CharacterData) child).setData(collectorBuf.toString()); } } else { mergeAdjacentText(child, collectorBuf); } child = next; } }
static private void mergeAdjacentText(Node parent, StringBuilder collectorBuf) { Node child = parent.getFirstChild(); while (child != null) { Node next = child.getNextSibling(); if (child instanceof Text) { boolean atFirstText = true; while (next instanceof Text) { // if (atFirstText) { collectorBuf.setLength(0); collectorBuf.ensureCapacity(child.getNodeValue().length() + next.getNodeValue().length()); collectorBuf.append(child.getNodeValue()); atFirstText = false; } collectorBuf.append(next.getNodeValue()); parent.removeChild(next); next = child.getNextSibling(); } if (!atFirstText && collectorBuf.length() != 0) { ((CharacterData) child).setData(collectorBuf.toString()); } } else { mergeAdjacentText(child, collectorBuf); } child = next; } }
/**
 * Builds a NodeModel from the XML document stored in a file. After the optional removals below, adjacent text
 * nodes are always merged.
 *
 * @param f the XML file to parse
 * @param removeComments whether to remove all comment nodes (recursively) from the tree before processing
 * @param removePIs whether to remove all processing instruction nodes (recursively) from the tree before
 *        processing
 */
public static NodeModel parse(File f, boolean removeComments, boolean removePIs)
        throws SAXException, IOException, ParserConfigurationException {
    final DocumentBuilder docBuilder = getDocumentBuilderFactory().newDocumentBuilder();
    if (errorHandler != null) {
        docBuilder.setErrorHandler(errorHandler);
    }

    final Document parsed = docBuilder.parse(f);
    if (removeComments) {
        removeComments(parsed);
    }
    if (removePIs) {
        removePIs(parsed);
    }
    mergeAdjacentText(parsed);

    return wrap(parsed);
}
/**
 * Builds a NodeModel from a SAX input source. Adjacent text nodes will be merged (and CDATA sections are
 * considered as text nodes).
 *
 * @param is the input source to parse
 * @param removeComments whether to remove all comment nodes (recursively) from the tree before processing
 * @param removePIs whether to remove all processing instruction nodes (recursively) from the tree before
 *        processing
 */
public static NodeModel parse(InputSource is, boolean removeComments, boolean removePIs)
        throws SAXException, IOException, ParserConfigurationException {
    final DocumentBuilder docBuilder = getDocumentBuilderFactory().newDocumentBuilder();
    if (errorHandler != null) {
        docBuilder.setErrorHandler(errorHandler);
    }
    final Document parsed = docBuilder.parse(is);

    if (removeComments && removePIs) {
        // Both removals requested: delegate to simplify instead of running the separate passes below.
        simplify(parsed);
        return wrap(parsed);
    }

    if (removeComments) {
        removeComments(parsed);
    }
    if (removePIs) {
        removePIs(parsed);
    }
    // Text nodes are merged even when neither removal was requested.
    mergeAdjacentText(parsed);
    return wrap(parsed);
}
/**
 * File-based variant of {@link #parse(InputSource, boolean, boolean)}: the document is read from a {@link File}
 * instead of an input source. The security warnings documented on that method apply here as well.
 */
public static NodeModel parse(File f, boolean removeComments, boolean removePIs)
        throws SAXException, IOException, ParserConfigurationException {
    final DocumentBuilder docBuilder = getDocumentBuilderFactory().newDocumentBuilder();
    final ErrorHandler handler = getErrorHandler();
    if (handler != null) {
        docBuilder.setErrorHandler(handler);
    }
    final Document parsed = docBuilder.parse(f);

    if (removeComments && removePIs) {
        // Both removals requested: delegate to simplify instead of running the separate passes below.
        simplify(parsed);
        return wrap(parsed);
    }

    if (removeComments) {
        removeComments(parsed);
    }
    if (removePIs) {
        removePIs(parsed);
    }
    // Text nodes are merged even when neither removal was requested.
    mergeAdjacentText(parsed);
    return wrap(parsed);
}
/**
 * File-based variant of {@link #parse(InputSource, boolean, boolean)}: the document is read from a {@link File}
 * instead of an input source. The security warnings documented on that method apply here as well.
 */
public static NodeModel parse(File f, boolean removeComments, boolean removePIs)
        throws SAXException, IOException, ParserConfigurationException {
    final DocumentBuilder docBuilder = getDocumentBuilderFactory().newDocumentBuilder();
    final ErrorHandler handler = getErrorHandler();
    if (handler != null) {
        docBuilder.setErrorHandler(handler);
    }
    final Document parsed = docBuilder.parse(f);

    if (removeComments && removePIs) {
        // Both removals requested: delegate to simplify instead of running the separate passes below.
        simplify(parsed);
        return wrap(parsed);
    }

    if (removeComments) {
        removeComments(parsed);
    }
    if (removePIs) {
        removePIs(parsed);
    }
    // Text nodes are merged even when neither removal was requested.
    mergeAdjacentText(parsed);
    return wrap(parsed);
}
removePIs(doc); mergeAdjacentText(doc);
// Indexing: if (indexOp != null) { IndexDescriptor[] indices;