/**
 * Collects the leaves of this tree into a newly allocated list.
 * The leaves are listed in the natural left to right order of the tree.
 * Null values, if any, are inserted into the list like any other value.
 *
 * @return a {@code List} of the leaves.
 */
public <T extends Tree> List<T> getLeaves() {
  // Start from an empty accumulator and let the list-taking overload fill it.
  List<T> collected = new ArrayList<>();
  return getLeaves(collected);
}
/**
 * Looks up the leaves at positions {@code start} and {@code end - 1} of
 * {@code tree} and asks {@code Trees.getLowestCommonAncestor} for their
 * common ancestor within {@code tree}.
 *
 * @param tree  the tree whose leaves are addressed by position
 * @param start position of the first leaf of the span (zero-based list index)
 * @param end   position one past the last leaf of the span
 * @return the result of {@code Trees.getLowestCommonAncestor} for the two boundary leaves
 */
static Tree findTreeWithSmallestSpan(Tree tree, int start, int end) {
  List<Tree> yield = tree.getLeaves();
  // end is exclusive, so the last leaf of the span sits at end - 1
  List<Tree> boundaryLeaves = Arrays.asList(yield.get(start), yield.get(end - 1));
  return Trees.getLowestCommonAncestor(boundaryLeaves, tree);
}
/**
 * Returns the common ancestor, as computed by
 * {@code Trees.getLowestCommonAncestor}, of the two leaves that bound the
 * span {@code [start, end)} in {@code tree}.
 *
 * @param tree  the tree to search in
 * @param start list index of the span's first leaf
 * @param end   list index just past the span's last leaf
 * @return the node reported by {@code Trees.getLowestCommonAncestor}
 */
static Tree findTreeWithSmallestSpan(Tree tree, int start, int end) {
  List<Tree> allLeaves = tree.getLeaves();
  Tree firstInSpan = allLeaves.get(start);
  Tree lastInSpan = allLeaves.get(end - 1); // end is exclusive
  List<Tree> spanEdges = Arrays.asList(firstInSpan, lastInSpan);
  return Trees.getLowestCommonAncestor(spanEdges, tree);
}
private Tree safeHead(Tree top) { Tree head = top.headTerminal(headFinder); if (head != null) return head; // if no head found return the right-most leaf List<Tree> leaves = top.getLeaves(); if ( ! leaves.isEmpty()) { return leaves.get(leaves.size() - 1); } // fallback: return top return top; }
/**
 * Returns the leaf of the sentence's parse tree located at
 * {@code syntacticHeadTokenPosition}.
 *
 * @return the leaf node at the syntactic head position
 */
public Tree getSyntacticHeadTree() {
  final Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
  final Tree headLeaf = parse.getLeaves().get(syntacticHeadTokenPosition);
  return headLeaf;
}
/** * Replace the labels of the leaves with the given leaves. */ public static void setLeafLabels(Tree tree, List<Label> labels) { Iterator<Tree> leafIterator = tree.getLeaves().iterator(); Iterator<Label> labelIterator = labels.iterator(); while (leafIterator.hasNext() && labelIterator.hasNext()) { Tree leaf = leafIterator.next(); Label label = labelIterator.next(); leaf.setLabel(label); //leafIterator.next().setLabel(labelIterator.next()); } if (leafIterator.hasNext()) { throw new IllegalArgumentException("Tree had more leaves than the labels provided"); } if (labelIterator.hasNext()) { throw new IllegalArgumentException("More labels provided than tree had leaves"); } }
/**
 * Renders {@code tree} as text.
 *
 * @param tree       the tree to render
 * @param plainPrint when true, emit only the leaf label values, each followed
 *                   by a single space; when false, use {@code tree.toString()}
 * @return the rendered text
 */
private static String toString(Tree tree, boolean plainPrint) {
  if (plainPrint) {
    StringBuilder words = new StringBuilder();
    for (Tree leaf : tree.getLeaves()) {
      words.append(leaf.label().value());
      words.append(' ');
    }
    return words.toString();
  }
  return tree.toString();
}
/**
 * Puts the labels of {@code originalSentence} back onto the leaves of
 * {@code tree}, position by position. Words that are not {@code Label}
 * instances leave their leaf untouched. Does nothing when either the tree
 * or the stored sentence is null.
 *
 * @param tree the tree whose leaf labels are restored
 * @throws IllegalStateException if the leaf count differs from the sentence
 *         length (plus one when {@code addedPunct} is set)
 */
@Override
public void restoreOriginalWords(Tree tree) {
  if (tree == null || originalSentence == null) {
    return;
  }
  List<Tree> leaves = tree.getLeaves();
  int expectedSize = originalSentence.size();
  if (addedPunct) {
    // one extra leaf is expected when addedPunct is set
    expectedSize++;
  }
  if (leaves.size() != expectedSize) {
    throw new IllegalStateException("originalWords and sentence of different sizes: " + expectedSize + " vs. " + leaves.size() +
                                    "\n Orig: " + SentenceUtils.listToString(originalSentence) +
                                    "\n Pars: " + SentenceUtils.listToString(leaves));
  }
  int position = 0;
  for (HasWord word : originalSentence) {
    Tree leaf = leaves.get(position++);
    if (word instanceof Label) {
      leaf.setLabel((Label) word);
    }
  }
}
/** * TODO: clearly this should be a default method in ParserQuery once Java 8 comes out */ @Override public void restoreOriginalWords(Tree tree) { if (originalSentence == null || tree == null) { return; } List<Tree> leaves = tree.getLeaves(); if (leaves.size() != originalSentence.size()) { throw new IllegalStateException("originalWords and sentence of different sizes: " + originalSentence.size() + " vs. " + leaves.size() + "\n Orig: " + SentenceUtils.listToString(originalSentence) + "\n Pars: " + SentenceUtils.listToString(leaves)); } // TODO: get rid of this cast Iterator<? extends Label> wordsIterator = (Iterator<? extends Label>) originalSentence.iterator(); for (Tree leaf : leaves) { leaf.setLabel(wordsIterator.next()); } }
/**
 * Appends the leaves of this tree to {@code list}.
 * A leaf node adds itself; any other node recurses into each of its
 * children in the order {@code children()} supplies them.
 *
 * @param list The list that receives the leaves. Normally empty on entry;
 *             if not, the leaves are added after the existing elements.
 * @return the same {@code list} instance, now containing the leaves.
 */
@SuppressWarnings("unchecked")
public <T extends Tree> List<T> getLeaves(List<T> list) {
  if (isLeaf()) {
    list.add((T) this);
    return list;
  }
  for (Tree child : children()) {
    child.getLeaves(list);
  }
  return list;
}
/**
 * Rewrites the value of every leaf label of {@code tree} in place by
 * passing the current value through {@code transform}.
 *
 * @param tree the tree whose leaf label values are rewritten
 * @return the same {@code tree} instance that was passed in
 */
public Tree transformTree(Tree tree) {
  for (Tree leaf : tree.getLeaves()) {
    Label leafLabel = leaf.label();
    String currentValue = leafLabel.value();
    leafLabel.setValue(transform.apply(currentValue));
  }
  return tree;
}
}
/**
 * Counts how many nodes on the path from the leaf at {@code headIndex} up
 * through its ancestors in {@code tree} carry the value {@code "NP"}.
 * The leaf itself is included in the walk.
 *
 * @param tree      the tree containing the head leaf
 * @param headIndex position of the head among the tree's leaves
 * @return the number of {@code "NP"} nodes on the path, or -1 if any
 *         exception is raised while walking (best-effort behaviour)
 */
private int headEmbeddingLevel(Tree tree, int headIndex) {
  try {
    int npCount = 0;
    for (Tree node = tree.getLeaves().get(headIndex); node != null; node = node.ancestor(1, tree)) {
      String value = ((CoreLabel) node.label()).get(CoreAnnotations.ValueAnnotation.class);
      if (value.equals("NP")) {
        npCount++;
      }
    }
    return npCount;
  } catch (Exception e) {
    // any failure (bad index, unexpected label type, missing value) maps to -1
    return -1;
  }
}
/**
 * Builds the text of the lowest node valued {@code "NP"} at or above the
 * head leaf in {@code contextParseTree}.
 * If the upward walk reaches a node valued {@code "ROOT"} first, the head
 * leaf alone is used. If the walk runs out of ancestors, the empty string
 * is returned. If the resulting text is not contained in
 * {@code spanToString()}, the text of the head word is returned instead.
 *
 * @return the space-joined leaf texts of the chosen node, the head word's
 *         text, or the empty string
 */
public String lowestNPIncludesHead() {
  final Tree headLeaf = this.contextParseTree.getLeaves().get(this.headIndex);
  Tree node = headLeaf;
  String category;
  while (true) {
    if (node == null) {
      return "";
    }
    category = ((CoreLabel) node.label()).get(CoreAnnotations.ValueAnnotation.class);
    if (category.equals("NP") || category.equals("ROOT")) {
      break;
    }
    node = node.ancestor(1, this.contextParseTree);
  }
  // Hitting ROOT means no NP was found on the way up: use just the head.
  Tree chosen = category.equals("ROOT") ? headLeaf : node;

  StringBuilder phrase = new StringBuilder();
  for (Tree leaf : chosen.getLeaves()) {
    if (phrase.length() > 0) {
      phrase.append(" ");
    }
    String text = ((CoreLabel) leaf.label()).get(CoreAnnotations.TextAnnotation.class);
    phrase.append(text);
  }
  String ret = phrase.toString();
  if (this.spanToString().contains(ret)) {
    return ret;
  }
  return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
}
/**
 * Returns the words covered by the nearest node valued {@code "NP"} found
 * by walking up from the head leaf of {@code contextParseTree}.
 * A node valued {@code "ROOT"} ends the walk and makes the head leaf the
 * chosen node. When there are no more ancestors the result is the empty
 * string. When {@code spanToString()} does not contain the assembled
 * words, the head word's text is returned.
 *
 * @return the assembled words separated by single spaces, the head word's
 *         text, or the empty string
 */
public String lowestNPIncludesHead() {
  final Tree head = this.contextParseTree.getLeaves().get(this.headIndex);
  Tree current = head;
  String value;
  while (true) {
    if (current == null) {
      return "";
    }
    value = ((CoreLabel) current.label()).get(CoreAnnotations.ValueAnnotation.class);
    boolean stopHere = value.equals("NP") || value.equals("ROOT");
    if (stopHere) {
      break;
    }
    current = current.ancestor(1, this.contextParseTree);
  }
  if (value.equals("ROOT")) {
    // reached the top without meeting an NP: fall back to the head itself
    current = head;
  }

  StringBuilder words = new StringBuilder();
  for (Tree leaf : current.getLeaves()) {
    String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.TextAnnotation.class);
    if (words.length() > 0) {
      words.append(" ");
    }
    words.append(word);
  }
  String ret = words.toString();
  if (!this.spanToString().contains(ret)) {
    return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
  }
  return ret;
}
private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) { logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); List<Tree> leaves = root.getLeaves(); for (Tree leaf : leaves) { CoreLabel label = CoreLabel.class.cast(leaf.label()); int ind = label.get(CoreAnnotations.BeginIndexAnnotation.class); // log.info("Token #" + ind + ": " + leaf.value()); if (token.equals(leaf.value()) && ind >= index && ind <= index + approximateness) { return leaf; } } // this shouldn't happen // but it does happen (VERY RARELY) on some weird web text that includes SGML tags with spaces // TODO: does this mean that somehow tokenization is different for the parser? check this by throwing an Exception in KBP logger.severe("GenericDataSetReader: WARNING: Failed to find head token"); logger.severe(" when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); return null; }
/**
 * Rebuilds {@code wordVectors} with one vector from
 * {@code randomWordVector()} for every distinct leaf word in
 * {@code trainingTrees}, plus {@code UNKNOWN_WORD}.
 * Words are lowercased first when {@code op.lowercaseWordVectors} is set.
 *
 * @param trainingTrees the trees whose leaf words form the vocabulary
 * @throws RuntimeException if {@code op.numHid} is 0
 */
void initRandomWordVectors(List<Tree> trainingTrees) {
  if (op.numHid == 0) {
    throw new RuntimeException("Cannot create random word vectors for an unknown numHid");
  }

  // Gather the vocabulary first so each distinct word gets exactly one vector.
  Set<String> vocabulary = Generics.newHashSet();
  vocabulary.add(UNKNOWN_WORD);
  for (Tree tree : trainingTrees) {
    for (Tree leaf : tree.getLeaves()) {
      String word = leaf.label().value();
      vocabulary.add(op.lowercaseWordVectors ? word.toLowerCase() : word);
    }
  }

  this.wordVectors = Generics.newTreeMap();
  for (String word : vocabulary) {
    wordVectors.put(word, randomWordVector());
  }
}
/** * Sets the label of the leaf nodes of a Tree to be the CoreLabels in the given sentence. * The original value() of the Tree nodes is preserved, and otherwise the label of tree * leaves becomes the label from the List. */ // todo [cdm 2015]: This clearly shouldn't be here! Maybe it's not needed at all now since parsing code does this? public static void mergeLabels(Tree tree, List<CoreLabel> sentence) { int idx = 0; for (Tree t : tree.getLeaves()) { CoreLabel cl = sentence.get(idx ++); String value = t.value(); cl.set(CoreAnnotations.ValueAnnotation.class, value); t.setLabel(cl); } tree.indexLeaves(); }
/**
 * Finds a subtree whose first and last leaves carry exactly the given
 * {@code IndexAnnotation} values. The node itself is tested before its
 * children, and children are searched depth-first in order.
 *
 * @param tree  Tree to search; its leaves must be indexed!
 * @param first Index of the first element in the span (first position has offset 1)
 * @param last  Index of the last element included in the span (first position has offset 1)
 * @return the first matching subtree found, or null if there is none
 */
public static Tree findExactMatch(Tree tree, int first, int last) {
  List<Tree> leaves = tree.getLeaves();
  int spanStart = ((CoreMap) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class);
  int spanEnd = ((CoreMap) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
  if (spanStart == first && spanEnd == last) {
    return tree;
  }
  for (Tree child : tree.children()) {
    Tree match = findExactMatch(child, first, last);
    if (match != null) {
      return match;
    }
  }
  return null;
}
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexLeaves(); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); TregexPattern tgrepPattern = npOrPrpMentionPattern; TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1; int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class); if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with , IntPair mSpan = new IntPair(beginIdx, endIdx); if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) { int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIdx, endIdx, dependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t); mentions.add(m); mentionSpanSet.add(mSpan); } } } /** Extract enumerations (A, B, and C) */
/**
 * Returns the positional index of the right edge of <i>node</i> within the tree,
 * as measured by characters. Returns -1 if <i>node</i> is not found.
 *
 * The value is the index of the rightmost character + 1, so that
 * rightCharEdge(getLeaves().get(i)) == leftCharEdge(getLeaves().get(i+1))
 *
 * The search is seeded with the summed length of all leaf label values of
 * this tree and handed to the two-argument overload.
 *
 * Note: These methods were written for internal evaluation routines. They are
 * not the right methods to relate tree nodes to textual offsets. For these,
 * look at the appropriate annotations on a CoreLabel (CharacterOffsetBeginAnnotation, etc.).
 *
 * @param node The subtree to look for in this Tree
 * @return The positional index of the right edge of node, or -1 if it is not found
 */
public int rightCharEdge(Tree node) {
  List<Tree> yield = getLeaves();
  int totalChars = 0;
  for (Tree leaf : yield) {
    totalChars += leaf.label().value().length();
  }
  MutableInteger edge = new MutableInteger(totalChars);
  return rightCharEdge(node, edge) ? edge.intValue() : -1;
}