/**
 * Builds a new label from an existing one.
 *
 * @param oldLabel the label handed to the {@code IndexedWord} constructor
 * @return a freshly constructed {@code IndexedWord}
 */
@Override
public Label newLabel(Label oldLabel) {
  IndexedWord created = new IndexedWord(oldLabel);
  return created;
}
/**
 * Creates a "soft" copy of this word: a new {@code IndexedWord} constructed
 * directly from this word's {@code label} field, tagged with the given copy
 * count and pointing back at this word through its {@code original} field.
 * NOTE(review): presumably the copy shares the underlying label rather than
 * duplicating it -- confirm against the {@code IndexedWord} constructor.
 *
 * @param count the copy count to record on the new word
 * @return the newly created soft copy
 */
public IndexedWord makeSoftCopy(int count) {
  final IndexedWord softCopy = new IndexedWord(label);
  softCopy.setCopyCount(count);
  softCopy.original = this;
  return softCopy;
}
/**
 * Creates a copy of this word backed by a new {@code CoreLabel} that is
 * itself constructed from this word's {@code label}, and records the given
 * copy count on it.
 *
 * @param count the copy count to record on the new word
 * @return the newly created copy
 */
private IndexedWord makeCopy(int count) {
  final IndexedWord duplicate = new IndexedWord(new CoreLabel(label));
  duplicate.setCopyCount(count);
  return duplicate;
}
/**
 * Builds a new label whose value is the given string.
 *
 * @param labelStr the string stored as the value of the backing {@code CoreLabel}
 * @return a new {@code IndexedWord} wrapping a fresh {@code CoreLabel}
 */
@Override
public Label newLabel(String labelStr) {
  final CoreLabel backing = new CoreLabel();
  backing.setValue(labelStr);
  return new IndexedWord(backing);
}
/** * Get GrammaticalRelation between gov and dep, and null if gov is not the * governor of dep. */ public GrammaticalRelation getGrammaticalRelation(int govIndex, int depIndex) { TreeGraphNode gov = getNodeByIndex(govIndex); TreeGraphNode dep = getNodeByIndex(depIndex); // TODO: this is pretty ugly return getGrammaticalRelation(new IndexedWord(gov.label()), new IndexedWord(dep.label())); }
/**
 * Converts a node argument string into an IndexedWord.
 *
 * The first and last characters of {@code rawArg} are stripped, the remainder
 * is split on {@code ATOM_DELIMITER} into tuples, and each tuple is split on
 * {@code TUPLE_DELIMITER} into a key and a value. A tuple that does not split
 * into exactly two pieces is recorded with an empty value.
 *
 * @param rawArg the raw argument string, including its enclosing characters
 * @return a new IndexedWord populated from the word, lemma, POS, value and
 *         current-text keys (absent keys yield {@code null} arguments)
 */
public static IndexedWord fromCheapString(String rawArg) {
  String inner = rawArg.substring(1, rawArg.length() - 1);
  Map<String, String> attributes = Generics.newHashMap();
  for (String atom : inner.split(ATOM_DELIMITER)) {
    String[] parts = atom.split(TUPLE_DELIMITER);
    attributes.put(parts[0], parts.length == 2 ? parts[1] : "");
  }

  // The setters are applied in a fixed order; do not reorder them.
  IndexedWord result = new IndexedWord();
  result.setWord(attributes.get(WORD_KEY));
  result.setLemma(attributes.get(LEMMA_KEY));
  result.setTag(attributes.get(POS_KEY));
  result.setValue(attributes.get(VALUE_KEY));
  result.setOriginalText(attributes.get(CURRENT_KEY));
  return result;
}
/**
 * Inserts the given specific portion of an uncollapsed relation back into the
 * target list. The new node is placed in front of {@code relnTgtNode} and in
 * front of any run of its descendants that immediately precedes it in the list.
 *
 * @param specific Specific relation to put in; used as the lemma, text and
 *                 original text of the inserted node
 * @param relnTgtNode Node governed by the uncollapsed relation
 * @param tgtList Target list of words, modified in place
 */
private void insertSpecificIntoList(String specific, IndexedWord relnTgtNode, List<IndexedWord> tgtList) {
  int insertAt = tgtList.indexOf(relnTgtNode);
  Set<IndexedWord> subtree = descendants(relnTgtNode);

  IndexedWord inserted = new IndexedWord();
  inserted.set(CoreAnnotations.LemmaAnnotation.class, specific);
  inserted.set(CoreAnnotations.TextAnnotation.class, specific);
  inserted.set(CoreAnnotations.OriginalTextAnnotation.class, specific);

  // Walk left past every preceding element that is a descendant of the target.
  for (; insertAt >= 1 && subtree.contains(tgtList.get(insertAt - 1)); insertAt--) {
    // position is adjusted by the loop header
  }
  tgtList.add(insertAt, inserted);
}
/**
 * The head of the subject of this relation triple.
 *
 * A single-token subject is its own head. Otherwise the tokens are scanned
 * from last to first, and the first one found with an incoming edge whose
 * governor index lies outside the span built from the first and last subject
 * token indices is returned. If no token qualifies, the last token is returned.
 */
@Override
public CoreLabel subjectHead() {
  final int tokenCount = subject.size();
  if (tokenCount == 1) {
    return subject.get(0);
  }
  Span span = Span.fromValues(subject.get(0).index(), subject.get(tokenCount - 1).index());
  for (int pos = tokenCount - 1; pos >= 0; --pos) {
    CoreLabel token = subject.get(pos);
    for (SemanticGraphEdge incoming : sourceTree.incomingEdgeIterable(new IndexedWord(token))) {
      int governorIndex = incoming.getGovernor().index();
      boolean insideSpan = governorIndex >= span.start() && governorIndex < span.end();
      if (!insideSpan) {
        return token;
      }
    }
  }
  return subject.get(tokenCount - 1);
}
/**
 * The head of the object of this relation triple.
 *
 * A single-token object is its own head. Otherwise the tokens are scanned
 * from last to first, and the first one found with an incoming edge whose
 * governor index lies outside the span built from the first and last object
 * token indices is returned. If no token qualifies, the last token is returned.
 */
@Override
public CoreLabel objectHead() {
  final int tokenCount = object.size();
  if (tokenCount == 1) {
    return object.get(0);
  }
  Span span = Span.fromValues(object.get(0).index(), object.get(tokenCount - 1).index());
  for (int pos = tokenCount - 1; pos >= 0; --pos) {
    CoreLabel token = object.get(pos);
    for (SemanticGraphEdge incoming : sourceTree.incomingEdgeIterable(new IndexedWord(token))) {
      int governorIndex = incoming.getGovernor().index();
      boolean insideSpan = governorIndex >= span.start() && governorIndex < span.end();
      if (!insideSpan) {
        return token;
      }
    }
  }
  return object.get(tokenCount - 1);
}
@Override public String toString() { List<Pair<String, Integer>> glosses = new ArrayList<>(); for (CoreLabel word : words) { // Add the word itself glosses.add(Pair.makePair(word.word(), word.index() - 1)); String addedConnective = null; // Find additional connectives for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(new IndexedWord(word))) { String rel = edge.getRelation().toString(); if (rel.contains("_")) { // for Stanford dependencies only addedConnective = rel.substring(rel.indexOf('_') + 1); } } if (addedConnective != null) { // Found a connective (e.g., a preposition or conjunction) Pair<Integer, Integer> yield = parseTree.yieldSpan(new IndexedWord(word)); glosses.add(Pair.makePair(addedConnective.replaceAll("_", " "), yield.first - 1)); } } // Sort the sentence Collections.sort(glosses, (a, b) -> a.second - b.second); // Return the sentence return StringUtils.join(glosses.stream().map(Pair::first), " "); }
/**
 * Walks every governor/dependent pair of {@code completeGraph} and appends to
 * {@code deps} the dependencies that are not already in the list and that
 * pass both filters. Every dependency added here is marked as extra.
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph a graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
protected void getTreeDeps(List<TypedDependency> deps,
                           DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph,
                           Predicate<TypedDependency> puncTypedDepFilter,
                           Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode gov : completeGraph.getAllVertices()) {
    for (TreeGraphNode dep : completeGraph.getChildren(gov)) {
      for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) {
        // A fresh IndexedWord pair is built for every candidate dependency.
        IndexedWord govWord = new IndexedWord(gov.headWordNode().label());
        IndexedWord depWord = new IndexedWord(dep.headWordNode().label());
        TypedDependency candidate = new TypedDependency(rel, govWord, depWord);

        if (deps.contains(candidate)) {
          continue;
        }
        if (!puncTypedDepFilter.test(candidate)) {
          continue;
        }
        if (!extraTreeDepFilter.test(candidate)) {
          continue;
        }
        candidate.setExtra();
        deps.add(candidate);
      }
    }
  }
}
/**
 * A helper to add a single word to a given dependency tree. The word becomes
 * a new vertex attached to {@code root} by a non-extra edge with weight
 * {@code Double.NEGATIVE_INFINITY}.
 *
 * @param toModify The tree to add the word to.
 * @param root The root of the tree where we should be adding the word.
 * @param rel The relation to add the word with; resolved as an English relation name.
 * @param coreLabel The word to add.
 */
@SuppressWarnings("UnusedDeclaration")
private static void addWord(SemanticGraph toModify, IndexedWord root, String rel, CoreLabel coreLabel) {
  final IndexedWord child = new IndexedWord(coreLabel);
  toModify.addVertex(child);
  GrammaticalRelation relation = GrammaticalRelation.valueOf(Language.English, rel);
  toModify.addEdge(root, child, relation, Double.NEGATIVE_INFINITY, false);
}
/** The head of the relation of this relation triple. */ @Override public CoreLabel relationHead() { if (relation.size() == 1) { return relation.get(0); } CoreLabel guess = null; CoreLabel newGuess = super.relationHead(); int iters = 0; // make sure we don't infinite loop... while (guess != newGuess && iters < 100) { guess = newGuess; iters += 1; for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(guess))) { // find a node in the relation list which is a governor of the candidate root Optional<CoreLabel> governor = relation.stream().filter(x -> x.index() == edge.getGovernor().index()).findFirst(); // if we found one, this is the new root. The for loop continues if (governor.isPresent()) { newGuess = governor.get(); } } } // Return if (iters >= 100) { err("Likely cycle in relation tree"); } return guess; }
/** * GIven a graph, returns a new graph with the the new sentence index enforced. * NOTE: new vertices are inserted. * TODO: is this ok? rewrite this? */ public static SemanticGraph setSentIndex(SemanticGraph sg, int newSentIndex) { SemanticGraph newGraph = new SemanticGraph(sg); List<IndexedWord> prevRoots = new ArrayList<>(newGraph.getRoots()); List<IndexedWord> newRoots = new ArrayList<>(); // TODO: we are using vertexListSorted here because we're changing // vertices while iterating. Perhaps there is a better way to do it. for (IndexedWord node : newGraph.vertexListSorted()) { IndexedWord newWord = new IndexedWord(node); newWord.setSentIndex(newSentIndex); SemanticGraphUtils.replaceNode(newWord, node, newGraph); if (prevRoots.contains(node)) newRoots.add(newWord); } newGraph.setRoots(newRoots); return newGraph; }
/**
 * Given an iterable set of distinct vertices, creates a new mapping from each
 * original vertex to a "generic" version of it. Used for generalizing tokens
 * in discovered rules.
 *
 * Each generic vertex is constructed from its original, gets an empty lemma,
 * and has its value, word and original text set to {@code prefix} followed by
 * a counter that starts at 1 and increases in iteration order.
 *
 * @param verts Vertices to anonymize
 * @param prefix Prefix to assign to this anonymization
 * @return map from each original vertex to its generic counterpart
 */
public static Map<IndexedWord, IndexedWord> anonymyizeNodes(Iterable<IndexedWord> verts, String prefix) {
  Map<IndexedWord, IndexedWord> anonymized = Generics.newHashMap();
  int counter = 1;
  for (IndexedWord original : verts) {
    final String placeholder = prefix + counter;
    counter++;

    // The setters are applied in a fixed order; do not reorder them.
    IndexedWord generic = new IndexedWord(original);
    generic.set(CoreAnnotations.LemmaAnnotation.class, "");
    generic.setValue(placeholder);
    generic.setWord(placeholder);
    generic.setOriginalText(placeholder);

    anonymized.put(original, generic);
  }
  return anonymized;
}
/**
 * Resolves the governor word and grammatical relation for a dependent.
 *
 * The relation is {@code GrammaticalRelation.ROOT} when the name is exactly
 * "root", and otherwise is looked up by name for this object's language. A
 * governor pseudo-index of 0.0 produces a synthetic word with index 0 and
 * value "ROOT" that takes its document id and sentence index from
 * {@code word}; any other pseudo-index is looked up in {@code sortedTokens}.
 *
 * @param govPseudoIndex pseudo-index of the governor; 0.0 denotes the root
 * @param word the dependent word
 * @param relationName name of the relation
 * @param sortedTokens tokens searched for a non-root governor
 * @return the governor paired with the relation
 */
private Pair<IndexedWord, GrammaticalRelation> getGovAndReln(double govPseudoIndex, IndexedWord word,
                                                             String relationName, List<IndexedWord> sortedTokens) {
  final GrammaticalRelation relation = relationName.equals("root")
      ? GrammaticalRelation.ROOT
      : GrammaticalRelation.valueOf(this.lang, relationName);

  final IndexedWord governor;
  if (govPseudoIndex != 0.0) {
    governor = this.getToken(sortedTokens, govPseudoIndex);
  } else {
    governor = new IndexedWord(word.docID(), word.sentIndex(), 0);
    governor.setValue("ROOT");
  }
  return Generics.newPair(governor, relation);
}
private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair<String, Integer> wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; }
/** * Copies a the current graph, but also sets the mapping from the old to new * graph. */ public SemanticGraph(SemanticGraph g, Map<IndexedWord, IndexedWord> prevToNewMap) { graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory); if (prevToNewMap == null) { prevToNewMap = wordMapFactory.newMap(); } Set<IndexedWord> vertexes = g.vertexSet(); for (IndexedWord vertex : vertexes) { IndexedWord newVertex = new IndexedWord(vertex); newVertex.setCopyCount(vertex.copyCount()); addVertex(newVertex); prevToNewMap.put(vertex, newVertex); } roots = wordMapFactory.newSet(); for (IndexedWord oldRoot : g.getRoots()) { roots.add(prevToNewMap.get(oldRoot)); } for (SemanticGraphEdge edge : g.edgeIterable()) { IndexedWord newGov = prevToNewMap.get(edge.getGovernor()); IndexedWord newDep = prevToNewMap.get(edge.getDependent()); addEdge(newGov, newDep, edge.getRelation(), edge.getWeight(), edge.isExtra()); } }
/**
 * Returns the dependencies of this graph as TypedDependency objects.
 *
 * One ROOT dependency is emitted per root of the graph; they all share a
 * single synthetic governor with index 0 and value "ROOT" that takes its
 * document id and sentence index from the first root iterated. Every
 * SemanticGraphEdge is then converted as well, carrying over its extra flag.
 *
 * @return A collection of TypedDependency in the graph
 */
public Collection<TypedDependency> typedDependencies() {
  Collection<TypedDependency> result = new ArrayList<>();

  IndexedWord syntheticRoot = null;
  for (IndexedWord rootNode : roots) {
    if (syntheticRoot == null) {
      // Created lazily so that a graph without roots creates no synthetic node.
      syntheticRoot = new IndexedWord(rootNode.docID(), rootNode.sentIndex(), 0);
      syntheticRoot.setValue("ROOT");
    }
    result.add(new TypedDependency(ROOT, syntheticRoot, rootNode));
  }

  for (SemanticGraphEdge edge : this.edgeIterable()) {
    TypedDependency converted =
        new TypedDependency(edge.getRelation(), edge.getGovernor(), edge.getDependent());
    if (edge.isExtra()) {
      converted.setExtra();
    }
    result.add(converted);
  }
  return result;
}
/** * TODO: figure out how to specify where in the sentence this node goes. * TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel. * TODO: bombproof if this gov, dep, and reln already exist. */ @Override public void evaluate(SemanticGraph sg, SemgrexMatcher sm) { IndexedWord govNode = sm.getNode(govNodeName); IndexedWord newNode = new IndexedWord(newNodePrototype); int newIndex = SemanticGraphUtils.leftMostChildVertice(govNode, sg).index(); // cheap En-specific hack for placing copula (beginning of governing phrase) newNode.setDocID(govNode.docID()); newNode.setIndex(newIndex); newNode.setSentIndex(govNode.sentIndex()); sg.addVertex(newNode); sg.addEdge(govNode, newNode, relation, weight,false); }