// Collapse the subgraph rooted at rootNode into a single multi-word node
Set<IndexedWord> subgraphNodeSet = sg.getSubgraphVertices(rootNode);
if ( ! sg.isDag(rootNode)) {
  // Check whether any child can reach rootNode again, i.e. the subgraph contains a cycle
  for (IndexedWord child : sg.getChildren(rootNode)) {
    Set<IndexedWord> reachableSet = sg.getSubgraphVertices(child);
    if (reachableSet.contains(rootNode)) {
      // … (elided in this excerpt)
    }
  }
}
// … (sortedSubgraphNodes, presumably derived from subgraphNodeSet, is declared in elided code)
Collections.sort(sortedSubgraphNodes);
IndexedWord newNode = new IndexedWord(rootNode.docID(), rootNode.sentIndex(), rootNode.index());
for (Class key : newNode.backingLabel().keySet()) {
  newNode.set(key, rootNode.get(key));
}
// …
newNode.setValue(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::value), " "));
newNode.setWord(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::word), " "));
newNode.setLemma(StringUtils.join(
    sortedSubgraphNodes.stream().map(x -> x.lemma() == null ? x.word() : x.lemma()), " "));
if (sg.getRoots().contains(rootNode)) {
  // … (excerpt truncated here)

/**
 * This hashCode uses only the docID, sentenceIndex, and index.
 * See compareTo for more info.
 */
@Override
public int hashCode() {
  if (cachedHashCode != 0) {
    return cachedHashCode;
  }
  boolean sensible = false;
  int result = 0;
  if (get(CoreAnnotations.DocIDAnnotation.class) != null) {
    result = get(CoreAnnotations.DocIDAnnotation.class).hashCode();
    sensible = true;
  }
  if (containsKey(CoreAnnotations.SentenceIndexAnnotation.class)) {
    result = 29 * result + get(CoreAnnotations.SentenceIndexAnnotation.class).hashCode();
    sensible = true;
  }
  if (containsKey(CoreAnnotations.IndexAnnotation.class)) {
    result = 29 * result + get(CoreAnnotations.IndexAnnotation.class).hashCode();
    sensible = true;
  }
  if ( ! sensible) {
    log.info("WARNING!!! You have hashed an IndexedWord with no docID, sentIndex or wordIndex. You will almost certainly lose");
  }
  cachedHashCode = result;
  return result;
}

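For illustration, a hedged sketch of the contract this implies: two IndexedWords that agree on docID, sentence index, and token index hash identically, whatever other annotations they carry (the three-argument constructor used here also appears in the first excerpt above).

import edu.stanford.nlp.ling.IndexedWord;

// Hedged sketch, not library code: only docID, sentence index, and token index feed the hash.
public class IndexedWordHashSketch {
  public static void main(String[] args) {
    IndexedWord a = new IndexedWord("doc-1", 0, 3);
    IndexedWord b = new IndexedWord("doc-1", 0, 3);
    b.setWord("different");  // the word text is not part of the hash
    System.out.println(a.hashCode() == b.hashCode());  // expected: true
  }
}
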
public IndexedWord makeSoftCopy(int count) {
  IndexedWord copy = new IndexedWord(label);
  copy.setCopyCount(count);
  copy.original = this;
  return copy;
}

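A hedged usage sketch of makeSoftCopy (copyCount() and index() appear elsewhere in these excerpts): the soft copy wraps the same underlying label, so it keeps the token index but carries a nonzero copy count, as used for copy nodes in enhanced dependency graphs.

import edu.stanford.nlp.ling.IndexedWord;

// Hedged sketch, not library code.
public class SoftCopySketch {
  public static void main(String[] args) {
    IndexedWord original = new IndexedWord("doc-1", 0, 5);
    IndexedWord copy = original.makeSoftCopy(1);
    System.out.println(copy.index());      // 5 -- same underlying token index as the original
    System.out.println(copy.copyCount());  // 1 -- marks this as a copy node
  }
}
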
/**
 * A helper method for
 * {@link NaturalLogicAnnotator#getModifierSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)} and
 * {@link NaturalLogicAnnotator#getSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)}.
 */
private static Pair<Integer, Integer> getGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, Set<String> validArcs) {
  int min = root.index();
  int max = root.index();
  Queue<IndexedWord> fringe = new LinkedList<>();
  for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(root)) {
    String edgeLabel = edge.getRelation().getShortName();
    if ((validArcs == null || validArcs.contains(edgeLabel)) && !"punct".equals(edgeLabel)) {
      fringe.add(edge.getDependent());
    }
  }
  while (!fringe.isEmpty()) {
    IndexedWord node = fringe.poll();
    min = Math.min(node.index(), min);
    max = Math.max(node.index(), max);
    // ignore punctuation
    fringe.addAll(tree.getOutEdgesSorted(node).stream()
        .filter(edge -> edge.getGovernor().equals(node) &&
                        !(edge.getGovernor().equals(edge.getDependent())) &&
                        !"punct".equals(edge.getRelation().getShortName()))
        .map(SemanticGraphEdge::getDependent)
        .collect(Collectors.toList()));
  }
  return Pair.makePair(min, max + 1);
}

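The returned pair is a half-open token-index span: for a root with no qualifying outgoing edges it covers exactly that one token. A hedged, fragment-only illustration (callable only from inside the same class, since the method is private; variable names are hypothetical):

// Hypothetical fragment: a leaf root at token index i yields the span (i, i + 1).
Pair<Integer, Integer> span = getGeneralizedSubtreeSpan(tree, leafRoot, null);
// span.first  == leafRoot.index()      (smallest index reached)
// span.second == leafRoot.index() + 1  (exclusive end, hence the "max + 1")
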
if (authenticator != null && !authenticator.test(props)) {
  respondUnauthorized(httpExchange);
  return;
}
// …
if (filter) {
  // Filter mode: one boolean per sentence -- does the Semgrex pattern match the sentence's graph?
  docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence ->
      regex.matcher(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)).matches()
  ).collect(Collectors.toList()));
} else {
  // Full mode: for each sentence, write out each match and its named captures
  docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence ->
      (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
        SemgrexMatcher matcher = regex.matcher(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class));
        int i = 0;
        // … (the loop over matches, which advances i, is elided in this excerpt)
        sentWriter.set(Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
          IndexedWord match = matcher.getMatch();
          matchWriter.set("text", match.word());
          matchWriter.set("begin", match.index() - 1);
          matchWriter.set("end", match.index());
          for (String capture : matcher.getNodeNames()) {
            matchWriter.set("$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> {
              IndexedWord node = matcher.getNode(capture);
              groupWriter.set("text", node.word());
              groupWriter.set("begin", node.index() - 1);
              groupWriter.set("end", node.index());
            });
          }
        });
        // … (excerpt truncated here)

/**
 * Return a Protobuf SentenceFragment from a {@link SentenceFragment}.
 */
public static CoreNLPProtos.SentenceFragment toProto(SentenceFragment fragment) {
  return CoreNLPProtos.SentenceFragment.newBuilder()
      .setAssumedTruth(fragment.assumedTruth)
      .setScore(fragment.score)
      .addAllTokenIndex(fragment.words.stream().map(x -> x.index() - 1).collect(Collectors.toList()))
      .setRoot(fragment.parseTree.getFirstRoot().index() - 1)
      .build();
}

CoreNLPProtos.DependencyGraph.Builder builder = CoreNLPProtos.DependencyGraph.newBuilder();
Set<Integer> rootSet = graph.getRoots().stream().map(IndexedWord::index).collect(Collectors.toCollection(IdentityHashSet::new));
for (IndexedWord node : graph.vertexSet()) {
  // Register the node
  CoreNLPProtos.DependencyGraph.Node.Builder nodeBuilder = CoreNLPProtos.DependencyGraph.Node.newBuilder()
      .setSentenceIndex(node.get(SentenceIndexAnnotation.class))
      .setIndex(node.index());
  if (node.copyCount() > 0) {
    nodeBuilder.setCopyAnnotation(node.copyCount());
  }
  builder.addNode(nodeBuilder.build());
  // Register the root
  if (rootSet.contains(node.index())) {
    builder.addRoot(node.index());
  }
}
for (SemanticGraphEdge edge : graph.edgeIterable()) {
  // Register the edge
  builder.addEdge(CoreNLPProtos.DependencyGraph.Edge.newBuilder()
      .setSource(edge.getSource().index())
      .setTarget(edge.getTarget().index())
      .setDep(edge.getRelation().toString())
      .setIsExtra(edge.isExtra())
      .setSourceCopy(edge.getSource().copyCount())
      .setTargetCopy(edge.getTarget().copyCount())
      .setLanguage(toProto(edge.getRelation().getLanguage())));
}

// Collect comment lines (tagged COMMENT_POS) separately from regular tokens
wordList.stream().filter(w -> w.tag() != null && w.tag().equals(COMMENT_POS))
    .forEach(w -> { lineNumberCounter++; comments.add(w.word()); });
wordList.stream().filter(w -> w.tag() == null || ! w.tag().equals(COMMENT_POS))
    .filter(w -> !w.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class))
    // … (rest of this stream pipeline elided in the excerpt)

// Propagate multi-word token spans onto the words they cover
if (word.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
  tokenSpan = word.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
  originalToken = word.word();
} else {
  if (tokenSpan != null && tokenSpan.getTarget() >= word.index()) {
    word.setOriginalText(originalToken);
    word.set(CoreAnnotations.CoNLLUTokenSpanAnnotation.class, tokenSpan);
  } else {
    tokenSpan = null;
  }
}
// …
int basicGovIdx = word.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class) != null
    ? word.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class) : -1;
TypedDependency basicDep = null;
if (basicGovIdx > -1) {
  Pair<IndexedWord, GrammaticalRelation> basicGovReln =
      getGovAndReln((double) basicGovIdx, word, word.get(CoreAnnotations.CoNLLDepTypeAnnotation.class), sortedTokens);
  IndexedWord basicGov = basicGovReln.first();
  GrammaticalRelation basicReln = basicGovReln.second();
  // … (excerpt truncated here)

/** The head of the relation of this relation triple. */
@Override
public CoreLabel relationHead() {
  if (relation.size() == 1) {
    return relation.get(0);
  }
  CoreLabel guess = null;
  CoreLabel newGuess = super.relationHead();
  int iters = 0;  // make sure we don't infinite loop...
  while (guess != newGuess && iters < 100) {
    guess = newGuess;
    iters += 1;
    for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(guess))) {
      // find a node in the relation list which is a governor of the candidate root
      Optional<CoreLabel> governor = relation.stream().filter(x -> x.index() == edge.getGovernor().index()).findFirst();
      // if we found one, this is the new root. The for loop continues
      if (governor.isPresent()) {
        newGuess = governor.get();
      }
    }
  }
  // Return
  if (iters >= 100) {
    err("Likely cycle in relation tree");
  }
  return guess;
}

/**
 * Convert a dependency graph to a format expected as input to {@link Writer#set(String, Object)}.
 */
@SuppressWarnings("RedundantCast")  // It's lying; we need the "redundant" casts (as of 2014-09-08)
private static Object buildDependencyTree(SemanticGraph graph) {
  if (graph != null) {
    return Stream.concat(
        // Roots
        graph.getRoots().stream().map((IndexedWord root) -> (Consumer<Writer>) dep -> {
          dep.set("dep", "ROOT");
          dep.set("governor", 0);
          dep.set("governorGloss", "ROOT");
          dep.set("dependent", root.index());
          dep.set("dependentGloss", root.word());
        }),
        // Regular edges
        graph.edgeListSorted().stream().map((SemanticGraphEdge edge) -> (Consumer<Writer>) (Writer dep) -> {
          dep.set("dep", edge.getRelation().toString());
          dep.set("governor", edge.getGovernor().index());
          dep.set("governorGloss", edge.getGovernor().word());
          dep.set("dependent", edge.getDependent().index());
          dep.set("dependentGloss", edge.getDependent().word());
        })
    );
  } else {
    return null;
  }
}

@Override
public String toString() {
  List<Pair<String, Integer>> glosses = new ArrayList<>();
  for (CoreLabel word : words) {
    // Add the word itself
    glosses.add(Pair.makePair(word.word(), word.index() - 1));
    String addedConnective = null;
    // Find additional connectives
    for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(new IndexedWord(word))) {
      String rel = edge.getRelation().toString();
      if (rel.contains("_")) {  // for Stanford dependencies only
        addedConnective = rel.substring(rel.indexOf('_') + 1);
      }
    }
    if (addedConnective != null) {
      // Found a connective (e.g., a preposition or conjunction)
      Pair<Integer, Integer> yield = parseTree.yieldSpan(new IndexedWord(word));
      glosses.add(Pair.makePair(addedConnective.replaceAll("_", " "), yield.first - 1));
    }
  }
  // Sort the sentence
  Collections.sort(glosses, (a, b) -> a.second - b.second);
  // Return the sentence
  return StringUtils.join(glosses.stream().map(Pair::first), " ");
}

private static Object getNodes(SemanticGraph graph) {
  if (graph != null) {
    List<IndexedWord> vertexList = graph.vertexListSorted();
    int maxIndex = vertexList.get(vertexList.size() - 1).index();
    return vertexList.stream().map((IndexedWord token) -> (Consumer<Writer>) node -> {
      if (token.copyCount() == 0) {
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("start", token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        node.set("end", token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      } else {
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("source", token.index());
      }
      node.set("form", token.word());
      if (graph.getRoots().contains(token)) {
        node.set("top", true);
      }
      node.set("properties", (Consumer<Writer>) propertiesWriter -> {
        propertiesWriter.set("xpos", token.tag());
        propertiesWriter.set("upos", token.get(CoreAnnotations.CoarseTagAnnotation.class));
        propertiesWriter.set("lemma", token.lemma());
      });
      node.set("edges", graph.getOutEdgesSorted(token).stream().map(
          (SemanticGraphEdge dep) -> (Consumer<Writer>) edge -> {
            edge.set("target", getNodeIndex(dep.getDependent(), maxIndex));
            edge.set("label", dep.getRelation().toString());
          }));
    });
  } else {
    return null;
  }
}

public String getRelation() {
  if (headIndexedWord == null) return null;
  if (enhancedDependency.getRoots().isEmpty()) return null;
  if (enhancedDependency.getFirstRoot().equals(headIndexedWord)) return "root";
  if (!enhancedDependency.containsVertex(getHeadParent())) return null;
  GrammaticalRelation relation = enhancedDependency.reln(getHeadParent(), headIndexedWord);
  // … (start of the condition elided in this excerpt)
      && getHeadChildren().stream().anyMatch(c -> c.tag().equals("IN")))
      || relation == UniversalEnglishGrammaticalRelations.TEMPORAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER
  // … (excerpt truncated here)

Set<IndexedWord> children = new HashSet<>(sg.getChildren(oldHead));
for (IndexedWord child : children) {
  SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child);
  if (oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
    sg.addEdge(oldHead, child, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER, oldEdge.getWeight(), oldEdge.isExtra());
    sg.removeEdge(oldEdge);
  }
}
// …
for (int i = nameParts.get(0).index(), end = oldHead.index(); i < end; i++) {
  IndexedWord node = sg.getNodeByIndexSafe(i);
  if (node == null) {
    return;
  }
  if ( ! nameParts.contains(node) && PUNCT_TAG_FILTER.test(node.tag())) {
    // … (excerpt truncated here)

if (descendantSet.contains(curr) || (doNotAddThese != null && doNotAddThese.contains(curr)) || !acceptWord.test(curr.backingLabel())) {
  return;
}
if (!ignoreCommonTags || !ignoreTags.contains(curr.tag().trim())) {
  descendantSet.add(curr);
}
for (IndexedWord child : g.getChildren(curr)) {
  boolean dontuse = false;
  if (doNotAddThese != null && doNotAddThese.contains(child)) {
    // … (body elided in this excerpt)
  }
  rel = g.reln(curr, child);
  dontuse = checkIfSatisfiesRelConstrains(g, curr, child, allCutOffRels, rel);
  if (child.tag().matches(cutOffTagRegex)) {
    if (DEBUG >= 5) System.out.println("ignored tag " + child /* … line truncated in this excerpt */);
    // …
  }
  // …
}
// …
if (!feat.containsKey(curr.index())) {
  feat.put(curr.index(), new ArrayList<>());
}
GetPatternsFromDataMultiClass.getFeatures(g, curr, false, feat.get(curr.index()), rel);

/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 */
public interface Featurizer extends
    Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>,
    Serializable {
  boolean isSimpleSplit(Counter<String> feats);
}

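Because the interface declares isSimpleSplit in addition to the inherited Function.apply, it has two abstract methods and cannot be written as a lambda. A hedged sketch of a trivial implementation (purely illustrative, not the featurizer CoreNLP ships; assumes edu.stanford.nlp.stats.ClassicCounter and edu.stanford.nlp.util.Triple):

// Hedged sketch, not library code: a constant featurizer that only shows the interface's shape.
Featurizer constantFeaturizer = new Featurizer() {
  @Override
  public Counter<String> apply(Triple<ClauseSplitterSearchProblem.State,
                                      ClauseSplitterSearchProblem.Action,
                                      ClauseSplitterSearchProblem.State> transition) {
    Counter<String> feats = new ClassicCounter<>();  // edu.stanford.nlp.stats.ClassicCounter
    feats.incrementCount("bias");                    // one constant feature per transition
    return feats;
  }

  @Override
  public boolean isSimpleSplit(Counter<String> feats) {
    return false;  // never short-circuit to a "simple split"
  }
};
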
/**
 * Look through the complete graph and add to the list {@code deps} additional
 * dependencies which aren't already in the list but which satisfy the given filters.
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph A graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
protected void getTreeDeps(List<TypedDependency> deps,
                           DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph,
                           Predicate<TypedDependency> puncTypedDepFilter,
                           Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode gov : completeGraph.getAllVertices()) {
    for (TreeGraphNode dep : completeGraph.getChildren(gov)) {
      for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) {
        TypedDependency newDep = new TypedDependency(rel, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label()));
        if (!deps.contains(newDep) && puncTypedDepFilter.test(newDep) && extraTreeDepFilter.test(newDep)) {
          newDep.setExtra();
          deps.add(newDep);
        }
      }
    }
  }
}