// Collapse the subgraph rooted at rootNode into a single multi-word node
Set<IndexedWord> subgraphNodeSet = sg.getSubgraphVertices(rootNode);
if ( ! sg.isDag(rootNode)) {
  // Check whether any child can reach rootNode again, i.e. the subgraph contains a cycle
  for (IndexedWord child : sg.getChildren(rootNode)) {
    Set<IndexedWord> reachableSet = sg.getSubgraphVertices(child);
    if (reachableSet.contains(rootNode)) {
      // … (elided in this excerpt)
    }
  }
}
// … (sortedSubgraphNodes, presumably derived from subgraphNodeSet, is declared in elided code)
Collections.sort(sortedSubgraphNodes);
IndexedWord newNode = new IndexedWord(rootNode.docID(), rootNode.sentIndex(), rootNode.index());
for (Class key : newNode.backingLabel().keySet()) {
  newNode.set(key, rootNode.get(key));
}
// …
newNode.setValue(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::value), " "));
newNode.setWord(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::word), " "));
newNode.setLemma(StringUtils.join(
    sortedSubgraphNodes.stream().map(x -> x.lemma() == null ? x.word() : x.lemma()), " "));
if (sg.getRoots().contains(rootNode)) {
  // … (excerpt truncated here)

/**
 * This hashCode uses only the docID, sentenceIndex, and index.
 * See compareTo for more info.
 */
@Override
public int hashCode() {
  if (cachedHashCode != 0) {
    return cachedHashCode;
  }
  boolean sensible = false;
  int result = 0;
  if (get(CoreAnnotations.DocIDAnnotation.class) != null) {
    result = get(CoreAnnotations.DocIDAnnotation.class).hashCode();
    sensible = true;
  }
  if (containsKey(CoreAnnotations.SentenceIndexAnnotation.class)) {
    result = 29 * result + get(CoreAnnotations.SentenceIndexAnnotation.class).hashCode();
    sensible = true;
  }
  if (containsKey(CoreAnnotations.IndexAnnotation.class)) {
    result = 29 * result + get(CoreAnnotations.IndexAnnotation.class).hashCode();
    sensible = true;
  }
  if ( ! sensible) {
    log.info("WARNING!!! You have hashed an IndexedWord with no docID, sentIndex or wordIndex. You will almost certainly lose");
  }
  cachedHashCode = result;
  return result;
}

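For illustration, a hedged sketch of the contract this implies: two IndexedWords that agree on docID, sentence index, and token index hash identically, whatever other annotations they carry (the three-argument constructor used here also appears in the first excerpt above).

import edu.stanford.nlp.ling.IndexedWord;

// Hedged sketch, not library code: only docID, sentence index, and token index feed the hash.
public class IndexedWordHashSketch {
  public static void main(String[] args) {
    IndexedWord a = new IndexedWord("doc-1", 0, 3);
    IndexedWord b = new IndexedWord("doc-1", 0, 3);
    b.setWord("different");  // the word text is not part of the hash
    System.out.println(a.hashCode() == b.hashCode());  // expected: true
  }
}
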
public IndexedWord makeSoftCopy(int count) {
  IndexedWord copy = new IndexedWord(label);
  copy.setCopyCount(count);
  copy.original = this;
  return copy;
}

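A hedged usage sketch of makeSoftCopy (copyCount() and index() appear elsewhere in these excerpts): the soft copy wraps the same underlying label, so it keeps the token index but carries a nonzero copy count, as used for copy nodes in enhanced dependency graphs.

import edu.stanford.nlp.ling.IndexedWord;

// Hedged sketch, not library code.
public class SoftCopySketch {
  public static void main(String[] args) {
    IndexedWord original = new IndexedWord("doc-1", 0, 5);
    IndexedWord copy = original.makeSoftCopy(1);
    System.out.println(copy.index());      // 5 -- same underlying token index as the original
    System.out.println(copy.copyCount());  // 1 -- marks this as a copy node
  }
}
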
/**
 * A helper method for
 * {@link NaturalLogicAnnotator#getModifierSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)} and
 * {@link NaturalLogicAnnotator#getSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)}.
 */
private static Pair<Integer, Integer> getGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, Set<String> validArcs) {
  int min = root.index();
  int max = root.index();
  Queue<IndexedWord> fringe = new LinkedList<>();
  for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(root)) {
    String edgeLabel = edge.getRelation().getShortName();
    if ((validArcs == null || validArcs.contains(edgeLabel)) && !"punct".equals(edgeLabel)) {
      fringe.add(edge.getDependent());
    }
  }
  while (!fringe.isEmpty()) {
    IndexedWord node = fringe.poll();
    min = Math.min(node.index(), min);
    max = Math.max(node.index(), max);
    // ignore punctuation
    fringe.addAll(tree.getOutEdgesSorted(node).stream()
        .filter(edge -> edge.getGovernor().equals(node) &&
                        !(edge.getGovernor().equals(edge.getDependent())) &&
                        !"punct".equals(edge.getRelation().getShortName()))
        .map(SemanticGraphEdge::getDependent)
        .collect(Collectors.toList()));
  }
  return Pair.makePair(min, max + 1);
}

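The returned pair is a half-open token-index span: for a root with no qualifying outgoing edges it covers exactly that one token. A hedged, fragment-only illustration (callable only from inside the same class, since the method is private; variable names are hypothetical):

// Hypothetical fragment: a leaf root at token index i yields the span (i, i + 1).
Pair<Integer, Integer> span = getGeneralizedSubtreeSpan(tree, leafRoot, null);
// span.first  == leafRoot.index()      (smallest index reached)
// span.second == leafRoot.index() + 1  (exclusive end, hence the "max + 1")
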
if (authenticator != null && !authenticator.test(props)) {
  respondUnauthorized(httpExchange);
  return;
}
// …
if (filter) {
  // Filter mode: one boolean per sentence -- does the Semgrex pattern match the sentence's graph?
  docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence ->
      regex.matcher(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)).matches()
  ).collect(Collectors.toList()));
} else {
  // Full mode: for each sentence, write out each match and its named captures
  docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence ->
      (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
        SemgrexMatcher matcher = regex.matcher(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class));
        int i = 0;
        // … (the loop over matches, which advances i, is elided in this excerpt)
        sentWriter.set(Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
          IndexedWord match = matcher.getMatch();
          matchWriter.set("text", match.word());
          matchWriter.set("begin", match.index() - 1);
          matchWriter.set("end", match.index());
          for (String capture : matcher.getNodeNames()) {
            matchWriter.set("$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> {
              IndexedWord node = matcher.getNode(capture);
              groupWriter.set("text", node.word());
              groupWriter.set("begin", node.index() - 1);
              groupWriter.set("end", node.index());
            });
          }
        });
        // … (excerpt truncated here)

/**
 * Return a Protobuf SentenceFragment from a {@link SentenceFragment}.
 */
public static CoreNLPProtos.SentenceFragment toProto(SentenceFragment fragment) {
  return CoreNLPProtos.SentenceFragment.newBuilder()
      .setAssumedTruth(fragment.assumedTruth)
      .setScore(fragment.score)
      .addAllTokenIndex(fragment.words.stream().map(x -> x.index() - 1).collect(Collectors.toList()))
      .setRoot(fragment.parseTree.getFirstRoot().index() - 1)
      .build();
}

CoreNLPProtos.DependencyGraph.Builder builder = CoreNLPProtos.DependencyGraph.newBuilder();
Set<Integer> rootSet = graph.getRoots().stream().map(IndexedWord::index).collect(Collectors.toCollection(IdentityHashSet::new));
for (IndexedWord node : graph.vertexSet()) {
  // Register the node
  CoreNLPProtos.DependencyGraph.Node.Builder nodeBuilder = CoreNLPProtos.DependencyGraph.Node.newBuilder()
      .setSentenceIndex(node.get(SentenceIndexAnnotation.class))
      .setIndex(node.index());
  if (node.copyCount() > 0) {
    nodeBuilder.setCopyAnnotation(node.copyCount());
  }
  builder.addNode(nodeBuilder.build());
  // Register the root
  if (rootSet.contains(node.index())) {
    builder.addRoot(node.index());
  }
}
for (SemanticGraphEdge edge : graph.edgeIterable()) {
  // Register the edge
  builder.addEdge(CoreNLPProtos.DependencyGraph.Edge.newBuilder()
      .setSource(edge.getSource().index())
      .setTarget(edge.getTarget().index())
      .setDep(edge.getRelation().toString())
      .setIsExtra(edge.isExtra())
      .setSourceCopy(edge.getSource().copyCount())
      .setTargetCopy(edge.getTarget().copyCount())
      .setLanguage(toProto(edge.getRelation().getLanguage())));
}

// Collect comment lines (tagged COMMENT_POS) separately from regular tokens
wordList.stream().filter(w -> w.tag() != null && w.tag().equals(COMMENT_POS))
    .forEach(w -> { lineNumberCounter++; comments.add(w.word()); });
wordList.stream().filter(w -> w.tag() == null || ! w.tag().equals(COMMENT_POS))
    .filter(w -> !w.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class))
    // … (rest of this stream pipeline elided in the excerpt)

// Propagate multi-word token spans onto the words they cover
if (word.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
  tokenSpan = word.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
  originalToken = word.word();
} else {
  if (tokenSpan != null && tokenSpan.getTarget() >= word.index()) {
    word.setOriginalText(originalToken);
    word.set(CoreAnnotations.CoNLLUTokenSpanAnnotation.class, tokenSpan);
  } else {
    tokenSpan = null;
  }
}
// …
int basicGovIdx = word.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class) != null
    ? word.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class) : -1;
TypedDependency basicDep = null;
if (basicGovIdx > -1) {
  Pair<IndexedWord, GrammaticalRelation> basicGovReln =
      getGovAndReln((double) basicGovIdx, word, word.get(CoreAnnotations.CoNLLDepTypeAnnotation.class), sortedTokens);
  IndexedWord basicGov = basicGovReln.first();
  GrammaticalRelation basicReln = basicGovReln.second();
  // … (excerpt truncated here)

/** The head of the relation of this relation triple. */
@Override
public CoreLabel relationHead() {
  if (relation.size() == 1) {
    return relation.get(0);
  }
  CoreLabel guess = null;
  CoreLabel newGuess = super.relationHead();
  int iters = 0;  // make sure we don't infinite loop...
  while (guess != newGuess && iters < 100) {
    guess = newGuess;
    iters += 1;
    for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(guess))) {
      // find a node in the relation list which is a governor of the candidate root
      Optional<CoreLabel> governor = relation.stream().filter(x -> x.index() == edge.getGovernor().index()).findFirst();
      // if we found one, this is the new root. The for loop continues
      if (governor.isPresent()) {
        newGuess = governor.get();
      }
    }
  }
  // Return
  if (iters >= 100) {
    err("Likely cycle in relation tree");
  }
  return guess;
}

/**
 * Convert a dependency graph to a format expected as input to {@link Writer#set(String, Object)}.
 */
@SuppressWarnings("RedundantCast")  // It's lying; we need the "redundant" casts (as of 2014-09-08)
private static Object buildDependencyTree(SemanticGraph graph) {
  if (graph != null) {
    return Stream.concat(
        // Roots
        graph.getRoots().stream().map((IndexedWord root) -> (Consumer<Writer>) dep -> {
          dep.set("dep", "ROOT");
          dep.set("governor", 0);
          dep.set("governorGloss", "ROOT");
          dep.set("dependent", root.index());
          dep.set("dependentGloss", root.word());
        }),
        // Regular edges
        graph.edgeListSorted().stream().map((SemanticGraphEdge edge) -> (Consumer<Writer>) (Writer dep) -> {
          dep.set("dep", edge.getRelation().toString());
          dep.set("governor", edge.getGovernor().index());
          dep.set("governorGloss", edge.getGovernor().word());
          dep.set("dependent", edge.getDependent().index());
          dep.set("dependentGloss", edge.getDependent().word());
        })
    );
  } else {
    return null;
  }
}

@Override
public String toString() {
  List<Pair<String, Integer>> glosses = new ArrayList<>();
  for (CoreLabel word : words) {
    // Add the word itself
    glosses.add(Pair.makePair(word.word(), word.index() - 1));
    String addedConnective = null;
    // Find additional connectives
    for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(new IndexedWord(word))) {
      String rel = edge.getRelation().toString();
      if (rel.contains("_")) {  // for Stanford dependencies only
        addedConnective = rel.substring(rel.indexOf('_') + 1);
      }
    }
    if (addedConnective != null) {
      // Found a connective (e.g., a preposition or conjunction)
      Pair<Integer, Integer> yield = parseTree.yieldSpan(new IndexedWord(word));
      glosses.add(Pair.makePair(addedConnective.replaceAll("_", " "), yield.first - 1));
    }
  }
  // Sort the sentence
  Collections.sort(glosses, (a, b) -> a.second - b.second);
  // Return the sentence
  return StringUtils.join(glosses.stream().map(Pair::first), " ");
}

private static Object getNodes(SemanticGraph graph) {
  if (graph != null) {
    List<IndexedWord> vertexList = graph.vertexListSorted();
    int maxIndex = vertexList.get(vertexList.size() - 1).index();
    return vertexList.stream().map((IndexedWord token) -> (Consumer<Writer>) node -> {
      if (token.copyCount() == 0) {
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("start", token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        node.set("end", token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      } else {
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("source", token.index());
      }
      node.set("form", token.word());
      if (graph.getRoots().contains(token)) {
        node.set("top", true);
      }
      node.set("properties", (Consumer<Writer>) propertiesWriter -> {
        propertiesWriter.set("xpos", token.tag());
        propertiesWriter.set("upos", token.get(CoreAnnotations.CoarseTagAnnotation.class));
        propertiesWriter.set("lemma", token.lemma());
      });
      node.set("edges", graph.getOutEdgesSorted(token).stream().map(
          (SemanticGraphEdge dep) -> (Consumer<Writer>) edge -> {
            edge.set("target", getNodeIndex(dep.getDependent(), maxIndex));
            edge.set("label", dep.getRelation().toString());
          }));
    });
  } else {
    return null;
  }
}

public String getRelation() {
  if (headIndexedWord == null) return null;
  if (enhancedDependency.getRoots().isEmpty()) return null;
  if (enhancedDependency.getFirstRoot().equals(headIndexedWord)) return "root";
  if (!enhancedDependency.containsVertex(getHeadParent())) return null;
  GrammaticalRelation relation = enhancedDependency.reln(getHeadParent(), headIndexedWord);
  // … (start of the condition elided in this excerpt)
      && getHeadChildren().stream().anyMatch(c -> c.tag().equals("IN")))
      || relation == UniversalEnglishGrammaticalRelations.TEMPORAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER
  // … (excerpt truncated here)

Set<IndexedWord> children = new HashSet<>(sg.getChildren(oldHead));
for (IndexedWord child : children) {
  SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child);
  if (oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
    sg.addEdge(oldHead, child, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER, oldEdge.getWeight(), oldEdge.isExtra());
    sg.removeEdge(oldEdge);
  }
}
// …
for (int i = nameParts.get(0).index(), end = oldHead.index(); i < end; i++) {
  IndexedWord node = sg.getNodeByIndexSafe(i);
  if (node == null) {
    return;
  }
  if ( ! nameParts.contains(node) && PUNCT_TAG_FILTER.test(node.tag())) {
    // … (excerpt truncated here)

if (descendantSet.contains(curr) || (doNotAddThese != null && doNotAddThese.contains(curr)) || !acceptWord.test(curr.backingLabel())) {
  return;
}
if (!ignoreCommonTags || !ignoreTags.contains(curr.tag().trim())) {
  descendantSet.add(curr);
}
for (IndexedWord child : g.getChildren(curr)) {
  boolean dontuse = false;
  if (doNotAddThese != null && doNotAddThese.contains(child)) {
    // … (body elided in this excerpt)
  }
  rel = g.reln(curr, child);
  dontuse = checkIfSatisfiesRelConstrains(g, curr, child, allCutOffRels, rel);
  if (child.tag().matches(cutOffTagRegex)) {
    if (DEBUG >= 5) System.out.println("ignored tag " + child /* … line truncated in this excerpt */);
    // …
  }
  // …
}
// …
if (!feat.containsKey(curr.index())) {
  feat.put(curr.index(), new ArrayList<>());
}
GetPatternsFromDataMultiClass.getFeatures(g, curr, false, feat.get(curr.index()), rel);

/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 */
public interface Featurizer extends
    Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>,
    Serializable {
  boolean isSimpleSplit(Counter<String> feats);
}

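Because the interface declares isSimpleSplit in addition to the inherited Function.apply, it has two abstract methods and cannot be written as a lambda. A hedged sketch of a trivial implementation (purely illustrative, not the featurizer CoreNLP ships; assumes edu.stanford.nlp.stats.ClassicCounter and edu.stanford.nlp.util.Triple):

// Hedged sketch, not library code: a constant featurizer that only shows the interface's shape.
Featurizer constantFeaturizer = new Featurizer() {
  @Override
  public Counter<String> apply(Triple<ClauseSplitterSearchProblem.State,
                                      ClauseSplitterSearchProblem.Action,
                                      ClauseSplitterSearchProblem.State> transition) {
    Counter<String> feats = new ClassicCounter<>();  // edu.stanford.nlp.stats.ClassicCounter
    feats.incrementCount("bias");                    // one constant feature per transition
    return feats;
  }

  @Override
  public boolean isSimpleSplit(Counter<String> feats) {
    return false;  // never short-circuit to a "simple split"
  }
};
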
/**
 * Look through the complete graph and add to the list {@code deps} additional
 * dependencies which aren't already in the list but which satisfy the given filters.
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph A graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
protected void getTreeDeps(List<TypedDependency> deps,
                           DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph,
                           Predicate<TypedDependency> puncTypedDepFilter,
                           Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode gov : completeGraph.getAllVertices()) {
    for (TreeGraphNode dep : completeGraph.getChildren(gov)) {
      for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) {
        TypedDependency newDep = new TypedDependency(rel, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label()));
        if (!deps.contains(newDep) && puncTypedDepFilter.test(newDep) && extraTreeDepFilter.test(newDep)) {
          newDep.setExtra();
          deps.add(newDep);
        }
      }
    }
  }
}