Refine search
/**
 * Applies a predicted speaker to the given quote, if both a speaker name and a
 * resolution method were supplied.
 *
 * @param quote the quote annotation to update
 * @param speakerAndMethod pair of (speaker name, method label); either half may be null
 * @return true when the quote was updated, false when either half of the pair was null
 */
public boolean updatePredictions(CoreMap quote, Pair<String, String> speakerAndMethod) {
  String speaker = speakerAndMethod.first;
  String method = speakerAndMethod.second;
  if (speaker == null || method == null) {
    return false;
  }
  quote.set(QuoteAttributionAnnotator.SpeakerAnnotation.class,
      characterMap.get(speaker).get(0).name);
  quote.set(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class,
      "Baseline Top" + method);
  return true;
}
private CoreMap makeTimexMap(TimexNode node, List<CoreLabel> tokens, CoreMap sentence) { CoreMap timexMap = new ArrayCoreMap(); timexMap.set(TimeAnnotations.TimexAnnotation.class, node.timex); timexMap.set(CoreAnnotations.TextAnnotation.class, node.contents); timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, beginOffset(tokens.get(0))); timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endOffset(tokens.get(tokens.size()-1))); timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokens.get(0).index()); timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokens.get(tokens.size()-1).index()); timexMap.set(CoreAnnotations.TokensAnnotation.class, tokens); if (sentence.get(TimeAnnotations.TimexAnnotations.class) == null) { sentence.set(TimeAnnotations.TimexAnnotations.class, new ArrayList<>()); } sentence.get(TimeAnnotations.TimexAnnotations.class).add(timexMap); // update NER for tokens for (CoreLabel token : tokens) { token.set(CoreAnnotations.NamedEntityTagAnnotation.class, "DATE"); token.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, node.timex.value()); token.set(TimeAnnotations.TimexAnnotation.class, node.timex); } return timexMap; }
// NOTE(review): fragment — the enclosing method's signature and the for-loop's
// closing brace are not visible in this view; the sentence-level statements that
// follow tokens.add(token) presumably belong OUTSIDE the per-word loop — confirm
// against the full source before editing.
// Builds one CoreLabel per word (capacity hint 12), setting word/value/lemma and
// character offsets that assume words are separated by exactly one space
// (beginChar advances by length + 1). Afterwards the sentence receives basic,
// collapsed, and CC-processed dependency graphs from `tree`, an alternative
// graph from `maltTree`, doc id / sentence index defaults, and token span
// metadata; finally an Annotation document is created over the gloss.
for (int i = 0; i < words.size(); ++i) { CoreLabel token = new CoreLabel(12); token.setWord(words.get(i)); token.setValue(words.get(i)); token.setBeginPosition(beginChar); token.setEndPosition(beginChar + words.get(i).length()); beginChar += words.get(i).length() + 1; token.setLemma(lemmas.get(i)); token.set(CoreAnnotations.TokenBeginAnnotation.class, i); token.set(CoreAnnotations.TokenEndAnnotation.class, i + 1); tokens.add(token); sentence.set(CoreAnnotations.TokensAnnotation.class, tokens); SemanticGraph graph = tree.apply(tokens); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph); sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph); sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, graph); SemanticGraph maltGraph = maltTree.apply(tokens); sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, maltGraph); sentence.set(CoreAnnotations.DocIDAnnotation.class, docid.orElse("???")); sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1)); sentence.set(CoreAnnotations.TextAnnotation.class, gloss); sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0); sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size()); Annotation doc = new Annotation(gloss); doc.set(CoreAnnotations.TokensAnnotation.class, tokens);
public void annotateSentence(CoreMap sentence) { // this stores all relation mentions generated by this extractor List<RelationMention> relations = new ArrayList<>(); // extractAllRelations creates new objects for every predicted relation for (RelationMention rel : extractAllRelations(sentence)) { // add all relations. potentially useful for a joint model // if (! RelationMention.isUnrelatedLabel(rel.getType())) relations.add(rel); } // caution: this removes the old list of relation mentions! for (RelationMention r: relations) { if (! r.getType().equals(RelationMention.UNRELATED)) { logger.fine("Found positive relation in annotateSentence: " + r); } } sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, relations); }
/**
 * Fallback sieve: assigns the top (majority) speaker to every quote that still
 * has no speaker annotation.
 *
 * @param doc the annotated document whose quotes are examined
 */
public void doMentionToSpeaker(Annotation doc) {
  for (CoreMap quote : doc.get(CoreAnnotations.QuotationsAnnotation.class)) {
    if (quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) != null) {
      continue; // already attributed by an earlier sieve
    }
    quote.set(QuoteAttributionAnnotator.SpeakerAnnotation.class,
        characterMap.get(Counters.toSortedList(topSpeakerList).get(0)).get(0).name);
    quote.set(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class,
        "majority speaker baseline");
  }
}

}
/**
 * Loads auxiliary (unsupervised) documents for dropout training and, when
 * feature discovery is enabled, returns them concatenated with the supervised
 * documents.
 *
 * <p>Every token of the unsupervised data is labeled with the background symbol
 * for both its answer and gold-answer annotations, since that data carries no
 * labels.
 *
 * @param docs the supervised training documents
 * @param readerAndWriter reader used to parse the unsupervised dropout file
 * @return the supervised documents, with the unsupervised ones appended when
 *         {@code flags.doFeatureDiscovery} is set and unsupervised data exists
 */
@Override
protected Collection<List<IN>> loadAuxiliaryData(Collection<List<IN>> docs,
                                                 DocumentReaderAndWriter<IN> readerAndWriter) {
  if (flags.unsupDropoutFile != null) {
    log.info("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
    Timing timer = new Timing();
    timer.start();
    unsupDocs = new ArrayList<>();
    ObjectBank<List<IN>> unsupObjBank = makeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
    for (List<IN> doc : unsupObjBank) {
      // Unsupervised data is unlabeled: mark every token as background.
      for (IN tok : doc) {
        tok.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        tok.set(CoreAnnotations.GoldAnswerAnnotation.class, flags.backgroundSymbol);
      }
      unsupDocs.add(doc);
    }
    long elapsedMs = timer.stop();
    // Fixed doubled colon in the original message ("Time to read: : ").
    log.info("Time to read: " + Timing.toSecondsString(elapsedMs) + " seconds");
  }
  if (unsupDocs != null && flags.doFeatureDiscovery) {
    // Presize to avoid intermediate growth while concatenating both lists.
    List<List<IN>> totalDocs = new ArrayList<>(docs.size() + unsupDocs.size());
    totalDocs.addAll(docs);
    totalDocs.addAll(unsupDocs);
    return totalDocs;
  } else {
    return docs;
  }
}
/**
 * Appends the given entity mentions to the sentence's entity-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mentions
 * @param args the entity mentions to add
 */
public static void addEntityMentions(CoreMap sentence, Collection<EntityMention> args) {
  List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions);
  }
  mentions.addAll(args);
}
/** * Concatenates separate words of a date or other numeric quantity into one node (e.g., 3 November -> 3_November) * Tag is CD or NNP, and other words are added to the remove list */ private static <E extends CoreMap> void concatenateNumericString(List<E> words, List<E> toRemove) { if (words.size() <= 1) return; boolean first = true; StringBuilder newText = new StringBuilder(); E foundEntity = null; for (E word : words) { if (foundEntity == null && (word.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD") || word.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP"))) { foundEntity = word; } if (first) { first = false; } else { newText.append('_'); } newText.append(word.get(CoreAnnotations.TextAnnotation.class)); } if (foundEntity == null) { foundEntity = words.get(0);//if we didn't find one with the appropriate tag, just take the first one } toRemove.addAll(words); toRemove.remove(foundEntity); foundEntity.set(CoreAnnotations.PartOfSpeechAnnotation.class, "CD"); // cdm 2008: is this actually good for dates?? String collapsed = newText.toString(); foundEntity.set(CoreAnnotations.TextAnnotation.class, collapsed); foundEntity.set(CoreAnnotations.OriginalTextAnnotation.class, collapsed); }
/**
 * Adds a single relation mention to the sentence's relation-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mention
 * @param arg the relation mention to add
 */
public static void addRelationMention(CoreMap sentence, RelationMention arg) {
  List<RelationMention> mentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, mentions);
  }
  mentions.add(arg);
}
// NOTE(review): garbled fragment — `chunk` is declared twice, braces do not
// balance, and `tokenEnd`, `prevTagType`, `chunks`, and the chunk-key params are
// not declared within this view. Lines appear to be missing from this extract;
// restore from the full source before editing.
// Intent (presumably): walk the tokens, detect label-tag-type boundaries, and
// when a chunk closes, materialize it via ChunkAnnotationUtils.getAnnotatedChunk,
// stamp it with the previous tag type, and collect it; the second
// getAnnotatedChunk call looks like the end-of-list flush for a still-open
// chunk — TODO confirm.
int tokenBegin = -1; for (int i = 0; i < tokens.size(); i++) { CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); CoreLabel prev = null; if (i > 0) { prev = tokens.get(i-1); CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type); chunks.add(chunk); tokenBegin = -1; CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokens.size(), totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type); chunks.add(chunk);
/**
 * Draws one label sequence from the sampler and returns it as a fresh token
 * list whose answer annotations carry the sampled class labels. The input
 * tokens themselves are not modified.
 */
@Override
public List<IN> drawSample() {
  int[] sampled = sampler.bestSequence(model);
  List<IN> result = new ArrayList<>();
  int pos = 0;
  for (IN original : input) {
    IN copy = tokenFactory.makeToken(original);
    copy.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(sampled[pos++]));
    result.add(copy);
  }
  return result;
}
};
// NOTE(review): garbled fragment — braces do not balance and `sentTokenBegin` /
// `sentTokenEnd` are never assigned within this view; lines appear to be missing
// from this extract. Restore from the full source before editing.
// Intent (presumably): align each sentence's first token against the document
// token list by identity (==, not equals), advancing index i; returns false when
// a sentence's first token cannot be found, otherwise records the sentence's
// token span — TODO confirm where sentTokenBegin/sentTokenEnd are computed.
List<CoreLabel> docTokens = docAnnotation.get(CoreAnnotations.TokensAnnotation.class); List<CoreMap> sentences = docAnnotation.get(CoreAnnotations.SentencesAnnotation.class); int i = 0; CoreLabel curDocToken = docTokens.get(0); for (CoreMap sentence:sentences) { List<CoreLabel> sentTokens = sentence.get(CoreAnnotations.TokensAnnotation.class); CoreLabel sentTokenFirst = sentTokens.get(0); while (curDocToken != sentTokenFirst) { i++; if (i >= docTokens.size()) { return false; } curDocToken = docTokens.get(i); sentence.set(CoreAnnotations.TokenBeginAnnotation.class, sentTokenBegin); sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentTokenEnd);
/**
 * Annotates a CoreMap representing a chunk with basic chunk information:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - begin offset of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - end offset of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in this chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 *
 * @param chunk CoreMap to be annotated
 * @param tokens list of tokens to look for chunks
 * @param tokenStartIndex index (relative to the current token list) at which this chunk starts
 * @param tokenEndIndex index (relative to the current token list) at which this chunk ends (exclusive)
 * @param totalTokenOffset offset converting local indices to indices in the original token list
 */
public static void annotateChunk(CoreMap chunk, List<CoreLabel> tokens,
                                 int tokenStartIndex, int tokenEndIndex, int totalTokenOffset) {
  List<CoreLabel> chunkTokens = new ArrayList<>(tokens.subList(tokenStartIndex, tokenEndIndex));
  CoreLabel firstToken = chunkTokens.get(0);
  CoreLabel lastToken = chunkTokens.get(chunkTokens.size() - 1);
  chunk.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class,
      firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
  chunk.set(CoreAnnotations.CharacterOffsetEndAnnotation.class,
      lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
  chunk.set(CoreAnnotations.TokensAnnotation.class, chunkTokens);
  chunk.set(CoreAnnotations.TokenBeginAnnotation.class, tokenStartIndex + totalTokenOffset);
  chunk.set(CoreAnnotations.TokenEndAnnotation.class, tokenEndIndex + totalTokenOffset);
}
/**
 * Appends the given relation mentions to the sentence's relation-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mentions
 * @param args the relation mentions to add
 */
public static void addRelationMentions(CoreMap sentence, Collection<RelationMention> args) {
  List<RelationMention> mentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, mentions);
  }
  mentions.addAll(args);
}
// NOTE(review): fragment — the enclosing method's signature is not visible and
// the for-loop's closing brace is missing from this view; the final TokensAnnotation
// set presumably belongs AFTER the loop — confirm against the full source.
// Intent: merge chunks [chunkIndexStart, chunkIndexEnd) into newChunk, copying
// the span's character/token boundaries from the first and last chunk and
// concatenating all member chunks' tokens.
CoreMap firstChunk = chunkList.get(chunkIndexStart); CoreMap lastChunk = chunkList.get(chunkIndexEnd-1); int firstCharOffset = firstChunk.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class); int lastCharOffset = lastChunk.get(CoreAnnotations.CharacterOffsetEndAnnotation.class); int firstTokenIndex = firstChunk.get(CoreAnnotations.TokenBeginAnnotation.class); int lastTokenIndex = lastChunk.get(CoreAnnotations.TokenEndAnnotation.class); newChunk.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, firstCharOffset); newChunk.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, lastCharOffset); newChunk.set(CoreAnnotations.TokenBeginAnnotation.class, firstTokenIndex); newChunk.set(CoreAnnotations.TokenEndAnnotation.class, lastTokenIndex); List<CoreLabel> tokens = new ArrayList<>(lastTokenIndex - firstTokenIndex); for (int i = chunkIndexStart; i < chunkIndexEnd; i++) { CoreMap chunk = chunkList.get(i); tokens.addAll(chunk.get(CoreAnnotations.TokensAnnotation.class)); newChunk.set(CoreAnnotations.TokensAnnotation.class, tokens);
/**
 * Appends the given sentences to the dataset's sentence list, creating and
 * attaching the list first if the dataset has none yet.
 *
 * @param dataset the dataset CoreMap to update
 * @param sentences the sentences to append
 */
public static void addSentences(CoreMap dataset, List<CoreMap> sentences) {
  List<CoreMap> sents = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  if (sents == null) {
    sents = new ArrayList<>();
    dataset.set(CoreAnnotations.SentencesAnnotation.class, sents);
  }
  // addAll replaces the original element-by-element copy loop (same behavior).
  sents.addAll(sentences);
}
// NOTE(review): garbled fragment — braces do not balance, and `token`, `index`,
// `deps`, `bf`, `extraDeps`, `uposLabels`, and `tree` are not declared within
// this view; lines appear to be missing from this extract. Restore from the full
// source before editing.
// Intent (presumably): map grammatical-structure leaf indices to 1-based
// positions, build CoreLabel tokens (word from the leaf, tag from its parent,
// coarse tag from the UPOS labels), then emit dependencies in CoNLL-X format,
// routing "extra" dependencies to a separate list — TODO confirm.
List<Tree> gsLeaves = gs.root().getLeaves(); for (int i = 0; i < gsLeaves.size(); i++) { TreeGraphNode leaf = (TreeGraphNode) gsLeaves.get(i); indexToPos.put(leaf.label().index(), i + 1); token.setWord(leaf.value()); token.setTag(leaf.parent(tree).value()); token.set(CoreAnnotations.CoarseTagAnnotation.class, uposLabels.get(index - 1).value()); tokens.add(token); sentence.set(CoreAnnotations.TokensAnnotation.class, tokens); bf.append(dependenciesToCoNLLXString(deps, sentence)); } else { for (TypedDependency dep : deps) { if (dep.extra()) { extraDeps.add(dep); } else { bf.append(toStringIndex(dep, indexToPos));