/**
 * Split a phrase into word tokens.
 * Uses the configured CoreNLP tokenizer when available; otherwise falls back
 * to regex splitting (with possessive "'s" separated into its own token).
 *
 * @param phraseText the raw phrase to split
 * @return the tokens of the phrase, in order
 */
public String[] splitText(String phraseText) {
  if (tokenizer != null) {
    // Tokenize through the annotation pipeline.
    Annotation annotation = new Annotation(phraseText);
    tokenizer.annotate(annotation);
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    String[] result = new String[tokens.size()];
    int idx = 0;
    for (CoreLabel token : tokens) {
      result[idx++] = token.word();
    }
    return result;
  }
  // Regex fallback: detach possessive 's, then split on the delimiter pattern.
  String normalized = possPattern.matcher(phraseText).replaceAll(" 's$1");
  return delimPattern.split(normalized);
}
// NOTE(review): truncated snippet — the for/if blocks opened here close outside
// this excerpt, so the full control flow is not visible. What is visible: walk
// every annotation class the annotator `an` declares as required, and for each
// one not already in `requirementsSatisfied`, add everything `an` itself
// satisfies to that set. Presumably pipeline-construction bookkeeping that
// verifies prerequisites before registering a stage — TODO confirm against the
// complete enclosing method.
Set<Class<? extends CoreAnnotation>> allRequirements = an.requires(); for (Class<? extends CoreAnnotation> requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * Returns the annotations this pipeline requires as input.
 * An empty pipeline requires nothing; otherwise the pipeline's external
 * requirements are exactly those of its first stage.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
  return annotators.isEmpty()
      ? Collections.emptySet()
      : annotators.get(0).requires();
}
/**
 * Returns the annotations this pipeline produces: the union of what every
 * stage in the pipeline satisfies.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
  Set<Class<? extends CoreAnnotation>> union = Generics.newHashSet();
  for (int i = 0; i < annotators.size(); i++) {
    union.addAll(annotators.get(i).requirementsSatisfied());
  }
  return union;
}
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/**
 * Everything any stage of this pipeline produces counts as satisfied.
 *
 * @return the union of the requirementsSatisfied sets of all stages
 */
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
  Set<Class<? extends CoreAnnotation>> result = Generics.newHashSet();
  for (Annotator stage : annotators) {
    result.addAll(stage.requirementsSatisfied());
  }
  return result;
}
/**
 * Annotate a sentence with the base pipeline plus a time annotator.
 * When a document date is supplied it is attached to the annotation first,
 * so the time annotator can resolve relative expressions against it.
 *
 * @param sentence      the text to process
 * @param dateString    the reference document date, or null/empty for none
 * @param timeAnnotator the temporal annotator to run after the base pipeline
 * @return the fully annotated document
 */
public Annotation process(String sentence, String dateString, Annotator timeAnnotator) {
  log.info("Processing text \"" + sentence + "\" with dateString = " + dateString);
  Annotation anno = new Annotation(sentence);
  boolean haveDate = dateString != null && !dateString.isEmpty();
  if (haveDate) {
    anno.set(CoreAnnotations.DocDateAnnotation.class, dateString);
  }
  pipeline.annotate(anno);
  timeAnnotator.annotate(anno);
  return anno;
}
// NOTE(review): truncated snippet — the for/if blocks opened here close outside
// this excerpt. Visible behavior: for each Requirement the annotator `an`
// declares, if it is not already in `requirementsSatisfied`, add everything
// `an` satisfies to that set. Appears to be the older Requirement-based
// counterpart of the Class-based bookkeeping elsewhere in this file — TODO
// confirm against the complete enclosing method.
Set<Requirement> allRequirements = an.requires(); for (Requirement requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * The pipeline's input requirements are those of its first stage; an empty
 * pipeline has none.
 */
@Override
public Set<Requirement> requires() {
  if (annotators.isEmpty()) {
    return Collections.emptySet();
  }
  Annotator first = annotators.get(0);
  return first.requires();
}
/**
 * Returns the union of the Requirements satisfied by every stage of the
 * pipeline.
 */
@Override
public Set<Requirement> requirementsSatisfied() {
  Set<Requirement> combined = Generics.newHashSet();
  for (Annotator a : annotators) {
    combined.addAll(a.requirementsSatisfied());
  }
  return combined;
}
/**
 * Lazily run KBP relation extraction over this document, caching the result.
 * No-op if KBP has already run. Otherwise: runs coref as a prerequisite,
 * annotates entity mentions, runs the KBP annotator, then copies the extracted
 * relation triples back into this document's sentence protos (serialized under
 * the `serializer` lock). The default cached annotators are used when `props`
 * is one of the shared sentinel Properties objects (identity comparison is
 * deliberate); custom props get a per-props annotator via getOrCreate.
 *
 * @param props annotator properties, or a shared sentinel for the defaults
 * @return this document, for chaining
 */
synchronized Document runKBP(Properties props) { if (haveRunKBP) { return this; } // Run prerequisites coref(props); Supplier<Annotator> entityMention = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultEntityMentions : getOrCreate(STANFORD_ENTITY_MENTIONS, props, () -> backend.entityMentions(props, STANFORD_ENTITY_MENTIONS)); Annotation ann = asAnnotation(true); entityMention.get().annotate(ann); // Run annotator Supplier<Annotator> kbp = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultKBP : getOrCreate(STANFORD_KBP, props, () -> backend.kbp(props)); kbp.get().annotate(ann); // Update data synchronized (serializer) { for (int i = 0; i < sentences.size(); ++i) { CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i); Collection<RelationTriple> triples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class); sentences.get(i).updateKBP(triples.stream().map(ProtobufAnnotationSerializer::toProto)); } } // Return haveRunKBP = true; return this; }
// NOTE(review): truncated snippet (duplicate of the earlier Class-based
// fragment) — the for/if blocks opened here close outside this excerpt.
// Visible behavior: for each annotation class `an` requires, if not already in
// `requirementsSatisfied`, add everything `an` satisfies to that set. TODO
// confirm intent against the complete enclosing method.
Set<Class<? extends CoreAnnotation>> allRequirements = an.requires(); for (Class<? extends CoreAnnotation> requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * Delegates the pipeline's input requirements to its first stage; with no
 * stages there is nothing to require.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
  if (!annotators.isEmpty()) {
    return annotators.get(0).requires();
  }
  return Collections.emptySet();
}
synchronized Document runRegexner(Properties props) { // Run prerequisites runNER(props); // Run annotator Supplier<Annotator> ner = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultRegexner : getOrCreate(STANFORD_REGEXNER, props, () -> backend.tokensRegexNER(props, STANFORD_REGEXNER)); Annotation ann = asAnnotation(true); ner.get().annotate(ann); // Update data for (int i = 0; i < sentences.size(); ++i) { sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (pair) -> pair.first.setNer(pair.second), CoreLabel::ner); } return this; }
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(Annotator.POS_REQUIREMENT)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/**
 * Lazily run Open Information Extraction over this document, caching the
 * result. No-op if OpenIE has already run. Otherwise: runs natural-logic
 * annotation as a prerequisite, runs the OpenIE annotator, then copies the
 * extracted relation triples back into this document's sentence protos
 * (serialized under the `serializer` lock). Shared sentinel Properties objects
 * (identity comparison is deliberate) select the default cached annotator;
 * custom props get a per-props annotator via getOrCreate.
 *
 * @param props annotator properties, or a shared sentinel for the defaults
 * @return this document, for chaining
 */
synchronized Document runOpenie(Properties props) { if (haveRunOpenie) { return this; } // Run prerequisites runNatlog(props); // Run annotator Supplier<Annotator> openie = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultOpenie : getOrCreate(STANFORD_OPENIE, props, () -> backend.openie(props)); Annotation ann = asAnnotation(true); openie.get().annotate(ann); // Update data synchronized (serializer) { for (int i = 0; i < sentences.size(); ++i) { CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i); Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class); sentences.get(i).updateOpenIE(triples.stream().map(ProtobufAnnotationSerializer::toProto)); } } // Return haveRunOpenie = true; return this; }
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/** * Run the pipeline on an input annotation. * The annotation is modified in place. * * @param annotation The input annotation, usually a raw document */ @Override public void annotate(Annotation annotation) { Iterator<MutableLong> it = accumulatedTime.iterator(); Timing t = new Timing(); for (Annotator annotator : annotators) { if (Thread.interrupted()) { // Allow interrupting throw new RuntimeInterruptedException(); } if (TIME) { t.start(); } annotator.annotate(annotation); if (TIME) { long elapsed = t.stop(); MutableLong m = it.next(); m.incValue(elapsed); } } }