edu.stanford.nlp.pipeline Java code examples

/**
 * Wraps the supplied text in a fresh {@code Annotation} and hands it to
 * {@code annotate}, so that the pipeline's annotators are applied to it.
 *
 * @param text The raw text to be annotated
 * @return The newly created Annotation after {@code annotate} has been invoked on it
 */
public Annotation process(String text) {
 final Annotation annotated = new Annotation(text);
 annotate(annotated);
 return annotated;
}

/**
 * Builds an {@code Annotation} for the given text, stores the document date on it,
 * and then runs the supplied pipeline over it.
 *
 * @param pipeline The pipeline whose {@code annotate} method is applied to the new Annotation
 * @param text The text to annotate
 * @param date The value stored under {@code CoreAnnotations.DocDateAnnotation} before annotating
 * @return The Annotation after the pipeline has been run on it
 */
public static Annotation textToAnnotation(AnnotationPipeline pipeline, String text, String date) {
 final Annotation doc = new Annotation(text);
 // The date must be in place before the pipeline runs.
 doc.set(CoreAnnotations.DocDateAnnotation.class, date);
 pipeline.annotate(doc);
 return doc;
}

/**
 * Creates an {@code Annotation} from the given text and passes it through
 * {@code annotate}.
 *
 * @param text The text to process
 * @return The Annotation that was created from {@code text} and then annotated
 */
public Annotation process(String text) {
 final Annotation document = new Annotation(text);
 annotate(document);
 return document;
}

/**
 * Annotates the given text with a {@code StanfordCoreNLP} pipeline and then with a
 * {@code HybridCorefAnnotator}, both configured from the same command-line style arguments.
 *
 * @param text The text to annotate
 * @param args Arguments converted to {@code Properties} via {@code StringUtils.argsToProperties}
 * @return The Annotation after both annotation passes
 */
private static Annotation testAnnoation(String text,String[] args){
 final Annotation document = new Annotation(text);
 final Properties props = StringUtils.argsToProperties(args);
 // First pass: the pipeline configured by the properties.
 new StanfordCoreNLP(props).annotate(document);
 // Second pass: the coref annotator, built only after the pipeline has run.
 new HybridCorefAnnotator(props).annotate(document);
 return document;
}

/**
 * Replaces the annotation's token list with the result of {@code process}.
 * Does nothing if the annotation has no {@code TokensAnnotation}.
 *
 * @param annotation The annotation whose tokens are rewritten in place
 */
@Override
public void annotate(Annotation annotation) {
 if ( ! annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
  return;
 }
 List<CoreLabel> originalTokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
 if (DEBUG) { log.info("CleanXML: starting tokens: " + originalTokens); }
 List<CoreLabel> cleanedTokens = process(annotation, originalTokens);
 // Users of this annotator are assumed not to want the old tokens, so they are
 // discarded; token indexes are redone first in case xml tokens were removed.
 setTokenBeginTokenEnd(cleanedTokens);
 annotation.set(CoreAnnotations.TokensAnnotation.class, cleanedTokens);
 if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); }
}

/**
 * Completes the wrapping process after a pipeline has annotated the underlying document.
 * Nothing is done unless the document has a {@code SentencesAnnotation}.
 */
public void wrapAnnotations() {
 if (this.annotationDocument.get(CoreAnnotations.SentencesAnnotation.class) == null) {
  return;
 }
 // wrap all of the sentences
 wrapSentences();
 // entity mentions are detected via the first sentence only
 boolean hasEntityMentions = ! sentences.isEmpty() && sentences.get(0).entityMentions() != null;
 if (hasEntityMentions) {
  buildDocumentEntityMentionsList();
 }
 // if there are quotes, build a document wide list
 if (QuoteAnnotator.gatherQuotes(this.annotationDocument) != null) {
  buildDocumentQuotesList();
 }
}

/**
 * Annotates a {@code CoreDocument} wrapper: first its underlying annotation is
 * annotated, then the document is asked to wrap the resulting annotations.
 *
 * @param document The wrapper to annotate
 */
public void annotate(CoreDocument document) {
 // Step 1: run annotation on the Annotation held by the wrapper.
 this.annotate(document.annotationDocument);
 // Step 2: let the wrapper finish wrapping now that annotation is done.
 document.wrapAnnotations();
}

/**
 * Assembles a pipeline consisting of, in order, a tokenizer (constructed with
 * {@code "en"}), a sentence splitter, a POS tagger and a time annotator.
 *
 * @return The newly built pipeline
 */
private static AnnotationPipeline makeNumericPipeline() {
 final AnnotationPipeline numericPipeline = new AnnotationPipeline();
 // Annotators are added in the order they should run.
 numericPipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
 numericPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
 numericPipeline.addAnnotator(new POSTaggerAnnotator(false));
 numericPipeline.addAnnotator(new TimeAnnotator(true));
 return numericPipeline;
}

/**
 * Converts the given annotation to an XML document, deriving the output options
 * from the properties of the supplied StanfordCoreNLP pipeline.
 *
 * @param annotation The annotation to convert
 * @param pipeline The pipeline whose properties determine the options
 * @return The document produced by the options-based {@code annotationToDoc} overload
 */
public static Document annotationToDoc(Annotation annotation, StanfordCoreNLP pipeline) {
 return annotationToDoc(annotation, getOptions(pipeline.getProperties()));
}

/**
 * Merges a range of {@code elements} into a single {@code CoreMap} using the
 * default {@code CoreMapAggregator}.
 *
 * @param groupStart Start index of the range, passed through to the aggregator
 * @param groupEnd End index of the range, passed through to the aggregator
 *                 (NOTE(review): inclusive vs. exclusive is defined by the aggregator; confirm there)
 * @return The merged chunk produced by the aggregator
 */
private CoreMap createMergedChunk(int groupStart, int groupEnd) {
 // The former text-based merge path was commented out, which left the null check
 // always true; the aggregator merge is the only live code path.
 return CoreMapAggregator.getDefaultAggregator().merge(elements, groupStart, groupEnd);
}

/**
 * Static helper that prints the annotation to the writer via {@code TextOutputter.print},
 * using options derived from the pipeline's properties.
 *
 * @param annotation The annotation to print
 * @param pw The writer to print to; it is not closed here, since the caller might not
 *           want the underlying stream closed
 * @param pipeline The pipeline whose properties determine the output options
 */
public static void prettyPrint(Annotation annotation, PrintWriter pw, StanfordCoreNLP pipeline) {
 TextOutputter.print(
   annotation,
   pw,
   getOptions(pipeline.getProperties()));
 // No flush needed here: the output has already been flushed.
}

/**
 * Prints the annotation to the output stream, delegating to the options-based
 * {@code print} overload with options derived from the pipeline's properties.
 *
 * @param annotation The annotation to print
 * @param os The stream to print to
 * @param pipeline The pipeline whose properties determine the output options
 * @throws IOException Declared by this method; presumably raised by the delegated overload
 */
public void print(Annotation annotation, OutputStream os, StanfordCoreNLP pipeline) throws IOException {
 print(
   annotation,
   os,
   getOptions(pipeline.getProperties()));
}

/**
 * Serializes this sentence (without its associated document) into a Protocol Buffer.
 * The builder's token list is cleared and repopulated from {@code tokensBuilders}
 * before the message is built; the whole operation is synchronized on {@code impl}.
 *
 * @return The Protocol Buffer representing this sentence.
 */
public CoreNLPProtos.Sentence serialize() {
 synchronized (impl) {
  // Start from an empty token list so the result mirrors tokensBuilders exactly.
  this.impl.clearToken();
  for (CoreNLPProtos.Token.Builder tokenBuilder : this.tokensBuilders) {
   CoreNLPProtos.Token builtToken = tokenBuilder.build();
   this.impl.addToken(builtToken);
  }
  return impl.build();
 }
}

/**
 * Serializes a CoreDocument to the given output stream by writing the
 * annotation it wraps.
 *
 * @param document The CoreDocument to serialize (its internal annotation is serialized)
 * @param os The output stream to serialize to
 * @return The output stream which should be closed
 * @throws IOException Declared by this method; presumably raised by the underlying {@code write} call
 */
public OutputStream writeCoreDocument(CoreDocument document, OutputStream os) throws IOException {
 return write(document.annotation(), os);
}

/**
 * Creates a new document from the given properties and text.
 *
 * @param props The properties stored as this document's default properties
 * @param text The text of the document, set on the underlying protocol buffer builder
 */
public Document(Properties props, String text) {
 this.defaultProps = props;
 this.impl = CoreNLPProtos.Document
   .newBuilder()
   .setText(text);
}

/**
 * Returns the string form of the object returned by {@code annotation()}.
 */
@Override
public String toString() {
 return annotation()
   .toString();
}

/**
 * Creates the annotator for RegexNER -- rule-based NER based on a deterministic mapping file.
 *
 * @param properties The properties passed to the annotator
 * @param name The name passed to the annotator
 * @return A new {@code TokensRegexNERAnnotator}
 */
public Annotator tokensRegexNER(Properties properties, String name) {
 final Annotator regexNer = new TokensRegexNERAnnotator(name, properties);
 return regexNer;
}

/**
 * Builds an {@code Options} instance from the given properties
 * (typically those of a StanfordCoreNLP pipeline).
 *
 * @param properties The properties used to construct the options
 * @return A new {@code Options} constructed from {@code properties}
 */
public static Options getOptions(Properties properties) {
 final Options fromProperties = new Options(properties);
 return fromProperties;
}

/**
 * Creates the annotator for TokensRegex -- annotates patterns found in tokens.
 *
 * @param properties The properties passed to the annotator
 * @param name The name passed to the annotator
 * @return A new {@code TokensRegexAnnotator}
 */
public Annotator tokensregex(Properties properties, String name) {
 final Annotator tokensRegex = new TokensRegexAnnotator(name, properties);
 return tokensRegex;
}

/**
 * Creates the annotator for KBP relations, named {@code Annotator.STANFORD_KBP}.
 *
 * @param properties The properties passed to the annotator
 * @return A new {@code KBPAnnotator}
 */
public Annotator kbp(Properties properties) {
 final Annotator kbpAnnotator = new KBPAnnotator(Annotator.STANFORD_KBP, properties);
 return kbpAnnotator;
}

How to use edu.stanford.nlp.pipeline

Best Java code snippets using edu.stanford.nlp.pipeline (Showing top 20 results out of 450)