/**
 * Convenience entry point: wraps the given text in a fresh {@link Annotation}
 * and runs this pipeline over it.
 *
 * @param text The text to process
 * @return The newly created Annotation after {@code annotate} has been applied to it
 */
public Annotation process(String text) {
  final Annotation result = new Annotation(text);
  this.annotate(result);
  return result;
}
/**
 * Builds an {@link Annotation} for the given text, stores the supplied document
 * date on it, and runs the given pipeline over it.
 *
 * @param pipeline The pipeline used to annotate the new document
 * @param text The raw text of the document
 * @param date Value stored under {@code CoreAnnotations.DocDateAnnotation};
 *             its format is not validated here
 * @return The annotated document
 */
public static Annotation textToAnnotation(AnnotationPipeline pipeline, String text, String date) {
  final Annotation doc = new Annotation(text);
  // Attach the document date before the pipeline runs.
  doc.set(CoreAnnotations.DocDateAnnotation.class, date);
  pipeline.annotate(doc);
  return doc;
}
/**
 * Runs the whole pipeline over the supplied text.
 *
 * <p>A new {@link Annotation} is created from the text, passed to
 * {@code annotate}, and then handed back to the caller.
 *
 * @param text The text to process
 * @return An Annotation object containing the output of all annotators
 */
public Annotation process(String text) {
  final Annotation annotated = new Annotation(text);
  this.annotate(annotated);
  return annotated;
}
/**
 * Annotates the given text first with a {@code StanfordCoreNLP} pipeline and then
 * with a {@code HybridCorefAnnotator}, both configured from the same arguments.
 *
 * @param text The raw text to annotate
 * @param args Arguments converted to {@link Properties} via {@code StringUtils.argsToProperties}
 * @return The document after both annotation passes
 */
private static Annotation testAnnoation(String text, String[] args) {
  final Annotation doc = new Annotation(text);
  final Properties config = StringUtils.argsToProperties(args);

  // General pipeline pass first ...
  new StanfordCoreNLP(config).annotate(doc);
  // ... then the coreference annotator, built only after the first pass has finished.
  new HybridCorefAnnotator(config).annotate(doc);

  return doc;
}
@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); if (DEBUG) { log.info("CleanXML: starting tokens: " + tokens); } List<CoreLabel> newTokens = process(annotation, tokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them // redo the token indexes if xml tokens have been removed setTokenBeginTokenEnd(newTokens); annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens); if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); } } }
/** complete the wrapping process post annotation by a pipeline **/ public void wrapAnnotations() { // wrap all of the sentences if (this.annotationDocument.get(CoreAnnotations.SentencesAnnotation.class) != null) { wrapSentences(); // if there are entity mentions, build a document wide list if ( ! sentences.isEmpty() && sentences.get(0).entityMentions() != null) { buildDocumentEntityMentionsList(); } // if there are quotes, build a document wide list if (QuoteAnnotator.gatherQuotes(this.annotationDocument) != null) buildDocumentQuotesList(); } }
/** Annotate the CoreDocument wrapper. **/ public void annotate(CoreDocument document) { // annotate the underlying Annotation this.annotate(document.annotationDocument); // wrap the sentences and entity mentions post annotation document.wrapAnnotations(); }
/**
 * Assembles a new pipeline made of a {@code TokenizerAnnotator},
 * a {@code WordsToSentencesAnnotator}, a {@code POSTaggerAnnotator} and a
 * {@code TimeAnnotator}, added in that order.
 *
 * @return The newly built pipeline
 */
private static AnnotationPipeline makeNumericPipeline() {
  final AnnotationPipeline numericPipeline = new AnnotationPipeline();

  // NOTE(review): the boolean constructor arguments are presumably verbosity
  // flags and "en" the tokenizer language -- confirm against the annotator constructors.
  numericPipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
  numericPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
  numericPipeline.addAnnotator(new POSTaggerAnnotator(false));
  numericPipeline.addAnnotator(new TimeAnnotator(true));

  return numericPipeline;
}
/**
 * Converts the given annotation to an XML document, taking the output options
 * from the properties of the given StanfordCoreNLP pipeline.
 *
 * @param annotation The annotation to convert
 * @param pipeline The pipeline whose properties are passed to {@code getOptions}
 * @return The document produced by the options-based {@code annotationToDoc} overload
 */
public static Document annotationToDoc(Annotation annotation, StanfordCoreNLP pipeline) {
  return annotationToDoc(annotation, getOptions(pipeline.getProperties()));
}
private CoreMap createMergedChunk(int groupStart, int groupEnd) { CoreMap merged = null; /* if (annotation != null) { // Take start and end merged = ChunkAnnotationUtils.getMergedChunk(elements, annotation.get(CoreAnnotations.TextAnnotation.class), groupStart, groupEnd); } */ if (merged == null) { // Okay, have to go through these one by one and merge them merged = CoreMapAggregator.getDefaultAggregator().merge(elements, groupStart, groupEnd); } return merged; }
/** Static helper */ public static void prettyPrint(Annotation annotation, PrintWriter pw, StanfordCoreNLP pipeline) { TextOutputter.print(annotation, pw, getOptions(pipeline.getProperties())); // already flushed // don't close, might not want to close underlying stream }
/**
 * Prints the annotation to the given output stream, using options built from
 * the properties of the given pipeline.
 *
 * @param annotation The annotation to print
 * @param os The stream to print to
 * @param pipeline The pipeline whose properties are passed to {@code getOptions}
 * @throws IOException If the options-based {@code print} overload throws it
 */
public void print(Annotation annotation, OutputStream os, StanfordCoreNLP pipeline)
    throws IOException {
  this.print(annotation, os, getOptions(pipeline.getProperties()));
}
/**
 * Serialize this sentence (but not the associated document!) into a Protocol Buffer.
 *
 * <p>While holding the monitor of {@code impl}, the builder's tokens are cleared
 * and re-added from {@code tokensBuilders} before the message is built.
 *
 * @return The Protocol Buffer representing this sentence.
 */
public CoreNLPProtos.Sentence serialize() {
  synchronized (impl) {
    // Rebuild the token list from scratch so the result reflects the current
    // contents of every token builder.
    impl.clearToken();
    for (CoreNLPProtos.Token.Builder tokenBuilder : tokensBuilders) {
      impl.addToken(tokenBuilder.build());
    }
    return impl.build();
  }
}
/**
 * Append a CoreDocument to this output stream.
 *
 * <p>Only the Annotation returned by {@code document.annotation()} is
 * serialized; the work is delegated to {@code write(Annotation, OutputStream)}.
 *
 * @param document The CoreDocument to serialize (its internal annotation is serialized)
 * @param os The output stream to serialize to
 * @return The output stream which should be closed
 * @throws IOException If the delegated {@code write} call throws it
 */
public OutputStream writeCoreDocument(CoreDocument document, OutputStream os) throws IOException {
  return write(document.annotation(), os);
}
/**
 * Create a new document from the passed in text and the given properties.
 *
 * @param props The properties stored as this document's default properties.
 * @param text The text of the document; set on a new {@code CoreNLPProtos.Document} builder.
 */
public Document(Properties props, String text) {
  this.impl = CoreNLPProtos.Document.newBuilder().setText(text);
  this.defaultProps = props;
}
/**
 * Returns the string form of the annotation returned by {@code annotation()}.
 *
 * @return The result of calling {@code toString()} on that annotation
 */
@Override
public String toString() {
  final String rendered = annotation().toString();
  return rendered;
}
/**
 * Creates the annotator for RegexNER -- rule-based NER based on a deterministic mapping file.
 *
 * @param properties The properties handed to the new annotator
 * @param name The name handed to the new annotator
 * @return A new {@code TokensRegexNERAnnotator}
 */
public Annotator tokensRegexNER(Properties properties, String name) {
  final Annotator regexNerAnnotator = new TokensRegexNERAnnotator(name, properties);
  return regexNerAnnotator;
}
/**
 * Builds an {@code Options} instance from the given properties (for example,
 * the properties of a StanfordCoreNLP pipeline).
 *
 * @param properties The properties passed to the {@code Options} constructor
 * @return The newly created options
 */
public static Options getOptions(Properties properties) {
  final Options options = new Options(properties);
  return options;
}
/**
 * Creates the annotator for TokensRegex -- annotate patterns found in tokens.
 *
 * @param properties The properties handed to the new annotator
 * @param name The name handed to the new annotator
 * @return A new {@code TokensRegexAnnotator}
 */
public Annotator tokensregex(Properties properties, String name) {
  final Annotator tokensRegexAnnotator = new TokensRegexAnnotator(name, properties);
  return tokensRegexAnnotator;
}
/**
 * Creates the annotator for KBP relations.
 *
 * @param properties The properties handed to the new annotator
 * @return A new {@code KBPAnnotator} created under the name {@code Annotator.STANFORD_KBP}
 */
public Annotator kbp(Properties properties) {
  final Annotator kbpAnnotator = new KBPAnnotator(Annotator.STANFORD_KBP, properties);
  return kbpAnnotator;
}