public String[] splitText(String phraseText) {
  String[] words;
  if (tokenizer != null) {
    Annotation annotation = new Annotation(phraseText);
    tokenizer.annotate(annotation);
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    words = new String[tokens.size()];
    for (int i = 0; i < tokens.size(); i++) {
      words[i] = tokens.get(i).word();
    }
  } else {
    phraseText = possPattern.matcher(phraseText).replaceAll(" 's$1");
    words = delimPattern.split(phraseText);
  }
  return words;
}
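// A minimal sketch of the regex fallback path above. possPattern and delimPattern are not
// shown in this snippet, so the two patterns below are illustrative assumptions, chosen only
// to demonstrate how the possessive marker gets split off before whitespace splitting.
Pattern possPattern = Pattern.compile("'s(\\s|$)");  // assumed definition, not from the source
Pattern delimPattern = Pattern.compile("\\s+");      // assumed definition, not from the source
String phraseText = possPattern.matcher("John's book").replaceAll(" 's$1");
String[] words = delimPattern.split(phraseText);
// With the assumed patterns, words is ["John", "'s", "book"]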
public Annotation process(String sentence, String dateString, Annotator timeAnnotator) {
  log.info("Processing text \"" + sentence + "\" with dateString = " + dateString);
  Annotation anno = new Annotation(sentence);
  if (dateString != null && ! dateString.isEmpty()) {
    anno.set(CoreAnnotations.DocDateAnnotation.class, dateString);
  }
  pipeline.annotate(anno);
  timeAnnotator.annotate(anno);
  return anno;
}
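// A minimal usage sketch for process(...) above, assuming the enclosing class holds a
// tokenize/ssplit/pos StanfordCoreNLP pipeline and that SUTime's TimeAnnotator is the time
// annotator being passed in; the property values and example text are illustrative.
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotator timeAnnotator = new TimeAnnotator("sutime", props);
// called from inside the same class, so the pipeline field above is visible to process(...)
Annotation anno = process("She arrived three days ago.", "2017-07-01", timeAnnotator);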
synchronized Document runKBP(Properties props) {
  if (haveRunKBP) {
    return this;
  }
  // Run prerequisites
  coref(props);
  Supplier<Annotator> entityMention = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultEntityMentions
      : getOrCreate(STANFORD_ENTITY_MENTIONS, props, () -> backend.entityMentions(props, STANFORD_ENTITY_MENTIONS));
  Annotation ann = asAnnotation(true);
  entityMention.get().annotate(ann);
  // Run annotator
  Supplier<Annotator> kbp = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultKBP
      : getOrCreate(STANFORD_KBP, props, () -> backend.kbp(props));
  kbp.get().annotate(ann);
  // Update data
  synchronized (serializer) {
    for (int i = 0; i < sentences.size(); ++i) {
      CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i);
      Collection<RelationTriple> triples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
      sentences.get(i).updateKBP(triples.stream().map(ProtobufAnnotationSerializer::toProto));
    }
  }
  // Return
  haveRunKBP = true;
  return this;
}
synchronized Document runRegexner(Properties props) {
  // Run prerequisites
  runNER(props);
  // Run annotator
  Supplier<Annotator> ner = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultRegexner
      : getOrCreate(STANFORD_REGEXNER, props, () -> backend.tokensRegexNER(props, STANFORD_REGEXNER));
  Annotation ann = asAnnotation(true);
  ner.get().annotate(ann);
  // Update data
  for (int i = 0; i < sentences.size(); ++i) {
    sentences.get(i).updateTokens(
        ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
        (pair) -> pair.first.setNer(pair.second),
        CoreLabel::ner);
  }
  return this;
}
synchronized Document runOpenie(Properties props) {
  if (haveRunOpenie) {
    return this;
  }
  // Run prerequisites
  runNatlog(props);
  // Run annotator
  Supplier<Annotator> openie = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultOpenie
      : getOrCreate(STANFORD_OPENIE, props, () -> backend.openie(props));
  Annotation ann = asAnnotation(true);
  openie.get().annotate(ann);
  // Update data
  synchronized (serializer) {
    for (int i = 0; i < sentences.size(); ++i) {
      CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i);
      Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
      sentences.get(i).updateOpenIE(triples.stream().map(ProtobufAnnotationSerializer::toProto));
    }
  }
  // Return
  haveRunOpenie = true;
  return this;
}
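// A hypothetical driver for the OpenIE path above through the simple API, assuming Document
// and Sentence come from edu.stanford.nlp.simple and that Sentence exposes an openieTriples()
// accessor that lazily calls runOpenie(...) (and its NatLog prerequisite) on the enclosing
// Document; the example text is illustrative.
Document doc = new Document("Cats eat mice. Birds eat seeds.");
for (Sentence sent : doc.sentences()) {
  for (RelationTriple triple : sent.openieTriples()) {
    System.out.println(triple.subjectGloss() + " | " + triple.relationGloss() + " | " + triple.objectGloss());
  }
}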
/**
 * Run the pipeline on an input annotation.
 * The annotation is modified in place.
 *
 * @param annotation The input annotation, usually a raw document
 */
@Override
public void annotate(Annotation annotation) {
  Iterator<MutableLong> it = accumulatedTime.iterator();
  Timing t = new Timing();
  for (Annotator annotator : annotators) {
    if (Thread.interrupted()) {  // Allow interrupting
      throw new RuntimeInterruptedException();
    }
    if (TIME) {
      t.start();
    }
    annotator.annotate(annotation);
    if (TIME) {
      long elapsed = t.stop();
      MutableLong m = it.next();
      m.incValue(elapsed);
    }
  }
}
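// A minimal sketch of driving annotate(...) above through StanfordCoreNLP; the annotator list
// in the properties and the example text are illustrative, not taken from the source.
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation annotation = new Annotation("Stanford is in California.");
pipeline.annotate(annotation);  // runs each annotator in order, modifying annotation in place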
synchronized Document runNER(Properties props) {
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawToken(0).hasNer()) {
    return this;
  }
  // Run prerequisites
  runPOS(props);
  // Run annotator
  Supplier<Annotator> ner = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultNER
      : getOrCreate(STANFORD_NER, props, () -> backend.ner(props));
  Annotation ann = asAnnotation(true);
  ner.get().annotate(ann);
  // Update data
  for (int i = 0; i < sentences.size(); ++i) {
    sentences.get(i).updateTokens(
        ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
        (pair) -> pair.first.setNer(pair.second),
        CoreLabel::ner);
  }
  return this;
}
synchronized Document runPOS(Properties props) {
  // Cached result
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawToken(0).hasPos()) {
    return this;
  }
  // Prerequisites
  sentences();
  // Run annotator
  Supplier<Annotator> pos = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultPOS
      : getOrCreate(STANFORD_POS, props, () -> backend.posTagger(props));
  Annotation ann = asAnnotation(false);
  pos.get().annotate(ann);
  // Update data
  for (int i = 0; i < sentences.size(); ++i) {
    sentences.get(i).updateTokens(
        ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
        (pair) -> pair.first.setPos(pair.second),
        CoreLabel::tag);
  }
  return this;
}
private Tree parse(List<CoreLabel> tokens, List<ParserConstraint> constraints) {
  CoreMap sent = new Annotation("");
  sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
  sent.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
  Annotation doc = new Annotation("");
  List<CoreMap> sents = new ArrayList<>(1);
  sents.add(sent);
  doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
  getParser().annotate(doc);
  sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
  return sents.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}
synchronized Document runLemma(Properties props) {
  // Cached result
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawToken(0).hasLemma()) {
    return this;
  }
  // Prerequisites
  runPOS(props);
  // Run annotator
  Supplier<Annotator> lemma = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultLemma
      : getOrCreate(STANFORD_LEMMA, props, () -> backend.morpha(props, false));
  Annotation ann = asAnnotation(true);
  lemma.get().annotate(ann);
  // Update data
  for (int i = 0; i < sentences.size(); ++i) {
    sentences.get(i).updateTokens(
        ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
        (pair) -> pair.first.setLemma(pair.second),
        CoreLabel::lemma);
  }
  return this;
}
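// A hypothetical driver for the lemma path above via the simple API, assuming
// Sentence.lemmas() triggers runLemma(...) (and its POS prerequisite) lazily; the example
// text and printed output are illustrative.
Sentence sent = new Sentence("The mice were eaten quickly.");
System.out.println(sent.lemmas());  // e.g. [the, mouse, be, eat, quickly, .]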
protected Tree parse(List<CoreLabel> tokens, List<ParserConstraint> constraints) {
  CoreMap sent = new Annotation("");
  sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
  sent.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
  Annotation doc = new Annotation("");
  List<CoreMap> sents = new ArrayList<>();
  sents.add(sent);
  doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
  getParser().annotate(doc);
  sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
  return sents.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}
synchronized Document runSentiment(Properties props) {
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawSentence().hasSentiment()) {
    return this;
  }
  // Run prerequisites
  runParse(props);
  if (this.sentences != null && this.sentences.size() > 0 && !this.sentences.get(0).rawSentence().hasBinarizedParseTree()) {
    throw new IllegalStateException("No binarized parse tree (perhaps it's not supported in this language?)");
  }
  // Run annotator
  Annotation ann = asAnnotation(true);
  Supplier<Annotator> sentiment = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultSentiment
      : getOrCreate(STANFORD_SENTIMENT, props, () -> backend.sentiment(props, STANFORD_SENTIMENT));
  sentiment.get().annotate(ann);
  // Update data
  synchronized (serializer) {
    for (int i = 0; i < sentences.size(); ++i) {
      CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i);
      String sentimentClass = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
      sentences.get(i).updateSentiment(sentimentClass);
    }
  }
  // Return
  return this;
}
synchronized Document runDepparse(Properties props) {
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawSentence().hasBasicDependencies()) {
    return this;
  }
  // Run prerequisites
  runPOS(props);
  // Run annotator
  Supplier<Annotator> depparse = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultDepparse
      : getOrCreate(STANFORD_DEPENDENCIES, props, () -> backend.dependencies(props));
  Annotation ann = asAnnotation(true);
  depparse.get().annotate(ann);
  // Update data
  synchronized (serializer) {
    for (int i = 0; i < sentences.size(); ++i) {
      CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i);
      sentences.get(i).updateDependencies(
          ProtobufAnnotationSerializer.toProto(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)),
          ProtobufAnnotationSerializer.toProto(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)),
          ProtobufAnnotationSerializer.toProto(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)));
    }
  }
  return this;
}
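// A hypothetical driver for the dependency path above via the simple API, assuming
// Sentence.dependencyGraph() lazily calls runDepparse(...) on the enclosing Document;
// the example text and output format choice are illustrative.
Sentence sent = new Sentence("The dog chased the cat.");
SemanticGraph graph = sent.dependencyGraph();
System.out.println(graph.toString(SemanticGraph.OutputFormat.LIST));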
synchronized Document runNatlog(Properties props) {
  if (this.sentences != null && this.sentences.size() > 0 && this.sentences.get(0).rawToken(0).hasPolarity()) {
    return this;
  }
  // Run prerequisites
  runLemma(props);
  runDepparse(props);
  // Run annotator
  Supplier<Annotator> natlog = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT)
      ? defaultNatlog
      : getOrCreate(STANFORD_NATLOG, props, () -> backend.natlog(props));
  Annotation ann = asAnnotation(true);
  natlog.get().annotate(ann);
  // Update data
  synchronized (serializer) {
    for (int i = 0; i < sentences.size(); ++i) {
      sentences.get(i).updateTokens(
          ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
          (Pair<CoreNLPProtos.Token.Builder, Polarity> pair) -> pair.first().setPolarity(ProtobufAnnotationSerializer.toProto(pair.second())),
          x -> x.get(NaturalLogicAnnotations.PolarityAnnotation.class));
      sentences.get(i).updateTokens(
          ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
          (Pair<CoreNLPProtos.Token.Builder, String> pair) -> pair.first().setPolarityDir(pair.second()),
          x -> x.get(NaturalLogicAnnotations.PolarityDirectionAnnotation.class));
      sentences.get(i).updateTokens(
          ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class),
          (Pair<CoreNLPProtos.Token.Builder, OperatorSpec> pair) -> pair.first().setOperator(ProtobufAnnotationSerializer.toProto(pair.second())),
          x -> x.get(NaturalLogicAnnotations.OperatorAnnotation.class));
    }
  }
  return this;
}
parse.annotate(ann);
segmenterAnnotator.annotate(annotation);
protected Tree parse(List<CoreLabel> tokens) {
  CoreMap sent = new Annotation("");
  sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
  Annotation doc = new Annotation("");
  List<CoreMap> sents = new ArrayList<CoreMap>();
  sents.add(sent);
  doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
  getParser().annotate(doc);
  sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
  return sents.get(0).get(TreeAnnotation.class);
}
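// A minimal sketch of calling the parse(...) helper above, assuming the token list is built
// by hand for illustration (in practice the tokens would normally come from a tokenizer
// annotator, which also supplies character offsets and other fields omitted here).
List<CoreLabel> tokens = new ArrayList<>();
for (String word : new String[]{"The", "dog", "barks", "."}) {
  CoreLabel token = new CoreLabel();
  token.setWord(word);
  token.setValue(word);
  tokens.add(token);
}
Tree tree = parse(tokens);
tree.pennPrint();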