/**
 * Split a phrase into word tokens.
 * Uses the configured CoreNLP tokenizer when available; otherwise falls back
 * to regex splitting (with possessive "'s" separated into its own token).
 *
 * @param phraseText the raw phrase to split
 * @return the tokens of the phrase, in order
 */
public String[] splitText(String phraseText) {
  if (tokenizer != null) {
    // Tokenize through the annotation pipeline.
    Annotation annotation = new Annotation(phraseText);
    tokenizer.annotate(annotation);
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    String[] result = new String[tokens.size()];
    int idx = 0;
    for (CoreLabel token : tokens) {
      result[idx++] = token.word();
    }
    return result;
  }
  // Regex fallback: detach possessive 's, then split on the delimiter pattern.
  String normalized = possPattern.matcher(phraseText).replaceAll(" 's$1");
  return delimPattern.split(normalized);
}
// NOTE(review): truncated snippet — the for/if blocks opened here close outside
// this excerpt, so the full control flow is not visible. What is visible: walk
// every annotation class the annotator `an` declares as required, and for each
// one not already in `requirementsSatisfied`, add everything `an` itself
// satisfies to that set. Presumably pipeline-construction bookkeeping that
// verifies prerequisites before registering a stage — TODO confirm against the
// complete enclosing method.
Set<Class<? extends CoreAnnotation>> allRequirements = an.requires(); for (Class<? extends CoreAnnotation> requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * Returns the annotations this pipeline requires as input.
 * An empty pipeline requires nothing; otherwise the pipeline's external
 * requirements are exactly those of its first stage.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
  return annotators.isEmpty()
      ? Collections.emptySet()
      : annotators.get(0).requires();
}
/**
 * Returns the annotations this pipeline produces: the union of what every
 * stage in the pipeline satisfies.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
  Set<Class<? extends CoreAnnotation>> union = Generics.newHashSet();
  for (int i = 0; i < annotators.size(); i++) {
    union.addAll(annotators.get(i).requirementsSatisfied());
  }
  return union;
}
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/**
 * Everything any stage of this pipeline produces counts as satisfied.
 *
 * @return the union of the requirementsSatisfied sets of all stages
 */
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
  Set<Class<? extends CoreAnnotation>> result = Generics.newHashSet();
  for (Annotator stage : annotators) {
    result.addAll(stage.requirementsSatisfied());
  }
  return result;
}
/**
 * Annotate a sentence with the base pipeline plus a time annotator.
 * When a document date is supplied it is attached to the annotation first,
 * so the time annotator can resolve relative expressions against it.
 *
 * @param sentence      the text to process
 * @param dateString    the reference document date, or null/empty for none
 * @param timeAnnotator the temporal annotator to run after the base pipeline
 * @return the fully annotated document
 */
public Annotation process(String sentence, String dateString, Annotator timeAnnotator) {
  log.info("Processing text \"" + sentence + "\" with dateString = " + dateString);
  Annotation anno = new Annotation(sentence);
  boolean haveDate = dateString != null && !dateString.isEmpty();
  if (haveDate) {
    anno.set(CoreAnnotations.DocDateAnnotation.class, dateString);
  }
  pipeline.annotate(anno);
  timeAnnotator.annotate(anno);
  return anno;
}
// NOTE(review): truncated snippet — the for/if blocks opened here close outside
// this excerpt. Visible behavior: for each Requirement the annotator `an`
// declares, if it is not already in `requirementsSatisfied`, add everything
// `an` satisfies to that set. Appears to be the older Requirement-based
// counterpart of the Class-based bookkeeping elsewhere in this file — TODO
// confirm against the complete enclosing method.
Set<Requirement> allRequirements = an.requires(); for (Requirement requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * The pipeline's input requirements are those of its first stage; an empty
 * pipeline has none.
 */
@Override
public Set<Requirement> requires() {
  if (annotators.isEmpty()) {
    return Collections.emptySet();
  }
  Annotator first = annotators.get(0);
  return first.requires();
}
/**
 * Returns the union of the Requirements satisfied by every stage of the
 * pipeline.
 */
@Override
public Set<Requirement> requirementsSatisfied() {
  Set<Requirement> combined = Generics.newHashSet();
  for (Annotator a : annotators) {
    combined.addAll(a.requirementsSatisfied());
  }
  return combined;
}
/**
 * Lazily run KBP relation extraction over this document, caching the result.
 * No-op if KBP has already run. Otherwise: runs coref as a prerequisite,
 * annotates entity mentions, runs the KBP annotator, then copies the extracted
 * relation triples back into this document's sentence protos (serialized under
 * the `serializer` lock). The default cached annotators are used when `props`
 * is one of the shared sentinel Properties objects (identity comparison is
 * deliberate); custom props get a per-props annotator via getOrCreate.
 *
 * @param props annotator properties, or a shared sentinel for the defaults
 * @return this document, for chaining
 */
synchronized Document runKBP(Properties props) { if (haveRunKBP) { return this; } // Run prerequisites coref(props); Supplier<Annotator> entityMention = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultEntityMentions : getOrCreate(STANFORD_ENTITY_MENTIONS, props, () -> backend.entityMentions(props, STANFORD_ENTITY_MENTIONS)); Annotation ann = asAnnotation(true); entityMention.get().annotate(ann); // Run annotator Supplier<Annotator> kbp = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultKBP : getOrCreate(STANFORD_KBP, props, () -> backend.kbp(props)); kbp.get().annotate(ann); // Update data synchronized (serializer) { for (int i = 0; i < sentences.size(); ++i) { CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i); Collection<RelationTriple> triples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class); sentences.get(i).updateKBP(triples.stream().map(ProtobufAnnotationSerializer::toProto)); } } // Return haveRunKBP = true; return this; }
// NOTE(review): truncated snippet (duplicate of the earlier Class-based
// fragment) — the for/if blocks opened here close outside this excerpt.
// Visible behavior: for each annotation class `an` requires, if not already in
// `requirementsSatisfied`, add everything `an` satisfies to that set. TODO
// confirm intent against the complete enclosing method.
Set<Class<? extends CoreAnnotation>> allRequirements = an.requires(); for (Class<? extends CoreAnnotation> requirement : allRequirements) { if (!requirementsSatisfied.contains(requirement)) { requirementsSatisfied.addAll(an.requirementsSatisfied());
/**
 * Delegates the pipeline's input requirements to its first stage; with no
 * stages there is nothing to require.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
  if (!annotators.isEmpty()) {
    return annotators.get(0).requires();
  }
  return Collections.emptySet();
}
synchronized Document runRegexner(Properties props) { // Run prerequisites runNER(props); // Run annotator Supplier<Annotator> ner = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultRegexner : getOrCreate(STANFORD_REGEXNER, props, () -> backend.tokensRegexNER(props, STANFORD_REGEXNER)); Annotation ann = asAnnotation(true); ner.get().annotate(ann); // Update data for (int i = 0; i < sentences.size(); ++i) { sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (pair) -> pair.first.setNer(pair.second), CoreLabel::ner); } return this; }
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(Annotator.POS_REQUIREMENT)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/**
 * Lazily run Open Information Extraction over this document, caching the
 * result. No-op if OpenIE has already run. Otherwise: runs natural-logic
 * annotation as a prerequisite, runs the OpenIE annotator, then copies the
 * extracted relation triples back into this document's sentence protos
 * (serialized under the `serializer` lock). Shared sentinel Properties objects
 * (identity comparison is deliberate) select the default cached annotator;
 * custom props get a per-props annotator via getOrCreate.
 *
 * @param props annotator properties, or a shared sentinel for the defaults
 * @return this document, for chaining
 */
synchronized Document runOpenie(Properties props) { if (haveRunOpenie) { return this; } // Run prerequisites runNatlog(props); // Run annotator Supplier<Annotator> openie = (props == EMPTY_PROPS || props == SINGLE_SENTENCE_DOCUMENT) ? defaultOpenie : getOrCreate(STANFORD_OPENIE, props, () -> backend.openie(props)); Annotation ann = asAnnotation(true); openie.get().annotate(ann); // Update data synchronized (serializer) { for (int i = 0; i < sentences.size(); ++i) { CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(i); Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class); sentences.get(i).updateOpenIE(triples.stream().map(ProtobufAnnotationSerializer::toProto)); } } // Return haveRunOpenie = true; return this; }
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/** * Run the pipeline on an input annotation. * The annotation is modified in place. * * @param annotation The input annotation, usually a raw document */ @Override public void annotate(Annotation annotation) { Iterator<MutableLong> it = accumulatedTime.iterator(); Timing t = new Timing(); for (Annotator annotator : annotators) { if (Thread.interrupted()) { // Allow interrupting throw new RuntimeInterruptedException(); } if (TIME) { t.start(); } annotator.annotate(annotation); if (TIME) { long elapsed = t.stop(); MutableLong m = it.next(); m.incValue(elapsed); } } }