/** Builds a CoreNLP pipeline configured with the full annotator stack, including coreference. */
private static StanfordCoreNLP getStanfordCoreNLP() {
  Properties pipelineProps = new Properties();
  pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  return new StanfordCoreNLP(pipelineProps);
}
}
import java.util.Properties;

import edu.stanford.nlp.pipeline.StanfordCoreNLP;

/** Minimal demo that constructs a CoreNLP pipeline with the full annotator stack. */
public class NLP {

  /**
   * Builds a StanfordCoreNLP pipeline configured with the standard annotators.
   *
   * @param args unused command-line arguments
   */
  public static void main(String[] args) {
    Properties props = new Properties();
    // setProperty is the type-safe String API; the raw Hashtable put() accepts
    // arbitrary Objects and bypasses Properties' contract.
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP coreNLP = new StanfordCoreNLP(props);
  }
}
/**
 * Builds the underlying CoreNLP pipeline used to preprocess text for SUTime queries.
 *
 * Tokenization, sentence splitting and POS tagging are configured by default;
 * the sutime annotator itself is created fresh for each query (it has
 * per-query options), which is inexpensive.
 *
 * @param props pipeline configuration; an existing "annotators" entry wins
 */
public SUTimePipeline(Properties props) {
  // Supply the default annotator chain only when the caller did not.
  if (props.getProperty("annotators") == null) {
    props.setProperty("annotators", "tokenize, ssplit, pos");
  }
  // Replicate the tokenizer behavior of a stock StanfordCoreNLP pipeline.
  props.setProperty("tokenize.options", "invertible,ptb3Escaping=true");
  this.pipeline = new StanfordCoreNLP(props);
}
/**
 * Load Stanford Processor: builds a CoreNLP pipeline that skips every annotator
 * whose output will instead come from gold annotations (or CoNLL replication).
 *
 * @param props base configuration; used as the defaults layer of the pipeline props
 * @return a pipeline restricted to the annotators actually needed
 */
protected static StanfordCoreNLP loadStanfordProcessor(Properties props) {
  boolean replicateCoNLL =
      Boolean.parseBoolean(props.getProperty(Constants.REPLICATECONLL_PROP, "false"));
  // props becomes the defaults table; only "annotators" is overridden below.
  Properties pipelineProps = new Properties(props);
  StringBuilder annoSb = new StringBuilder(); // was new StringBuilder("") — pointless arg
  if (!Constants.USE_GOLD_POS && !replicateCoNLL) {
    annoSb.append("pos, lemma");
  } else {
    annoSb.append("lemma");
  }
  if (Constants.USE_TRUECASE) {
    annoSb.append(", truecase");
  }
  if (!Constants.USE_GOLD_NE && !replicateCoNLL) {
    annoSb.append(", ner");
  }
  if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
    annoSb.append(", parse");
  }
  String annoStr = annoSb.toString();
  SieveCoreferenceSystem.logger.info(
      "MentionExtractor ignores specified annotators, using annotators=" + annoStr);
  pipelineProps.setProperty("annotators", annoStr);
  // Second arg false — presumably relaxes annotator-requirement enforcement;
  // confirm against StanfordCoreNLP(Properties, boolean).
  return new StanfordCoreNLP(pipelineProps, false);
}
public static void processCoreNLPIfDoesNotExist(File processedFile, Properties coreNLPProps, String text) { if (!processedFile.exists()) { try { StanfordCoreNLP coreNLP = new StanfordCoreNLP(coreNLPProps); Annotation processedAnnotation = coreNLP.process(text); //this document holds the split for paragraphs. ProtobufAnnotationSerializer pas = new ProtobufAnnotationSerializer(true); OutputStream fos = new BufferedOutputStream(new FileOutputStream(processedFile.getAbsolutePath())); pas.write(processedAnnotation, fos); } catch (IOException e) { e.printStackTrace(); } } }
/**
 * Initializes the servlet: builds a default CoreNLP pipeline and compiles the
 * XSL stylesheet used to render CoreNLP XML output as HTML.
 *
 * @throws ServletException if the stylesheet cannot be loaded or compiled
 */
@Override
public void init() throws ServletException {
  pipeline = new StanfordCoreNLP();
  String xslPath = getServletContext().getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl");
  try {
    Document stylesheet = new Builder().build(new File(xslPath));
    corenlpTransformer = new XSLTransform(stylesheet);
  } catch (Exception e) {
    throw new ServletException(e);
  }
}
public static void main(String[] args) throws IOException { Properties props = StringUtils.argsToProperties(args); AceReader r = new AceReader(new StanfordCoreNLP(props, false), false); r.setLoggerLevel(Level.INFO); r.parse("/u/scr/nlp/data/ACE2005/"); // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data"); // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false); log.info("done"); }
/**
 * Tokenizes and sentence-splits {@code test}, re-annotates it with a
 * ParagraphAnnotator configured for {@code num} newline breaks, and prints
 * each sentence with its paragraph index.
 */
public static void runTest(String test, String num) {
  System.out.println("Testing: " + test + " : num newline breaks: " + num);
  Annotation ann = new Annotation(test);
  Properties tokenizeProps = new Properties();
  tokenizeProps.setProperty("annotators", "tokenize,ssplit");
  new StanfordCoreNLP(tokenizeProps).annotate(ann);
  Properties paragraphProps = new Properties();
  paragraphProps.setProperty("paragraphBreak", num);
  new ParagraphAnnotator(paragraphProps, true).annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println(sentence);
    System.out.println(sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class));
  }
}
}
/**
 * Lazily builds (and caches in {@code coreNLP}) the pipeline used for coref
 * preprocessing. The annotator list depends on whether the input is CoNLL
 * (already POS-tagged/parsed), the language, the parse type, and whether gold
 * mentions are used.
 *
 * @param props coref configuration properties
 * @return the cached or newly built pipeline
 */
private StanfordCoreNLP getStanfordCoreNLP(Properties props) {
  if (coreNLP != null) {
    return coreNLP;
  }
  Properties pipelineProps = new Properties(props);
  // With gold mentions no mention annotator is needed.
  String mentionSuffix = CorefProperties.useGoldMentions(props) ? "" : ", coref.mention";
  if (CorefProperties.conll(props)) {
    // CoNLL input already carries POS/parse; Chinese additionally needs NER.
    pipelineProps.setProperty("annotators",
        (CorefProperties.getLanguage(props) == Locale.CHINESE ? "lemma, ner" : "lemma")
            + mentionSuffix);
  } else {
    pipelineProps.setProperty("annotators",
        "pos, lemma, ner, "
            + (CorefProperties.useConstituencyParse(props) ? "parse" : "depparse")
            + mentionSuffix);
  }
  // Hoisted out of both branches: the original duplicated this setting.
  pipelineProps.setProperty("ner.applyFineGrained", "false");
  return (coreNLP = new StanfordCoreNLP(pipelineProps, false));
}
/**
 * Runs the quote-attribution pipeline over the shared {@code test} text and,
 * for every quote found, prints the predicted mention and speaker along with
 * the sieve that produced each prediction.
 */
public static void testPP(String familyFile, String animateFile, String genderFile,
    String charactersFile, String modelFile) throws IOException, ClassNotFoundException {
  Properties props = new Properties();
  props.setProperty("annotators",
      "tokenize, ssplit, pos, lemma, ner, depparse, quote, quoteattribution");
  props.setProperty("quoteattribution.familyWordsFile", familyFile);
  props.setProperty("quoteattribution.animacyWordsFile", animateFile);
  props.setProperty("quoteattribution.genderNamesFile", genderFile);
  props.setProperty("quoteattribution.charactersPath", charactersFile);
  props.setProperty("quoteattribution.modelPath", modelFile);
  Annotation processedAnnotation = new StanfordCoreNLP(props).process(test);
  for (CoreMap quote : processedAnnotation.get(CoreAnnotations.QuotationsAnnotation.class)) {
    System.out.println("Quote: " + quote.get(CoreAnnotations.TextAnnotation.class));
    Object mention = quote.get(QuoteAttributionAnnotator.MentionAnnotation.class);
    if (mention != null) {
      System.out.println("Predicted Mention: " + mention + " Predictor: "
          + quote.get(QuoteAttributionAnnotator.MentionSieveAnnotation.class));
    } else {
      System.out.println("Predicted Mention: none");
    }
    Object speaker = quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class);
    if (speaker != null) {
      System.out.println("Predicted Speaker: " + speaker + " Predictor: "
          + quote.get(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class));
    } else {
      System.out.println("Predicted Speaker: none");
    }
    System.out.println("====");
  }
  System.out.println("Finished");
}
/**
 * A debugging method to try entity linking sentences from the console: prints
 * each token's Wikipedia entity link for the first sentence of the input line.
 *
 * @throws IOException if reading from the console fails
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    List<CoreLabel> tokens = ann.get(CoreAnnotations.SentencesAnnotation.class)
        .get(0)
        .get(CoreAnnotations.TokensAnnotation.class);
    System.err.println(StringUtils.join(
        tokens.stream().map(tok -> tok.get(CoreAnnotations.WikipediaEntityAnnotation.class)),
        " "));
  });
}
}
/**
 * Command-line driver: either deserializes and prints a saved annotation
 * ({@code -loadFile}) or annotates a text file and serializes the result next
 * to it as {@code <file>.ser} ({@code -file}).
 */
public static void main(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  String file = props.getProperty("file");
  String loadFile = props.getProperty("loadFile");
  if (loadFile != null && !loadFile.isEmpty()) {
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    // try-with-resources: the original leaked the stream if read() threw.
    try (InputStream is = new FileInputStream(loadFile)) {
      Pair<Annotation, InputStream> pair = ser.read(is);
      pair.second.close();
      Annotation anno = pair.first;
      System.out.println(anno.toShorterString(StringUtils.EMPTY_STRING_ARRAY));
    }
  } else if (file != null && !file.isEmpty()) { // isEmpty(): consistent with loadFile check
    String text = edu.stanford.nlp.io.IOUtils.slurpFile(file);
    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    try (PrintStream os = new PrintStream(new FileOutputStream(file + ".ser"))) {
      ser.write(doc, os);
    }
    log.info("Serialized annotation saved in " + file + ".ser");
  } else {
    log.info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
  }
}
/**
 * A debugging method to try relation extraction from the console: prints the
 * KBP triples found in each sentence of the typed input.
 *
 * @throws IOException If any IO problem
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators",
      "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping",
      "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", input -> {
    Annotation annotation = new Annotation(input);
    pipeline.annotate(annotation);
    annotation.get(CoreAnnotations.SentencesAnnotation.class)
        .forEach(sentence -> sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)
            .forEach(System.err::println));
  });
}
/** Interactive console driver: converts each typed question into statement form. */
public static void main(String[] args) throws IOException {
  Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  QuestionToStatementTranslator translator = new QuestionToStatementTranslator();
  IOUtils.console("question> ", question -> {
    Annotation ann = new Annotation(question);
    pipeline.annotate(ann);
    List<CoreLabel> tokens = ann.get(CoreAnnotations.TokensAnnotation.class);
    for (List<CoreLabel> statement : translator.toStatement(tokens)) {
      System.out.println(
          " -> " + StringUtils.join(statement.stream().map(CoreLabel::word), " "));
    }
  });
}
public static void main(String[] args) throws Exception { // just a simple test, to make sure stuff works Properties props = StringUtils.argsToProperties(args); RothCONLL04Reader reader = new RothCONLL04Reader(); reader.setLoggerLevel(Level.INFO); reader.setProcessor(new StanfordCoreNLP(props)); Annotation doc = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp"); System.out.println(AnnotationUtils.datasetToString(doc)); }
/**
 * Annotates {@code text} with a CoreNLP pipeline built from {@code args}, then
 * layers hybrid coreference on top of the result.
 *
 * @return the fully annotated document
 */
private static Annotation testAnnoation(String text, String[] args) {
  Properties props = StringUtils.argsToProperties(args);
  Annotation document = new Annotation(text);
  new StanfordCoreNLP(props).annotate(document);
  new HybridCorefAnnotator(props).annotate(document);
  return document;
}
/**
 * Demo of the relation extractor: annotates a fixed two-sentence example and
 * prints every relation mention found in each sentence.
 */
public static void main(String[] args) {
  try {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
    // BUG FIX: the pipeline was previously built with the no-arg constructor,
    // silently ignoring the annotator list configured just above.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    String sentence = "Barack Obama lives in America. Obama works for the Federal Goverment.";
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    RelationExtractorAnnotator r = new RelationExtractorAnnotator(props);
    r.annotate(doc);
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
      List<RelationMention> rls = s.get(RelationMentionsAnnotation.class);
      for (RelationMention rl : rls) {
        System.out.println(rl.toString());
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
}
/** Sanity check for the mood classifier: annotates three hand-written examples. */
public static void main(String[] args) {
  Properties props = StringUtils.propFileToProperties(
      "projects/core/src/edu/stanford/nlp/classify/mood.prop");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // One clearly happy, one clearly sad, one mixed example.
  for (String example : new String[] {
      "I am so glad this is awesome",
      "I am so gloomy and depressed",
      "I am so gloomy gloomy gloomy gloomy glad"}) {
    pipeline.annotate(new Annotation(example));
  }
}
}
private static void modifyUsingCoreNLPNER(Annotation doc) { Properties ann = new Properties(); ann.setProperty("annotators", "pos, lemma, ner"); StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false); pipeline.annotate(doc); for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) { List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class); if (entities != null) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (EntityMention en : entities) { //System.out.println("old ner tag for " + en.getExtentString() + " was " + en.getType()); Span s = en.getExtent(); Counter<String> allNertagforSpan = new ClassicCounter<>(); for (int i = s.start(); i < s.end(); i++) { allNertagforSpan.incrementCount(tokens.get(i).ner()); } String entityNertag = Counters.argmax(allNertagforSpan); en.setType(entityNertag); //System.out.println("new ner tag is " + entityNertag); } } } }
/**
 * Dependency-parse demo: parses either the UTF-8 file named in {@code args[0]}
 * or a built-in example sentence, logging each sentence's basic dependencies.
 */
public static void main(String[] args) {
  String text = (args.length > 0)
      ? IOUtils.slurpFileNoExceptions(args[0], "utf-8")
      : "I can almost always tell when movies use fake dinosaurs.";
  Properties props = PropertiesUtils.asProperties(
      "annotators", "tokenize,ssplit,pos,depparse",
      "depparse.model", DependencyParser.DEFAULT_MODEL);
  AnnotationPipeline pipeline = new StanfordCoreNLP(props);
  Annotation ann = new Annotation(text);
  pipeline.annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    SemanticGraph graph =
        sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    log.info(IOUtils.eolChar + graph.toString(SemanticGraph.OutputFormat.LIST));
  }
}