/** Builds a CoreNLP pipeline configured with the full annotator stack, including coreference. */
private static StanfordCoreNLP getStanfordCoreNLP() {
  Properties pipelineProps = new Properties();
  pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  return new StanfordCoreNLP(pipelineProps);
}
}
import java.util.Properties;

import edu.stanford.nlp.pipeline.StanfordCoreNLP;

/** Minimal demo that constructs a CoreNLP pipeline with the full annotator stack. */
public class NLP {

  /**
   * Builds a StanfordCoreNLP pipeline configured with the standard annotators.
   *
   * @param args unused command-line arguments
   */
  public static void main(String[] args) {
    Properties props = new Properties();
    // setProperty is the type-safe String API; the raw Hashtable put() accepts
    // arbitrary Objects and bypasses Properties' contract.
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP coreNLP = new StanfordCoreNLP(props);
  }
}
/**
 * Builds the underlying CoreNLP pipeline used to preprocess text for SUTime queries.
 *
 * Tokenization, sentence splitting and POS tagging are configured by default;
 * the sutime annotator itself is created fresh for each query (it has
 * per-query options), which is inexpensive.
 *
 * @param props pipeline configuration; an existing "annotators" entry wins
 */
public SUTimePipeline(Properties props) {
  // Supply the default annotator chain only when the caller did not.
  if (props.getProperty("annotators") == null) {
    props.setProperty("annotators", "tokenize, ssplit, pos");
  }
  // Replicate the tokenizer behavior of a stock StanfordCoreNLP pipeline.
  props.setProperty("tokenize.options", "invertible,ptb3Escaping=true");
  this.pipeline = new StanfordCoreNLP(props);
}
/**
 * Load Stanford Processor: builds a CoreNLP pipeline that skips every annotator
 * whose output will instead come from gold annotations (or CoNLL replication).
 *
 * @param props base configuration; used as the defaults layer of the pipeline props
 * @return a pipeline restricted to the annotators actually needed
 */
protected static StanfordCoreNLP loadStanfordProcessor(Properties props) {
  boolean replicateCoNLL =
      Boolean.parseBoolean(props.getProperty(Constants.REPLICATECONLL_PROP, "false"));
  // props becomes the defaults table; only "annotators" is overridden below.
  Properties pipelineProps = new Properties(props);
  StringBuilder annoSb = new StringBuilder(); // was new StringBuilder("") — pointless arg
  if (!Constants.USE_GOLD_POS && !replicateCoNLL) {
    annoSb.append("pos, lemma");
  } else {
    annoSb.append("lemma");
  }
  if (Constants.USE_TRUECASE) {
    annoSb.append(", truecase");
  }
  if (!Constants.USE_GOLD_NE && !replicateCoNLL) {
    annoSb.append(", ner");
  }
  if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
    annoSb.append(", parse");
  }
  String annoStr = annoSb.toString();
  SieveCoreferenceSystem.logger.info(
      "MentionExtractor ignores specified annotators, using annotators=" + annoStr);
  pipelineProps.setProperty("annotators", annoStr);
  // Second arg false — presumably relaxes annotator-requirement enforcement;
  // confirm against StanfordCoreNLP(Properties, boolean).
  return new StanfordCoreNLP(pipelineProps, false);
}
public static void processCoreNLPIfDoesNotExist(File processedFile, Properties coreNLPProps, String text) { if (!processedFile.exists()) { try { StanfordCoreNLP coreNLP = new StanfordCoreNLP(coreNLPProps); Annotation processedAnnotation = coreNLP.process(text); //this document holds the split for paragraphs. ProtobufAnnotationSerializer pas = new ProtobufAnnotationSerializer(true); OutputStream fos = new BufferedOutputStream(new FileOutputStream(processedFile.getAbsolutePath())); pas.write(processedAnnotation, fos); } catch (IOException e) { e.printStackTrace(); } } }
/**
 * Initializes the servlet: builds a default CoreNLP pipeline and compiles the
 * XSL stylesheet used to render CoreNLP XML output as HTML.
 *
 * @throws ServletException if the stylesheet cannot be loaded or compiled
 */
@Override
public void init() throws ServletException {
  pipeline = new StanfordCoreNLP();
  String xslPath = getServletContext().getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl");
  try {
    Document stylesheet = new Builder().build(new File(xslPath));
    corenlpTransformer = new XSLTransform(stylesheet);
  } catch (Exception e) {
    throw new ServletException(e);
  }
}
public static void main(String[] args) throws IOException { Properties props = StringUtils.argsToProperties(args); AceReader r = new AceReader(new StanfordCoreNLP(props, false), false); r.setLoggerLevel(Level.INFO); r.parse("/u/scr/nlp/data/ACE2005/"); // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data"); // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false); log.info("done"); }
/**
 * Tokenizes and sentence-splits {@code test}, re-annotates it with a
 * ParagraphAnnotator configured for {@code num} newline breaks, and prints
 * each sentence with its paragraph index.
 */
public static void runTest(String test, String num) {
  System.out.println("Testing: " + test + " : num newline breaks: " + num);
  Annotation ann = new Annotation(test);
  Properties tokenizeProps = new Properties();
  tokenizeProps.setProperty("annotators", "tokenize,ssplit");
  new StanfordCoreNLP(tokenizeProps).annotate(ann);
  Properties paragraphProps = new Properties();
  paragraphProps.setProperty("paragraphBreak", num);
  new ParagraphAnnotator(paragraphProps, true).annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println(sentence);
    System.out.println(sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class));
  }
}
}
/**
 * Lazily builds (and caches in {@code coreNLP}) the pipeline used for coref
 * preprocessing. The annotator list depends on whether the input is CoNLL
 * (already POS-tagged/parsed), the language, the parse type, and whether gold
 * mentions are used.
 *
 * @param props coref configuration properties
 * @return the cached or newly built pipeline
 */
private StanfordCoreNLP getStanfordCoreNLP(Properties props) {
  if (coreNLP != null) {
    return coreNLP;
  }
  Properties pipelineProps = new Properties(props);
  // With gold mentions no mention annotator is needed.
  String mentionSuffix = CorefProperties.useGoldMentions(props) ? "" : ", coref.mention";
  if (CorefProperties.conll(props)) {
    // CoNLL input already carries POS/parse; Chinese additionally needs NER.
    pipelineProps.setProperty("annotators",
        (CorefProperties.getLanguage(props) == Locale.CHINESE ? "lemma, ner" : "lemma")
            + mentionSuffix);
  } else {
    pipelineProps.setProperty("annotators",
        "pos, lemma, ner, "
            + (CorefProperties.useConstituencyParse(props) ? "parse" : "depparse")
            + mentionSuffix);
  }
  // Hoisted out of both branches: the original duplicated this setting.
  pipelineProps.setProperty("ner.applyFineGrained", "false");
  return (coreNLP = new StanfordCoreNLP(pipelineProps, false));
}
/**
 * Runs the quote-attribution pipeline over the shared {@code test} text and,
 * for every quote found, prints the predicted mention and speaker along with
 * the sieve that produced each prediction.
 */
public static void testPP(String familyFile, String animateFile, String genderFile,
    String charactersFile, String modelFile) throws IOException, ClassNotFoundException {
  Properties props = new Properties();
  props.setProperty("annotators",
      "tokenize, ssplit, pos, lemma, ner, depparse, quote, quoteattribution");
  props.setProperty("quoteattribution.familyWordsFile", familyFile);
  props.setProperty("quoteattribution.animacyWordsFile", animateFile);
  props.setProperty("quoteattribution.genderNamesFile", genderFile);
  props.setProperty("quoteattribution.charactersPath", charactersFile);
  props.setProperty("quoteattribution.modelPath", modelFile);
  Annotation processedAnnotation = new StanfordCoreNLP(props).process(test);
  for (CoreMap quote : processedAnnotation.get(CoreAnnotations.QuotationsAnnotation.class)) {
    System.out.println("Quote: " + quote.get(CoreAnnotations.TextAnnotation.class));
    Object mention = quote.get(QuoteAttributionAnnotator.MentionAnnotation.class);
    if (mention != null) {
      System.out.println("Predicted Mention: " + mention + " Predictor: "
          + quote.get(QuoteAttributionAnnotator.MentionSieveAnnotation.class));
    } else {
      System.out.println("Predicted Mention: none");
    }
    Object speaker = quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class);
    if (speaker != null) {
      System.out.println("Predicted Speaker: " + speaker + " Predictor: "
          + quote.get(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class));
    } else {
      System.out.println("Predicted Speaker: none");
    }
    System.out.println("====");
  }
  System.out.println("Finished");
}
/**
 * A debugging method to try entity linking sentences from the console: prints
 * each token's Wikipedia entity link for the first sentence of the input line.
 *
 * @throws IOException if reading from the console fails
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    List<CoreLabel> tokens = ann.get(CoreAnnotations.SentencesAnnotation.class)
        .get(0)
        .get(CoreAnnotations.TokensAnnotation.class);
    System.err.println(StringUtils.join(
        tokens.stream().map(tok -> tok.get(CoreAnnotations.WikipediaEntityAnnotation.class)),
        " "));
  });
}
}
/**
 * Command-line driver: either deserializes and prints a saved annotation
 * ({@code -loadFile}) or annotates a text file and serializes the result next
 * to it as {@code <file>.ser} ({@code -file}).
 */
public static void main(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  String file = props.getProperty("file");
  String loadFile = props.getProperty("loadFile");
  if (loadFile != null && !loadFile.isEmpty()) {
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    // try-with-resources: the original leaked the stream if read() threw.
    try (InputStream is = new FileInputStream(loadFile)) {
      Pair<Annotation, InputStream> pair = ser.read(is);
      pair.second.close();
      Annotation anno = pair.first;
      System.out.println(anno.toShorterString(StringUtils.EMPTY_STRING_ARRAY));
    }
  } else if (file != null && !file.isEmpty()) { // isEmpty(): consistent with loadFile check
    String text = edu.stanford.nlp.io.IOUtils.slurpFile(file);
    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    try (PrintStream os = new PrintStream(new FileOutputStream(file + ".ser"))) {
      ser.write(doc, os);
    }
    log.info("Serialized annotation saved in " + file + ".ser");
  } else {
    log.info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
  }
}
/**
 * A debugging method to try relation extraction from the console: prints the
 * KBP triples found in each sentence of the typed input.
 *
 * @throws IOException If any IO problem
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators",
      "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping",
      "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", input -> {
    Annotation annotation = new Annotation(input);
    pipeline.annotate(annotation);
    annotation.get(CoreAnnotations.SentencesAnnotation.class)
        .forEach(sentence -> sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)
            .forEach(System.err::println));
  });
}
/** Interactive console driver: converts each typed question into statement form. */
public static void main(String[] args) throws IOException {
  Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  QuestionToStatementTranslator translator = new QuestionToStatementTranslator();
  IOUtils.console("question> ", question -> {
    Annotation ann = new Annotation(question);
    pipeline.annotate(ann);
    List<CoreLabel> tokens = ann.get(CoreAnnotations.TokensAnnotation.class);
    for (List<CoreLabel> statement : translator.toStatement(tokens)) {
      System.out.println(
          " -> " + StringUtils.join(statement.stream().map(CoreLabel::word), " "));
    }
  });
}
public static void main(String[] args) throws Exception { // just a simple test, to make sure stuff works Properties props = StringUtils.argsToProperties(args); RothCONLL04Reader reader = new RothCONLL04Reader(); reader.setLoggerLevel(Level.INFO); reader.setProcessor(new StanfordCoreNLP(props)); Annotation doc = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp"); System.out.println(AnnotationUtils.datasetToString(doc)); }
/**
 * Annotates {@code text} with a CoreNLP pipeline built from {@code args}, then
 * layers hybrid coreference on top of the result.
 *
 * @return the fully annotated document
 */
private static Annotation testAnnoation(String text, String[] args) {
  Properties props = StringUtils.argsToProperties(args);
  Annotation document = new Annotation(text);
  new StanfordCoreNLP(props).annotate(document);
  new HybridCorefAnnotator(props).annotate(document);
  return document;
}
/**
 * Demo of the relation extractor: annotates a fixed two-sentence example and
 * prints every relation mention found in each sentence.
 */
public static void main(String[] args) {
  try {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
    // BUG FIX: the pipeline was previously built with the no-arg constructor,
    // silently ignoring the annotator list configured just above.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    String sentence = "Barack Obama lives in America. Obama works for the Federal Goverment.";
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    RelationExtractorAnnotator r = new RelationExtractorAnnotator(props);
    r.annotate(doc);
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
      List<RelationMention> rls = s.get(RelationMentionsAnnotation.class);
      for (RelationMention rl : rls) {
        System.out.println(rl.toString());
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
}
/** Sanity check for the mood classifier: annotates three hand-written examples. */
public static void main(String[] args) {
  Properties props = StringUtils.propFileToProperties(
      "projects/core/src/edu/stanford/nlp/classify/mood.prop");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // One clearly happy, one clearly sad, one mixed example.
  for (String example : new String[] {
      "I am so glad this is awesome",
      "I am so gloomy and depressed",
      "I am so gloomy gloomy gloomy gloomy glad"}) {
    pipeline.annotate(new Annotation(example));
  }
}
}
private static void modifyUsingCoreNLPNER(Annotation doc) { Properties ann = new Properties(); ann.setProperty("annotators", "pos, lemma, ner"); StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false); pipeline.annotate(doc); for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) { List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class); if (entities != null) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (EntityMention en : entities) { //System.out.println("old ner tag for " + en.getExtentString() + " was " + en.getType()); Span s = en.getExtent(); Counter<String> allNertagforSpan = new ClassicCounter<>(); for (int i = s.start(); i < s.end(); i++) { allNertagforSpan.incrementCount(tokens.get(i).ner()); } String entityNertag = Counters.argmax(allNertagforSpan); en.setType(entityNertag); //System.out.println("new ner tag is " + entityNertag); } } } }
/**
 * Dependency-parse demo: parses either the UTF-8 file named in {@code args[0]}
 * or a built-in example sentence, logging each sentence's basic dependencies.
 */
public static void main(String[] args) {
  String text = (args.length > 0)
      ? IOUtils.slurpFileNoExceptions(args[0], "utf-8")
      : "I can almost always tell when movies use fake dinosaurs.";
  Properties props = PropertiesUtils.asProperties(
      "annotators", "tokenize,ssplit,pos,depparse",
      "depparse.model", DependencyParser.DEFAULT_MODEL);
  AnnotationPipeline pipeline = new StanfordCoreNLP(props);
  Annotation ann = new Annotation(text);
  pipeline.annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    SemanticGraph graph =
        sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    log.info(IOUtils.eolChar + graph.toString(SemanticGraph.OutputFormat.LIST));
  }
}