edu.stanford.nlp.pipeline.POSTaggerAnnotator java code examples

/**
 * Creates a {@code POSTaggerAnnotator} registered under the annotator name
 * {@code "pos"} and configured from the supplied properties.
 *
 * @param properties the properties handed to the annotator's constructor
 * @return the newly constructed part-of-speech annotator
 */
public Annotator posTagger(Properties properties) {
 return new POSTaggerAnnotator("pos", properties);
}

/** Create a POS tagger annotator.
 *
 *  @param posLoc Location of POS tagger model (may be file path, classpath resource, or URL)
 *  @param verbose Whether to show verbose information on model loading
 *  @param maxSentenceLength Sentences longer than this length will be skipped in processing
 *  @param numThreads The number of threads for the POS tagger annotator to use
 */
public POSTaggerAnnotator(String posLoc, boolean verbose, int maxSentenceLength, int numThreads) {
 // Load the model from posLoc, then delegate to the constructor that accepts the loaded model.
 this(loadModel(posLoc, verbose), maxSentenceLength, numThreads);
}

/**
 * Processes every sentence stored under
 * {@code CoreAnnotations.SentencesAnnotation} in the given annotation.
 * When {@code nThreads} is 1 each sentence is passed to
 * {@code doOneSentence} directly; otherwise the sentences are fed through a
 * {@code MulticoreWrapper} driving a {@code POSTaggerProcessor}.
 *
 * @param annotation the annotation whose sentences are processed
 * @throws RuntimeException if the annotation carries no sentences key
 */
@Override
public void annotate(Annotation annotation) {
 // Guard clause: without a sentences entry there is nothing to work on.
 if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
  throw new RuntimeException("unable to find words/tokens in: " + annotation);
 }

 if (nThreads != 1) {
  MulticoreWrapper<CoreMap, CoreMap> pool = new MulticoreWrapper<>(nThreads, new POSTaggerProcessor());
  for (CoreMap current : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   pool.put(current);
   // Drain whatever has finished so far; the polled values are not used.
   while (pool.peek()) {
    pool.poll();
   }
  }
  // Wait for the remaining work, then drain the leftovers.
  pool.join();
  while (pool.peek()) {
   pool.poll();
  }
  return;
 }

 // Single-threaded path: handle the sentences one after another.
 for (CoreMap current : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
  doOneSentence(current);
 }
}

// Run posAnnotator and then parseAnnotator over the same document,
// then read the document's SentencesAnnotation value into `sentences`.
posAnnotator.annotate(document);
parseAnnotator.annotate(document);
sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

/**
 * POS-tags a list of tokens in place: the list handed in is the list handed
 * back, with a tag set on each token.
 *
 * @param text
 *          List of tokens to tag
 * @return Tokens with tags
 */
public List<? extends CoreLabel> processText(List<? extends CoreLabel> text) {
 // Input beyond the configured maximum length goes to the dedicated routine.
 if (text.size() > maxSentenceLength) {
  return processTextLargerThanMaxLen(text);
 }

 // Note carried over from cdm (2009): the copy is not strictly needed because
 // the tagger's apply() leaves its argument untouched, but it seems to be
 // required to keep the generics correct.
 ArrayList<CoreLabel> copy = new ArrayList<CoreLabel>(text);
 ArrayList<TaggedWord> tagged = pos.apply(copy);

 // Walk the original tokens and the tagger output in lock-step, copying tags.
 Iterator<TaggedWord> tagSource = tagged.iterator();
 Iterator<? extends CoreLabel> wordSource = text.iterator();
 while (wordSource.hasNext()) {
  CoreLabel word = wordSource.next();
  word.setTag(tagSource.next().tag());
 }
 return text;
}

// Apply posAnnotator first and parseAnnotator second to the same document,
// then fetch the SentencesAnnotation value from it into `sentences`.
posAnnotator.annotate(document);
parseAnnotator.annotate(document);
sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

/**
 * Assembles a pipeline of four annotators, added in this order: tokenizer
 * (language "en"), words-to-sentences splitter, POS tagger and time annotator.
 *
 * @return the assembled pipeline
 */
private static AnnotationPipeline makeNumericPipeline() {
 AnnotationPipeline result = new AnnotationPipeline();

 TokenizerAnnotator tokenizer = new TokenizerAnnotator(false, "en");
 result.addAnnotator(tokenizer);

 WordsToSentencesAnnotator sentenceSplitter = new WordsToSentencesAnnotator(false);
 result.addAnnotator(sentenceSplitter);

 POSTaggerAnnotator tagger = new POSTaggerAnnotator(false);
 result.addAnnotator(tagger);

 TimeAnnotator timeTagger = new TimeAnnotator(true);
 result.addAnnotator(timeTagger);

 return result;
}

public POSTaggerAnnotator(String annotatorName, Properties props) {
 String posLoc = props.getProperty(annotatorName + ".model");
 if (posLoc == null) {
  posLoc = DefaultPaths.DEFAULT_POS_MODEL;
 }
 boolean verbose = PropertiesUtils.getBool(props, annotatorName + ".verbose", false);
 this.pos = loadModel(posLoc, verbose);
 this.maxSentenceLength = PropertiesUtils.getInt(props, annotatorName + ".maxlen", Integer.MAX_VALUE);
 this.nThreads = PropertiesUtils.getInt(props, annotatorName + ".nthreads", PropertiesUtils.getInt(props, "nthreads", 1));
 this.reuseTags = PropertiesUtils.getBool(props, annotatorName + ".reuseTags", false);
}

/**
 * Handles each sentence found under
 * {@code CoreAnnotations.SentencesAnnotation} in the annotation. A thread
 * count of 1 calls {@code doOneSentence} per sentence; any other count routes
 * the sentences through a {@code MulticoreWrapper} with a
 * {@code POSTaggerProcessor}.
 *
 * @param annotation the annotation whose sentences are processed
 * @throws RuntimeException if the sentences key is absent from the annotation
 */
@Override
public void annotate(Annotation annotation) {
 // Fail fast when the annotation has no sentences entry.
 if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
  throw new RuntimeException("unable to find words/tokens in: " + annotation);
 }

 if (nThreads != 1) {
  MulticoreWrapper<CoreMap, CoreMap> workers = new MulticoreWrapper<>(nThreads, new POSTaggerProcessor());
  for (CoreMap item : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   workers.put(item);
   // Consume completed results eagerly; their values are ignored.
   while (workers.peek()) {
    workers.poll();
   }
  }
  // Block until outstanding work is done, then consume the rest.
  workers.join();
  while (workers.peek()) {
   workers.poll();
  }
  return;
 }

 // One thread: process sentences sequentially.
 for (CoreMap item : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
  doOneSentence(item);
 }
}

// posAnnotator runs before parseAnnotator on the same document; afterwards
// the document's SentencesAnnotation value is stored in `sentences`.
posAnnotator.annotate(document);
parseAnnotator.annotate(document);
sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

 /**
  * Builds an annotation pipeline: optionally a tokenizer and sentence splitter,
  * then a POS tagger, then the time annotator named by the
  * {@code timeAnnotator} property ({@code "sutime"} when unset).
  * Choosing {@code "gutime"} sets {@code useGUTime}; choosing
  * {@code "heideltime"} sets {@code requiredDocDateFormat} to {@code "yyyy-MM-dd"}.
  *
  * @param props properties naming the time annotator and passed on to it
  * @param tokenize whether to add the tokenizer and sentence splitter first
  * @return the assembled pipeline
  * @throws IllegalArgumentException if {@code timeAnnotator} names an unknown annotator
  */
 public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception {
  //    useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false"));
  AnnotationPipeline result = new AnnotationPipeline();
  if (tokenize) {
   result.addAnnotator(new TokenizerAnnotator(false, "en"));
   result.addAnnotator(new WordsToSentencesAnnotator(false));
  }
  result.addAnnotator(new POSTaggerAnnotator(false));
  //    pipeline.addAnnotator(new NumberAnnotator(false));
  //    pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false));

  // Select the time annotator by name.
  String timeAnnotator = props.getProperty("timeAnnotator", "sutime");
  if ("gutime".equals(timeAnnotator)) {
   useGUTime = true;
   result.addAnnotator(new GUTimeAnnotator("gutime", props));
  } else if ("heideltime".equals(timeAnnotator)) {
   requiredDocDateFormat = "yyyy-MM-dd";
   result.addAnnotator(new HeidelTimeAnnotator("heideltime", props));
  } else if ("sutime".equals(timeAnnotator)) {
   result.addAnnotator(new TimeAnnotator("sutime", props));
  } else {
   throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator);
  }
  return result;
 }

/** Create a POS tagger annotator.
 *
 *  @param posLoc Location of POS tagger model (may be file path, classpath resource, or URL)
 *  @param verbose Whether to show verbose information on model loading
 *  @param maxSentenceLength Sentences longer than this length will be skipped in processing
 *  @param numThreads The number of threads for the POS tagger annotator to use
 */
public POSTaggerAnnotator(String posLoc, boolean verbose, int maxSentenceLength, int numThreads) {
 // The model is loaded here and handed to the constructor taking a loaded model.
 this(loadModel(posLoc, verbose), maxSentenceLength, numThreads);
}

/**
 * Processes all sentences held under
 * {@code CoreAnnotations.SentencesAnnotation} in the annotation, either one
 * by one via {@code doOneSentence} (when {@code nThreads} is 1) or through a
 * {@code MulticoreWrapper} running a {@code POSTaggerProcessor}.
 *
 * @param annotation the annotation whose sentences are processed
 * @throws RuntimeException if the annotation does not have the sentences key
 */
@Override
public void annotate(Annotation annotation) {
 // Reject annotations that carry no sentences entry.
 if (!annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
  throw new RuntimeException("unable to find words/tokens in: " + annotation);
 }

 if (nThreads != 1) {
  MulticoreWrapper<CoreMap, CoreMap> pool = new MulticoreWrapper<CoreMap, CoreMap>(nThreads, new POSTaggerProcessor());
  for (CoreMap current : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   pool.put(current);
   // Pull off finished items as they appear; the results are discarded.
   while (pool.peek()) {
    pool.poll();
   }
  }
  // Wait for all submitted sentences, then empty the queue.
  pool.join();
  while (pool.peek()) {
   pool.poll();
  }
  return;
 }

 // Single thread: straight sequential loop.
 for (CoreMap current : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
  doOneSentence(current);
 }
}

// Annotate the document with posAnnotator, then with parseAnnotator, and
// finally read its SentencesAnnotation value into `sentences`.
posAnnotator.annotate(document);
parseAnnotator.annotate(document);
sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

/**
 * Builds the part-of-speech annotator: a {@code POSTaggerAnnotator} created
 * with the annotator name {@code "pos"} and the given properties.
 *
 * @param properties the properties passed through to the annotator
 * @return a new part-of-speech annotator
 */
public Annotator posTagger(Properties properties) {
 return new POSTaggerAnnotator("pos", properties);
}

/** Create a POS tagger annotator.
 *
 *  @param posLoc Location of POS tagger model (may be file path, classpath resource, or URL)
 *  @param verbose Whether to show verbose information on model loading
 *  @param maxSentenceLength Sentences longer than this length will be skipped in processing
 *  @param numThreads The number of threads for the POS tagger annotator to use
 */
public POSTaggerAnnotator(String posLoc, boolean verbose, int maxSentenceLength, int numThreads) {
 // Delegates to the loaded-model constructor after loading the model from posLoc.
 this(loadModel(posLoc, verbose), maxSentenceLength, numThreads);
}

/**
 * Creates a {@code POSTaggerAnnotator} named {@code "pos"} from
 * {@code properties}. Any exception raised during construction is rethrown
 * wrapped in a {@code RuntimeException}.
 *
 * @return the newly created annotator
 */
@Override
public Annotator create() {
 Annotator tagger;
 try {
  tagger = new POSTaggerAnnotator("pos", properties);
 } catch (Exception e) {
  throw new RuntimeException(e);
 }
 return tagger;
}

/**
 * Creates a POS tagger annotator by loading the model from the given location
 * and delegating to the constructor that takes the loaded model.
 *
 * @param posLoc location of the POS tagger model, passed to {@code loadModel}
 * @param verbose passed both to {@code loadModel} and to the delegate constructor
 * @param maxSentenceLength maximum sentence length, passed to the delegate constructor
 */
public POSTaggerAnnotator(String posLoc, boolean verbose, int maxSentenceLength) {
 this(loadModel(posLoc, verbose), verbose, maxSentenceLength);
}

 /**
  * Creates a verbose {@code POSTaggerAnnotator} from {@code props}: the model
  * location comes from {@code pos.model} (falling back to
  * {@code DefaultPaths.DEFAULT_POS_MODEL}) and the maximum length from
  * {@code pos.maxlen} ({@code Integer.MAX_VALUE} when unset). Any exception,
  * including a malformed {@code pos.maxlen}, is rethrown wrapped in a
  * {@code RuntimeException}.
  *
  * @return the newly created annotator
  */
 public Annotator create() {
  try {
   String maxLenStr = props.getProperty("pos.maxlen");
   int maxLen = (maxLenStr == null) ? Integer.MAX_VALUE : Integer.parseInt(maxLenStr);
   String modelPath = props.getProperty("pos.model", DefaultPaths.DEFAULT_POS_MODEL);
   return new POSTaggerAnnotator(modelPath, true, maxLen);
  } catch (Exception e) {
   throw new RuntimeException(e);
  }
 }
});

public POSTaggerAnnotator(String annotatorName, Properties props) {
 String posLoc = props.getProperty(annotatorName + ".model");
 if (posLoc == null) {
  posLoc = DefaultPaths.DEFAULT_POS_MODEL;
 }
 boolean verbose = PropertiesUtils.getBool(props, annotatorName + ".verbose", false);
 this.pos = loadModel(posLoc, verbose);
 this.maxSentenceLength = PropertiesUtils.getInt(props, annotatorName + ".maxlen", Integer.MAX_VALUE);
 this.nThreads = PropertiesUtils.getInt(props, annotatorName + ".nthreads", PropertiesUtils.getInt(props, "nthreads", 1));
 this.reuseTags = PropertiesUtils.getBool(props, annotatorName + ".reuseTags", false);
}

Javadoc

Wrapper for the maxent part of speech tagger.

Most used methods

<init>
loadModel
doOneSentence
annotate
processTextLargerThanMaxLen
If the text length is more than specified, then the text is divided into (length/MaxLen) sentences an…
signature

Popular in Java

Making http post requests using okhttp
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
onCreateOptionsMenu (Activity)
setContentView (Activity)
List (java.util)
An ordered collection (also known as a sequence). The user of this interface has precise control ove
Locale (java.util)
Locale represents a language/country/variant combination. Locales are used to alter the presentatio
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
Github Copilot alternatives

How to use POSTaggerAnnotator in edu.stanford.nlp.pipeline

Best Java code snippets using edu.stanford.nlp.pipeline.POSTaggerAnnotator (Showing top 20 results out of 315)

How to use
POSTaggerAnnotator
in
edu.stanford.nlp.pipeline