public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
/**
 * Initializes a new instance, wrapping the given model in a
 * maximum-entropy POS tagger ({@link POSTaggerME}).
 *
 * @param aPosModel a POSTagger model; must not be {@code null}.
 */
public POSTaggerNameFeatureGenerator(POSModel aPosModel) {
    this.posTagger = new POSTaggerME(aPosModel);
}
new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}.
 *
 * @param model the trained parser model providing build/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
private void evalPosModel(POSModel model, BigInteger expectedHash) throws Exception { // break the input stream into sentences // The input stream is tokenized and can be processed here directly MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM); POSTagger tagger = new POSTaggerME(model); try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) { LeipzigTestSample line; while ((line = lines.read()) != null) { String[] tags = tagger.tag(line.getText()); for (String tag : tags) { digest.update(tag.getBytes(StandardCharsets.UTF_8)); } } } Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest())); }
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 * <p>
 * Resolves the POS model from the UIMA resource manager and builds the
 * tagger from it; resource lookup failures are wrapped in
 * {@link ResourceInitializationException}.
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    this.context = context;
    this.logger = context.getLogger();
    if (this.logger.isLoggable(Level.INFO)) {
        this.logger.log(Level.INFO, "Initializing the OpenNLP " + "Part of Speech annotator.");
    }
    POSModel model;
    try {
        // The model is provided as a shared UIMA resource object.
        POSModelResource modelResource = (POSModelResource) context
            .getResourceObject(UimaUtil.MODEL_PARAMETER);
        model = modelResource.getModel();
    } catch (ResourceAccessException e) {
        throw new ResourceInitializationException(e);
    }
    Integer beamSize = AnnotatorUtil.getOptionalIntegerParameter(context,
        UimaUtil.BEAM_SIZE_PARAMETER);
    if (beamSize == null) {
        beamSize = POSTaggerME.DEFAULT_BEAM_SIZE;
    }
    // NOTE(review): beamSize is resolved above but never passed to the
    // POSTaggerME constructor, so the BEAM_SIZE_PARAMETER has no effect here.
    // Presumably the beam size should be applied via the model/factory or a
    // beam-aware constructor in this OpenNLP version — confirm and fix, or
    // remove the parameter lookup.
    this.posTagger = new POSTaggerME(model);
}
/**
 * Evaluates the given POS model against a CoNLL-X formatted gold-standard
 * file and asserts the measured word accuracy.
 *
 * @param model the POS model under evaluation
 * @param testData CoNLL-X test file with gold tags
 * @param expectedAccuracy expected word accuracy (compared within 1e-4)
 * @throws IOException if the test data cannot be read
 */
private void eval(POSModel model, File testData, double expectedAccuracy) throws IOException {
    ObjectStream<POSSample> goldSamples = new ConllXPOSSampleStream(
        new MarkableFileInputStreamFactory(testData), StandardCharsets.UTF_8);
    POSEvaluator posEvaluator = new POSEvaluator(new POSTaggerME(model));
    posEvaluator.evaluate(goldSamples);
    Assert.assertEquals(expectedAccuracy, posEvaluator.getWordAccuracy(), 0.0001);
}
/**
 * Re-tags the sequence's source sentence with a fresh tagger built around
 * {@code model}, then regenerates the training events for that sentence from
 * the new predictions.
 * <p>
 * NOTE(review): {@code sequence} arrives as a raw type and is cast unchecked
 * to {@code Sequence<POSSample>} — presumably callers always supply POS
 * sequences; confirm against call sites. {@code pcg} is a field defined
 * elsewhere in this class (a context generator, by its usage here).
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    Sequence<POSSample> pss = sequence;
    // Wrap the raw maxent model in a throwaway POSModel/POSTaggerME so it can
    // be applied to the sentence; "x-unspecified" marks the language unknown.
    POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
    String[] sentence = pss.getSource().getSentence();
    // "AddictionalContext" is the (misspelled) upstream API name for the
    // per-token additional context array.
    Object[] ac = pss.getSource().getAddictionalContext();
    String[] tags = tagger.tag(pss.getSource().getSentence());
    Event[] events = new Event[sentence.length];
    POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
        .toArray(events);
    return events;
}
/**
 * Tags each line-wise test sample with the perceptron POS model, chunks the
 * tagged sentence, folds all predicted chunk labels into a digest, and
 * asserts the digest matches the pinned reference value.
 */
@Test
public void evalChunkerModel() throws Exception {
    MessageDigest hasher = MessageDigest.getInstance(HASH_ALGORITHM);
    POSTagger posTagger = new POSTaggerME(new POSModel(
        new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin")));
    Chunker chunkerMe = new ChunkerME(new ChunkerModel(
        new File(getOpennlpDataDir(), "models-sf/en-chunker.bin")));
    try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
        for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
            String[] tokens = sample.getText();
            POSSample tagged = new POSSample(tokens, posTagger.tag(tokens));
            // Fold every predicted chunk label into the running digest.
            for (String chunkLabel : chunkerMe.chunk(tagged.getSentence(), tagged.getTags())) {
                hasher.update(chunkLabel.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
    Assert.assertEquals(new BigInteger("226003515785585284478071030961407561943"),
        new BigInteger(1, hasher.digest()));
}
POSTaggerME tagger = new POSTaggerME(model);
params, this.factory); POSEvaluator evaluator = new POSEvaluator(new POSTaggerME(model), listeners);
/**
 * Loads the bundled English maxent POS model ({@code en-pos-maxent.bin})
 * from the classpath and initializes the tagger.
 * <p>
 * Fixes two defects in the original: the model stream was never closed, and
 * a missing resource produced an NPE inside {@code POSModel} instead of a
 * descriptive error.
 *
 * @throws IOException if the model resource is missing or cannot be read
 */
public OpenNlpTokenExtraction() throws IOException {
    try (InputStream modelIn =
            this.getClass().getClassLoader().getResourceAsStream("en-pos-maxent.bin")) {
        if (modelIn == null) {
            // Fail fast with a clear message instead of an NPE during model parsing.
            throw new IOException("Classpath resource 'en-pos-maxent.bin' not found");
        }
        POSModel model = new POSModel(modelIn);
        posTagger = new POSTaggerME(model);
    }
}
/**
 * Constructs the POSModelResource by reading a serialized {@link POSModel}
 * from the given input stream and building a ready-to-use tagger from it.
 * <p>
 * The caller retains ownership of {@code in} and is responsible for closing it.
 *
 * @param in the input stream containing a serialized POS model
 * @throws IOException if the stream cannot be read or is not a valid model
 */
public POSModelResource(InputStream in) throws IOException {
    posModel = new POSModel(in);
    posTagger = new POSTaggerME(posModel);
}
/**
 * Trains a maxent POS model and verifies the predicted tag sequence for a
 * fixed sample sentence, token by token.
 */
@Test
public void testPOSTagger() throws IOException {
    POSTagger tagger = new POSTaggerME(trainPOSModel(ModelType.MAXENT));

    String[] sentence = {"The", "driver", "got", "badly", "injured", "."};
    String[] expected = {"DT", "NN", "VBD", "RB", "VBN", "."};

    String[] predicted = tagger.tag(sentence);

    Assert.assertEquals(expected.length, predicted.length);
    for (int i = 0; i < expected.length; i++) {
        Assert.assertEquals(expected[i], predicted[i]);
    }
}
/**
 * Creates a tagger by loading and validating a POS model from the local
 * filesystem.
 *
 * @param modelPath path to the directory containing the model files.
 * @param modelManifest manifest identifying the POS model to load.
 * @param validator validator applied to the model before loading.
 * @throws ModelLoaderException if the model cannot be validated or loaded.
 */
public DefaultPartsOfSpeechTagger(String modelPath, StandardModelManifest modelManifest, ModelValidator validator) throws ModelLoaderException {
    LocalModelLoader<POSModel> posModelLoader = new LocalModelLoader<POSModel>(validator, modelPath);
    POSModel model = posModelLoader.getModel(modelManifest, POSModel.class);
    tagger = new POSTaggerME(model);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}.
 *
 * @param model the trained parser model providing build/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}