opennlp.tools.doccat.DocumentCategorizerME.<init> Java code examples

/**
 * Initializes this component from the given UIMA context.
 *
 * <p>After delegating to the superclass and remembering the context, an INFO
 * message is logged (when that level is enabled). The resource registered
 * under {@code UimaUtil.MODEL_PARAMETER} is then looked up, its
 * {@link DoccatModel} is extracted, and a new {@link DocumentCategorizerME}
 * built from that model is stored in {@code mCategorizer}.
 *
 * @param context the UIMA context providing the logger and the model resource
 * @throws ResourceInitializationException if the model resource cannot be
 *         accessed; the underlying {@code ResourceAccessException} is the cause
 */
public void initialize(UimaContext context)
    throws ResourceInitializationException {
  super.initialize(context);
  this.context = context;

  final Logger logger = context.getLogger();
  if (logger.isLoggable(Level.INFO)) {
    logger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
  }

  final DoccatModel model;
  try {
    // The resource is expected to be a DoccatModelResource; the cast fails otherwise.
    model = ((DoccatModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER))
        .getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  mCategorizer = new DocumentCategorizerME(model);
}

new DocumentCategorizerME(model),
listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));

/**
 * Runs a cross validation over the given samples.
 *
 * <p>The samples are split by a {@link CrossValidationPartitioner}. For every
 * partition a model is trained on the training stream (using
 * {@code languageCode}, {@code params} and {@code factory}), evaluated against
 * that partition's test stream, and the resulting accuracy together with the
 * evaluated document count is added to {@code documentAccuracy}.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 *           if training or evaluation fails while reading the samples
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds)
    throws IOException {
  final CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    final CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold =
        folds.next();

    // Train on this fold's training portion only.
    final DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);
    final DocumentCategorizerME categorizer = new DocumentCategorizerME(foldModel);

    final DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(categorizer, listeners);
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}

DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);

/**
 * Trains a categorizer on six tiny documents (three per category) and checks
 * that a token seen only in category "1" documents is classified as "1", that
 * a token seen only in category "0" documents is classified as "0", and that
 * the highest-scored entry of the sorted score map is exactly category "1".
 */
@Test
public void testSimpleTraining() throws IOException {
  ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
      new DocumentSample("1", new String[]{"a", "b", "c"}),
      new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}),
      new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}),
      new DocumentSample("0", new String[]{"x", "y", "z"}),
      new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}),
      new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));

  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  // Cutoff 0 so that no feature of this tiny corpus is discarded.
  params.put(TrainingParameters.CUTOFF_PARAM, 0);

  DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
      params, new DoccatFactory());
  DocumentCategorizer doccat = new DocumentCategorizerME(model);

  double[] aProbs = doccat.categorize(new String[]{"a"});
  Assert.assertEquals("1", doccat.getBestCategory(aProbs));

  double[] bProbs = doccat.categorize(new String[]{"x"});
  Assert.assertEquals("0", doccat.getBestCategory(bProbs));

  // The last key of the sorted map is the highest score; it must map to
  // category "1" and nothing else.
  SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
  Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
  Assert.assertEquals(1, cat.size());
  // Previously only the size was checked, so a single wrong category would have passed.
  Assert.assertTrue(cat.contains("1"));
}

  params, new DoccatFactory());
DocumentCategorizer doccat = new DocumentCategorizerME(model);

DocumentCategorizerME doccat = new DocumentCategorizerME(model);

DoccatModel model = new DoccatModel(modelStream);
 = new DocumentCategorizerME(model, nffg, bowfg);
Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

/**
 * Initializes this component from the given UIMA context.
 *
 * <p>Delegates to the superclass, keeps a reference to the context, logs an
 * INFO message when that level is enabled, and then creates the
 * {@link DocumentCategorizerME} held in {@code mCategorizer} from the
 * {@link DoccatModel} of the resource registered under
 * {@code UimaUtil.MODEL_PARAMETER}.
 *
 * @param context the UIMA context providing the logger and the model resource
 * @throws ResourceInitializationException if the model resource cannot be
 *         accessed; the underlying {@code ResourceAccessException} is the cause
 */
public void initialize(UimaContext context)
    throws ResourceInitializationException {
  super.initialize(context);
  this.context = context;

  final Logger logger = context.getLogger();
  if (logger.isLoggable(Level.INFO)) {
    logger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
  }

  final DoccatModel model;
  try {
    final Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    // The resource is expected to be a DoccatModelResource; the cast fails otherwise.
    model = ((DoccatModelResource) resource).getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  mCategorizer = new DocumentCategorizerME(model);
}

new DocumentCategorizerME(model),
listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));

new DocumentCategorizerME(model),
listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));

DocumentCategorizerME categorizer = new DocumentCategorizerME(doccatModel);
NameFinderME[] nameFinderMEs = new NameFinderME[tokenNameFinderModels.size()];
for (int i = 0; i < tokenNameFinderModels.size(); i++) {

/**
 * Starts the evaluation.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds)
    throws IOException {
  final CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    final CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold =
        folds.next();

    // Train on this fold's training portion only.
    final DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);
    final DocumentCategorizerME categorizer = new DocumentCategorizerME(foldModel);

    // Evaluate against the test portion that belongs to the same fold.
    final DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(categorizer, listeners);
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    // Record this fold's accuracy along with the number of documents evaluated.
    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}

/**
 * Starts the evaluation.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds)
    throws IOException {
  final CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    final CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold =
        folds.next();

    // Train on this fold's training portion only.
    final DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);
    final DocumentCategorizerME categorizer = new DocumentCategorizerME(foldModel);

    // Evaluate against the test portion that belongs to the same fold.
    final DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(categorizer, listeners);
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    // Record this fold's accuracy along with the number of documents evaluated.
    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}

/**
 * Categorizes every sentence in the CAS and records the result as a
 * prediction annotation.
 *
 * <p>The model is taken from the recommender context under {@code KEY_MODEL}.
 * For each sentence, the covered texts of its tokens are passed to a
 * {@link DocumentCategorizerME}; a new annotation of the prediction type
 * spanning the sentence is created, its "score" feature set to the maximum
 * outcome value and its "label" feature to the best category, and the
 * annotation is added to the CAS indexes.
 *
 * @param aContext the recommender context holding the trained model
 * @param aCas the CAS to read sentences and tokens from and to add predictions to
 * @throws RecommendationException if no model is stored under {@code KEY_MODEL}
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException
{
  DoccatModel model = aContext.get(KEY_MODEL).orElseThrow(() ->
      new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

  DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

  Type sentenceType = getType(aCas, Sentence.class);
  Type predictionType = getAnnotationType(aCas, PredictedSpan.class);
  Type tokenType = getType(aCas, Token.class);

  Feature confidenceFeature = predictionType.getFeatureByBaseName("score");
  Feature labelFeature = predictionType.getFeatureByBaseName("label");

  for (AnnotationFS sentence : select(aCas, sentenceType)) {
    // Collect the surface text of each token covered by the sentence.
    List<AnnotationFS> coveredTokens = selectCovered(tokenType, sentence);
    String[] tokenTexts = new String[coveredTokens.size()];
    for (int i = 0; i < tokenTexts.length; i++) {
      tokenTexts[i] = coveredTokens.get(i).getCoveredText();
    }

    double[] scores = categorizer.categorize(tokenTexts);
    String bestLabel = categorizer.getBestCategory(scores);

    // The prediction spans exactly the sentence it was computed from.
    AnnotationFS prediction = aCas.createAnnotation(predictionType, sentence.getBegin(),
        sentence.getEnd());
    prediction.setDoubleValue(confidenceFeature, NumberUtils.max(scores));
    prediction.setStringValue(labelFeature, bestLabel);
    aCas.addFsToIndexes(prediction);
  }
}

DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);

DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);

Javadoc

Initializes the current instance with a doccat model. Default feature generation is used.

Popular in Java

Start an intent from Android
getSupportFragmentManager (FragmentActivity)
startActivity (Activity)
setRequestProperty (URLConnection)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
URI (java.net)
A Uniform Resource Identifier that identifies an abstract or physical resource, as specified by RFC
DateFormat (java.text)
Formats or parses dates and times. This class provides factories for obtaining instances configured f
Executor (java.util.concurrent)
An object that executes submitted Runnable tasks. This interface provides a way of decoupling task s
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
Top Vim plugins

How to use opennlp.tools.doccat.DocumentCategorizerME constructor

Best Java code snippets using opennlp.tools.doccat.DocumentCategorizerME.<init> (Showing top 17 results out of 315)

How to use
opennlp.tools.doccat.DocumentCategorizerME
constructor