opennlp.tools.doccat.DoccatModel java code examples

 /**
  * Builds a {@link DoccatModel} from the supplied stream.
  *
  * @param in stream handed to the {@code DoccatModel} constructor
  * @return the model constructed from {@code in}
  * @throws IOException propagated from the {@code DoccatModel} constructor
  */
 @Override
 protected DoccatModel loadModel(InputStream in) throws IOException {
  DoccatModel loaded = new DoccatModel(in);
  return loaded;
 }
}

/**
 * Creates a categorizer backed by the given doccat model. The context
 * generator is built from the feature generators exposed by the model's
 * factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
 this.model = model;
 // The field was just assigned from the parameter, so both refer to the same model.
 this.mContextGenerator = new DocumentCategorizerContextGenerator(
   model.getFactory().getFeatureGenerators());
}

/**
 * Creates a doccat model from an already trained maxent model.
 *
 * @param languageCode        language code forwarded to the superclass constructor
 * @param doccatModel         maxent model stored under {@code DOCCAT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory             factory forwarded to the superclass constructor
 */
public DoccatModel(
  String languageCode,
  MaxentModel doccatModel,
  Map<String, String> manifestInfoEntries,
  DoccatFactory factory) {
 super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
 // Register the model first; the artifact map check runs afterwards.
 this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
 this.checkArtifactMap();
}

/**
 * Trains a model with the default setup, round-trips it through
 * serialization, and checks that the restored factory exposes exactly one
 * feature generator of type {@link BagOfWordsFeatureGenerator}.
 */
@Test
public void testDefault() throws IOException {
 DoccatModel trained = train();
 Assert.assertNotNull(trained);

 // Serialize into memory and read the model back from the same bytes.
 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 trained.serialize(buffer);
 DoccatModel restored = new DoccatModel(
   new ByteArrayInputStream(buffer.toByteArray()));

 DoccatFactory restoredFactory = restored.getFactory();
 Assert.assertNotNull(restoredFactory);
 Assert.assertEquals(1, restoredFactory.getFeatureGenerators().length);
 Assert.assertEquals(BagOfWordsFeatureGenerator.class,
   restoredFactory.getFeatureGenerators()[0].getClass());
}

    actg, parser);
GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/>
// Build the doccat model before opening the file, then make sure the output
// stream is closed once serialization finishes (the original never closed it).
DoccatModel doccatModel = new DoccatModel("en", model);
try (FileOutputStream modelOut = new FileOutputStream(outFile)) {
 doccatModel.serialize(modelOut);
}

/**
 * Configures this component from the given init arguments.
 *
 * <p>Reads {@code modelDirectory} and {@code wordnetDirectory} from the
 * arguments, falling back to the {@code model.dir} and {@code wordnet.dir}
 * system properties. When a model directory is available, loads the chunker
 * ({@code en-chunker.bin}), the POS tagger ({@code en-pos-maxent.bin}) and
 * the model stored in {@code en-answer.bin}, sizes {@code probs} to the
 * model's number of outcomes, and creates the answer type context generator
 * from the {@code dict} directory under the WordNet directory. When no model
 * directory is configured nothing is loaded.
 *
 * @param initArgs init arguments converted to {@code SolrParams}
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         loading the models
 */
public void init(NamedList initArgs) {
 SolrParams params = SolrParams.toSolrParams(initArgs);
 String modelDirectory = params.get("modelDirectory",
     System.getProperty("model.dir"));//<co id="qqpp.model"/>
 String wordnetDirectory = params.get("wordnetDirectory",
     System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/>
 if (modelDirectory != null) {
  File modelsDir = new File(modelDirectory);
  try {
   // Each model file gets its own try-with-resources so the stream is
   // closed as soon as the model is read, and the load order is unchanged.
   try (InputStream chunkerStream = new FileInputStream(
     new File(modelsDir,"en-chunker.bin"))) {
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/>
   }
   try (InputStream posStream = new FileInputStream(
     new File(modelsDir,"en-pos-maxent.bin"))) {
    POSModel posModel = new POSModel(posStream);
    tagger =  new POSTaggerME(posModel); //<co id="qqpp.tagger"/>
   }
   try (InputStream answerStream = new FileInputStream( //<co id="qqpp.theModel"/>
     new File(modelsDir,"en-answer.bin"))) {
    // NOTE(review): getChunkerModel() is the accessor name in older OpenNLP
    // releases; other callers use getMaxentModel() - confirm against the
    // OpenNLP version on the classpath.
    model = new DoccatModel(answerStream)
      .getChunkerModel();
   }
   probs = new double[model.getNumOutcomes()];
   atcg = new AnswerTypeContextGenerator(
       new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/>
  } catch (IOException e) {
   throw new RuntimeException(e);
  }
 }
}
/*

/**
 * Delegates to {@code getAllOutcomes} of the doccat model's maxent model.
 *
 * @param results values passed through to {@code getAllOutcomes}
 * @return the string returned by {@code getAllOutcomes}
 */
public String getAllResults(double[] results) {
 return this.model
   .getMaxentModel()
   .getAllOutcomes(results);
}

 /**
  * Trains a document categorizer from the files listed for {@code source}
  * and serializes the resulting model to {@code destination}.
  *
  * @param source      path handed to {@code FileUtil.buildFileList} to collect the input files
  * @param destination path of the file the trained model is written to
  * @throws IOException if an I/O error is raised while training or while writing the model
  */
 public void train(String source, String destination) throws IOException {
  //<start id="maxent.examples.train.setup"/> 
  File[] inputFiles = FileUtil.buildFileList(new File(source));
  File modelFile = new File(destination);
  
  Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="tm.tok"/>
  CategoryDataStream ds = new CategoryDataStream(inputFiles, tokenizer);

  // Cutoff and iteration count passed straight to the trainer.
  int cutoff = 5;
  int iterations = 100;
  NameFinderFeatureGenerator nffg //<co id="tm.fg"/>
   = new NameFinderFeatureGenerator();
  BagOfWordsFeatureGenerator bowfg 
   = new BagOfWordsFeatureGenerator();

  DoccatModel model = DocumentCategorizerME.train("en", 
    ds, cutoff, iterations, nffg, bowfg); //<co id="tm.train"/>
  // Close the output stream once the model is written; the original
  // passed an anonymous FileOutputStream that was never closed.
  try (FileOutputStream modelOut = new FileOutputStream(modelFile)) {
   model.serialize(modelOut);
  }
  
/*<calloutlist>
<callout arearefs="tm.tok">Create data stream</callout>
<callout arearefs="tm.fg">Set up features generators</callout> 
<callout arearefs="tm.train">Train categorizer</callout>  
</calloutlist>*/
//<end id="maxent.examples.train.setup"/>
 }

/**
 * Trains a model with a factory holding three custom feature generators,
 * round-trips it through serialization, and checks that the restored factory
 * reports the same three generator types in the same order.
 */
@Test
public void testCustom() throws IOException {
 FeatureGenerator[] featureGenerators = {
   new BagOfWordsFeatureGenerator(),
   new NGramFeatureGenerator(),
   new NGramFeatureGenerator(2,3) };
 DoccatModel trained = train(new DoccatFactory(featureGenerators));
 Assert.assertNotNull(trained);

 // Serialize into memory and read the model back from the same bytes.
 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 trained.serialize(buffer);
 DoccatModel restored = new DoccatModel(
   new ByteArrayInputStream(buffer.toByteArray()));

 DoccatFactory restoredFactory = restored.getFactory();
 Assert.assertNotNull(restoredFactory);
 Assert.assertEquals(3, restoredFactory.getFeatureGenerators().length);

 // Generator types expected at positions 0, 1 and 2 after deserialization.
 Class<?>[] expectedTypes = {
   BagOfWordsFeatureGenerator.class,
   NGramFeatureGenerator.class,
   NGramFeatureGenerator.class };
 for (int i = 0; i < expectedTypes.length; i++) {
  Assert.assertEquals(expectedTypes[i],
    restoredFactory.getFeatureGenerators()[i].getClass());
 }
}

/**
 * Delegates to {@code getBestOutcome} of the doccat model's maxent model.
 *
 * @param outcome values passed through to {@code getBestOutcome}
 * @return the string returned by {@code getBestOutcome}
 */
public String getBestCategory(double[] outcome) {
 return this.model
   .getMaxentModel()
   .getBestOutcome(outcome);
}

/**
 * Builds a {@link DoccatModel} from the supplied stream.
 *
 * @param modelIn stream handed to the {@code DoccatModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException propagated from the {@code DoccatModel} constructor
 */
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
 DoccatModel loaded = new DoccatModel(modelIn);
 return loaded;
}

/**
 * Delegates to {@code getIndex} of the doccat model's maxent model.
 *
 * @param category name passed through to {@code getIndex}
 * @return the value returned by {@code getIndex}
 */
public int getIndex(String category) {
 return this.model
   .getMaxentModel()
   .getIndex(category);
}

/**
 * Creates a categorizer backed by the given doccat model. The context
 * generator is built from the feature generators exposed by the model's
 * factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
 this.model = model;
 // The field was just assigned from the parameter, so both refer to the same model.
 this.mContextGenerator = new DocumentCategorizerContextGenerator(
   model.getFactory().getFeatureGenerators());
}

/**
 * Creates a doccat model from an already trained maxent model.
 *
 * @param languageCode        language code forwarded to the superclass constructor
 * @param doccatModel         maxent model stored under {@code DOCCAT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory             factory forwarded to the superclass constructor
 */
public DoccatModel(
  String languageCode,
  MaxentModel doccatModel,
  Map<String, String> manifestInfoEntries,
  DoccatFactory factory) {
 super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
 // Register the model first; the artifact map check runs afterwards.
 this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
 this.checkArtifactMap();
}

 /**
  * Trains a doccat model from the given samples.
  *
  * <p>An event trainer is obtained from {@code TrainerFactory} for the given
  * parameters, fed a {@code DocumentCategorizerEventStream} built from the
  * samples and the factory's feature generators, and the resulting maxent
  * model is wrapped in a {@code DoccatModel}.
  *
  * @param languageCode language code stored in the returned model
  * @param samples      stream of document samples to train on
  * @param mlParams     training parameters used to select the event trainer
  * @param factory      factory supplying the feature generators; also stored in the model
  * @return the trained doccat model
  * @throws IOException propagated from the trainer
  */
 public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
   TrainingParameters mlParams, DoccatFactory factory)
     throws IOException {

  // Shared with the trainer factory and then handed to the model.
  Map<String, String> manifest = new HashMap<>();

  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifest);

  return new DoccatModel(
    languageCode,
    eventTrainer.train(
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators())),
    manifest,
    factory);
 }
}

/**
 * Delegates to {@code getOutcome} of the doccat model's maxent model.
 *
 * @param index index passed through to {@code getOutcome}
 * @return the string returned by {@code getOutcome}
 */
public String getCategory(int index) {
 return this.model
   .getMaxentModel()
   .getOutcome(index);
}

/**
 * Creates a categorizer backed by the given doccat model. The context
 * generator is built from the feature generators exposed by the model's
 * factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
 this.model = model;
 // The field was just assigned from the parameter, so both refer to the same model.
 this.mContextGenerator = new DocumentCategorizerContextGenerator(
   model.getFactory().getFeatureGenerators());
}

/**
 * Creates a doccat model from an already trained maxent model.
 *
 * @param languageCode        language code forwarded to the superclass constructor
 * @param doccatModel         maxent model stored under {@code DOCCAT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory             factory forwarded to the superclass constructor
 */
public DoccatModel(
  String languageCode,
  MaxentModel doccatModel,
  Map<String, String> manifestInfoEntries,
  DoccatFactory factory) {
 super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
 // Register the model first; the artifact map check runs afterwards.
 this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
 this.checkArtifactMap();
}

 /**
  * Builds a {@link DoccatModel} from the supplied stream.
  *
  * @param in stream handed to the {@code DoccatModel} constructor
  * @return the model constructed from {@code in}
  * @throws IOException propagated from the {@code DoccatModel} constructor
  */
 @Override
 protected DoccatModel loadModel(InputStream in) throws IOException {
  DoccatModel loaded = new DoccatModel(in);
  return loaded;
 }
}

/**
 * Delegates to {@code getNumOutcomes} of the doccat model's maxent model.
 *
 * @return the value returned by {@code getNumOutcomes}
 */
public int getNumberOfCategories() {
 return this.model
   .getMaxentModel()
   .getNumOutcomes();
}

Javadoc

A model for document categorization

Most used methods

Popular in Java

Parsing JSON documents to java classes using gson
setContentView (Activity)
getExternalFilesDir (Context)
findViewById (Activity)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
Vector (java.util)
Vector is an implementation of List, backed by an array and synchronized. All optional operations in
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
JFrame (javax.swing)
From CI to AI: The AI layer in your organization

How to use DoccatModel in opennlp.tools.doccat

Best Java code snippets using opennlp.tools.doccat.DoccatModel (Showing top 20 results out of 315)

How to use DoccatModel in opennlp.tools.doccat