/**
 * Builds a {@link DoccatModel} by handing the supplied stream to the
 * {@code DoccatModel(InputStream)} constructor.
 *
 * @param in the stream the model is constructed from
 * @return the newly constructed model
 * @throws IOException if the model constructor reports an I/O failure
 */
@Override
protected DoccatModel loadModel(InputStream in) throws IOException {
  final DoccatModel loaded = new DoccatModel(in);
  return loaded;
}
}
/**
 * Creates a categorizer backed by the given doccat model.
 * <p>
 * The context generator is built from the feature generators returned by
 * the model's factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Creates a doccat model from an already trained maxent model.
 * <p>
 * The component name, language code, manifest entries and factory are
 * passed to the superclass constructor. The maxent model is then stored in
 * the artifact map under {@code DOCCAT_MODEL_ENTRY_NAME} and
 * {@code checkArtifactMap()} is invoked.
 *
 * @param languageCode the language code handed to the superclass
 * @param doccatModel the maxent model to store as the doccat artifact
 * @param manifestInfoEntries manifest entries handed to the superclass
 * @param factory the factory handed to the superclass
 */
public DoccatModel(
    String languageCode,
    MaxentModel doccatModel,
    Map<String, String> manifestInfoEntries,
    DoccatFactory factory) {
  super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);

  this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
  this.checkArtifactMap();
}
/**
 * Trains a model with {@code train()}, serializes it to memory, reads it
 * back, and checks that the restored factory exposes exactly one feature
 * generator of type {@link BagOfWordsFeatureGenerator}.
 */
@Test
public void testDefault() throws IOException {
  DoccatModel trained = train();
  Assert.assertNotNull(trained);

  // Round-trip the model through an in-memory buffer.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  ByteArrayInputStream serialized = new ByteArrayInputStream(buffer.toByteArray());
  DoccatModel restored = new DoccatModel(serialized);

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);

  Assert.assertEquals(1, restoredFactory.getFeatureGenerators().length);
  Assert.assertEquals(
      BagOfWordsFeatureGenerator.class,
      restoredFactory.getFeatureGenerators()[0].getClass());
}
actg, parser); GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/> new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
public void init(NamedList initArgs) { SolrParams params = SolrParams.toSolrParams(initArgs); String modelDirectory = params.get("modelDirectory", System.getProperty("model.dir"));//<co id="qqpp.model"/> String wordnetDirectory = params.get("wordnetDirectory", System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/> if (modelDirectory != null) { File modelsDir = new File(modelDirectory); try { InputStream chunkerStream = new FileInputStream( new File(modelsDir,"en-chunker.bin")); ChunkerModel chunkerModel = new ChunkerModel(chunkerStream); chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/> InputStream posStream = new FileInputStream( new File(modelsDir,"en-pos-maxent.bin")); POSModel posModel = new POSModel(posStream); tagger = new POSTaggerME(posModel); //<co id="qqpp.tagger"/> model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/> new File(modelDirectory,"en-answer.bin"))) .getChunkerModel(); probs = new double[model.getNumOutcomes()]; atcg = new AnswerTypeContextGenerator( new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/> } catch (IOException e) { throw new RuntimeException(e); } } } /*
/**
 * Returns the value produced by {@code getAllOutcomes} on the maxent model
 * held by the doccat model, for the given result array.
 *
 * @param results the values forwarded to the maxent model
 * @return the string returned by the maxent model
 */
public String getAllResults(double[] results) {
  final String allOutcomes = this.model.getMaxentModel().getAllOutcomes(results);
  return allOutcomes;
}
public void train(String source, String destination) throws IOException { //<start id="maxent.examples.train.setup"/> File[] inputFiles = FileUtil.buildFileList(new File(source)); File modelFile = new File(destination); Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="tm.tok"/> CategoryDataStream ds = new CategoryDataStream(inputFiles, tokenizer); int cutoff = 5; int iterations = 100; NameFinderFeatureGenerator nffg //<co id="tm.fg"/> = new NameFinderFeatureGenerator(); BagOfWordsFeatureGenerator bowfg = new BagOfWordsFeatureGenerator(); DoccatModel model = DocumentCategorizerME.train("en", ds, cutoff, iterations, nffg, bowfg); //<co id="tm.train"/> model.serialize(new FileOutputStream(modelFile)); /*<calloutlist> <callout arearefs="tm.tok">Create data stream</callout> <callout arearefs="tm.fg">Set up features generators</callout> <callout arearefs="tm.train">Train categorizer</callout> </calloutlist>*/ //<end id="maxent.examples.train.setup"/> }
/**
 * Trains a model with a factory configured with three feature generators,
 * serializes it to memory, reads it back, and checks that the restored
 * factory exposes the same three generator types in the same order.
 */
@Test
public void testCustom() throws IOException {
  FeatureGenerator[] featureGenerators = {
      new BagOfWordsFeatureGenerator(),
      new NGramFeatureGenerator(),
      new NGramFeatureGenerator(2,3)
  };

  DoccatModel trained = train(new DoccatFactory(featureGenerators));
  Assert.assertNotNull(trained);

  // Round-trip the model through an in-memory buffer.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  ByteArrayInputStream serialized = new ByteArrayInputStream(buffer.toByteArray());
  DoccatModel restored = new DoccatModel(serialized);

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);

  Class<?>[] expectedTypes = {
      BagOfWordsFeatureGenerator.class,
      NGramFeatureGenerator.class,
      NGramFeatureGenerator.class
  };
  Assert.assertEquals(3, restoredFactory.getFeatureGenerators().length);
  for (int i = 0; i < expectedTypes.length; i++) {
    Assert.assertEquals(expectedTypes[i],
        restoredFactory.getFeatureGenerators()[i].getClass());
  }
}
/**
 * Returns the value produced by {@code getBestOutcome} on the maxent model
 * held by the doccat model, for the given outcome array.
 *
 * @param outcome the values forwarded to the maxent model
 * @return the string returned by the maxent model
 */
public String getBestCategory(double[] outcome) {
  final String best = this.model.getMaxentModel().getBestOutcome(outcome);
  return best;
}
/**
 * Builds a {@link DoccatModel} by handing the supplied stream to the
 * {@code DoccatModel(InputStream)} constructor.
 *
 * @param modelIn the stream the model is constructed from
 * @return the newly constructed model
 * @throws IOException if the model constructor reports an I/O failure
 */
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
  final DoccatModel loaded = new DoccatModel(modelIn);
  return loaded;
}
/**
 * Returns the value produced by {@code getIndex} on the maxent model held
 * by the doccat model, for the given category.
 *
 * @param category the category name forwarded to the maxent model
 * @return the index returned by the maxent model
 */
public int getIndex(String category) {
  final int index = this.model.getMaxentModel().getIndex(category);
  return index;
}
/**
 * Creates a categorizer backed by the given doccat model.
 * <p>
 * The context generator is built from the feature generators returned by
 * the model's factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Creates a doccat model from an already trained maxent model.
 * <p>
 * The component name, language code, manifest entries and factory are
 * passed to the superclass constructor. The maxent model is then stored in
 * the artifact map under {@code DOCCAT_MODEL_ENTRY_NAME} and
 * {@code checkArtifactMap()} is invoked.
 *
 * @param languageCode the language code handed to the superclass
 * @param doccatModel the maxent model to store as the doccat artifact
 * @param manifestInfoEntries manifest entries handed to the superclass
 * @param factory the factory handed to the superclass
 */
public DoccatModel(
    String languageCode,
    MaxentModel doccatModel,
    Map<String, String> manifestInfoEntries,
    DoccatFactory factory) {
  super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);

  this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
  this.checkArtifactMap();
}
/**
 * Trains a doccat model from the given samples.
 * <p>
 * An event trainer is obtained from {@code TrainerFactory} for the supplied
 * training parameters, and is run on a
 * {@code DocumentCategorizerEventStream} built from the samples and the
 * factory's feature generators. The manifest map passed to the trainer
 * factory is also handed to the resulting model.
 *
 * @param languageCode the language code stored in the resulting model
 * @param samples the training samples
 * @param mlParams the training parameters used to select the event trainer
 * @param factory the factory providing the feature generators
 * @return the model wrapping the trained maxent model
 * @throws IOException declared by this method; raised by the training calls
 */
public static DoccatModel train(
    String languageCode,
    ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams,
    DoccatFactory factory) throws IOException {

  Map<String, String> manifest = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifest);

  // Built after the trainer lookup, matching the original evaluation order.
  DocumentCategorizerEventStream events =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  MaxentModel trained = eventTrainer.train(events);

  return new DoccatModel(languageCode, trained, manifest, factory);
}
}
/**
 * Returns the value produced by {@code getOutcome} on the maxent model held
 * by the doccat model, for the given index.
 *
 * @param index the index forwarded to the maxent model
 * @return the string returned by the maxent model
 */
public String getCategory(int index) {
  final String category = this.model.getMaxentModel().getOutcome(index);
  return category;
}
/**
 * Creates a categorizer backed by the given doccat model.
 * <p>
 * The context generator is built from the feature generators returned by
 * the model's factory.
 *
 * @param model the doccat model
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Creates a doccat model from an already trained maxent model.
 * <p>
 * The component name, language code, manifest entries and factory are
 * passed to the superclass constructor. The maxent model is then stored in
 * the artifact map under {@code DOCCAT_MODEL_ENTRY_NAME} and
 * {@code checkArtifactMap()} is invoked.
 *
 * @param languageCode the language code handed to the superclass
 * @param doccatModel the maxent model to store as the doccat artifact
 * @param manifestInfoEntries manifest entries handed to the superclass
 * @param factory the factory handed to the superclass
 */
public DoccatModel(
    String languageCode,
    MaxentModel doccatModel,
    Map<String, String> manifestInfoEntries,
    DoccatFactory factory) {
  super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);

  this.artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
  this.checkArtifactMap();
}
/**
 * Builds a {@link DoccatModel} by handing the supplied stream to the
 * {@code DoccatModel(InputStream)} constructor.
 *
 * @param in the stream the model is constructed from
 * @return the newly constructed model
 * @throws IOException if the model constructor reports an I/O failure
 */
@Override
protected DoccatModel loadModel(InputStream in) throws IOException {
  final DoccatModel loaded = new DoccatModel(in);
  return loaded;
}
}
/**
 * Returns the value produced by {@code getNumOutcomes} on the maxent model
 * held by the doccat model.
 *
 * @return the outcome count reported by the maxent model
 */
public int getNumberOfCategories() {
  final int outcomeCount = this.model.getMaxentModel().getNumOutcomes();
  return outcomeCount;
}