/**
 * Trains a language detector model with a fixed set of training parameters
 * (100 iterations, cutoff 5, "TwoPass" data indexer, "NAIVEBAYES" algorithm).
 *
 * <p>The sample stream obtained from {@code createSampleStream()} is closed when
 * training finishes, whether it completes normally or throws.
 *
 * @param factory
 *          the factory passed through to {@code LanguageDetectorME.train}
 * @return the model produced by {@code LanguageDetectorME.train}
 * @throws Exception
 *           if creating the sample stream, training, or closing the stream fails
 */
public static LanguageDetectorModel trainModel(LanguageDetectorFactory factory) throws Exception {
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  params.put(TrainingParameters.CUTOFF_PARAM, 5);
  params.put("DataIndexer", "TwoPass");
  params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

  // The stream is created and owned by this method, so it is released here
  // instead of being left open after training.
  try (LanguageDetectorSampleStream sampleStream = createSampleStream()) {
    return LanguageDetectorME.train(sampleStream, params, factory);
  }
}
// Creates the LanguageDetectorFactory from params.getFactory() and trains `model`
// from sampleStream using mlParams. An IOException raised inside the try block is
// handed to createTerminationIOException(e) and the returned exception is thrown.
// NOTE(review): the end of the catch block is not visible in this excerpt.
try { LanguageDetectorFactory factory = LanguageDetectorFactory.create(params.getFactory()); model = LanguageDetectorME.train(sampleStream, mlParams, factory); } catch (IOException e) { throw createTerminationIOException(e);
/**
 * Runs the cross validation: the samples are split into folds and, for each
 * fold, a model is trained on the training part and evaluated on the matching
 * test part. Each fold's accuracy and document count are added to
 * {@code documentAccuracy}.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 *           if one of the partitioning, training or evaluation calls throws it
 */
public void evaluate(ObjectStream<LanguageSample> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<LanguageSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<LanguageSample> trainingPart = folds.next();

    // Train on this fold's training part.
    LanguageDetectorModel foldModel = LanguageDetectorME.train(trainingPart, params, factory);

    // Evaluate the freshly trained detector on the held-out part of the same fold.
    LanguageDetectorME detector = new LanguageDetectorME(foldModel);
    LanguageDetectorEvaluator foldEvaluator = new LanguageDetectorEvaluator(detector, listeners);
    foldEvaluator.evaluate(trainingPart.getTestSampleStream());

    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}
/**
 * Trains the shared {@code model} from the classpath resource
 * {@code /opennlp/tools/doccat/DoccatSample.txt} using 100 iterations, a cutoff
 * of 5 and the "NAIVEBAYES" algorithm.
 *
 * <p>Both streams opened for training are closed when training finishes,
 * whether it completes normally or throws.
 *
 * @throws Exception
 *           if opening the resource, training, or closing the streams fails
 */
@BeforeClass
public static void train() throws Exception {
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, "100");
  params.put(TrainingParameters.CUTOFF_PARAM, "5");
  params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
      LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");

  // The streams are local to this method, so they are released here rather
  // than staying open for the rest of the test run.
  try (PlainTextByLineStream lineStream = new PlainTextByLineStream(streamFactory, "UTF-8");
       LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(lineStream)) {
    model = LanguageDetectorME.train(sampleStream, params, new DummyFactory());
  }
}
/**
 * Performs the cross validation over the given samples. Every fold yields one
 * trained model, which is then scored against that fold's test samples. The
 * score is added to {@code documentAccuracy} together with the evaluator's
 * document count.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 *           if one of the partitioning, training or evaluation calls throws it
 */
public void evaluate(ObjectStream<LanguageSample> samples, int nFolds) throws IOException {
  for (CrossValidationPartitioner<LanguageSample> splitter =
           new CrossValidationPartitioner<>(samples, nFolds);
       splitter.hasNext(); ) {

    CrossValidationPartitioner.TrainingSampleStream<LanguageSample> trainSplit = splitter.next();

    LanguageDetectorModel trained = LanguageDetectorME.train(trainSplit, params, factory);

    LanguageDetectorEvaluator scorer =
        new LanguageDetectorEvaluator(new LanguageDetectorME(trained), listeners);

    // The test samples are requested only after training on this split.
    scorer.evaluate(trainSplit.getTestSampleStream());

    documentAccuracy.add(scorer.getAccuracy(), scorer.getDocumentCount());
  }
}
// Trains `model`: the factory is obtained via
// LanguageDetectorFactory.create(params.getFactory()) and passed, together with
// sampleStream and mlParams, to LanguageDetectorME.train. A caught IOException is
// converted by createTerminationIOException(e) and the result is thrown.
// NOTE(review): the end of the catch block is not visible in this excerpt.
try { LanguageDetectorFactory factory = LanguageDetectorFactory.create(params.getFactory()); model = LanguageDetectorME.train(sampleStream, mlParams, factory); } catch (IOException e) { throw createTerminationIOException(e);
// Training step: builds the factory from params.getFactory(), then assigns the
// result of LanguageDetectorME.train(sampleStream, mlParams, factory) to `model`.
// An IOException from the try block is wrapped through
// createTerminationIOException(e) and thrown.
// NOTE(review): the end of the catch block is not visible in this excerpt.
try { LanguageDetectorFactory factory = LanguageDetectorFactory.create(params.getFactory()); model = LanguageDetectorME.train(sampleStream, mlParams, factory); } catch (IOException e) { throw createTerminationIOException(e);
/**
 * Cross-validates a language detector on the given samples. The samples are
 * partitioned into {@code nFolds} folds. For each one a model is trained and
 * then evaluated on that fold's test stream, and the accuracy and document
 * count are added to {@code documentAccuracy}.
 *
 * @param samples
 *          the data to train and test
 * @param nFolds
 *          number of folds
 *
 * @throws IOException
 *           if one of the partitioning, training or evaluation calls throws it
 */
public void evaluate(ObjectStream<LanguageSample> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<LanguageSample> cv =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (cv.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<LanguageSample> trainingData = cv.next();

    LanguageDetectorModel currentModel =
        LanguageDetectorME.train(trainingData, params, factory);

    LanguageDetectorEvaluator currentEvaluator =
        new LanguageDetectorEvaluator(new LanguageDetectorME(currentModel), listeners);

    // Fetch the test stream after the evaluator is built, as in the original call order.
    ObjectStream<LanguageSample> testData = trainingData.getTestSampleStream();
    currentEvaluator.evaluate(testData);

    documentAccuracy.add(
        currentEvaluator.getAccuracy(),
        currentEvaluator.getDocumentCount());
  }
}