/**
 * Trains a lemmatizer on the samples in {@code trial.old-insufficient.tsv} and
 * expects the training call to fail with an
 * {@link InsufficientTrainingDataException}.
 *
 * @throws IOException if the sample file cannot be read
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {

  // Build the sample stream step by step: file -> line stream -> lemma samples.
  File sampleFile = new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv");
  MockInputStreamFactory inputFactory = new MockInputStreamFactory(sampleFile);
  PlainTextByLineStream lineStream = new PlainTextByLineStream(inputFactory, "UTF-8");
  ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(lineStream);

  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  params.put(TrainingParameters.CUTOFF_PARAM, 5);

  // The expected exception has to come out of this call.
  LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
}
@Before public void startup() throws IOException { // train the lemmatizer ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); LemmatizerModel lemmatizerModel = LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory()); this.lemmatizer = new LemmatizerME(lemmatizerModel); }
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } File modelOutFile = params.getModel(); CmdLineUtil.checkOutputFile("lemmatizer model", modelOutFile); LemmatizerModel model; try { LemmatizerFactory lemmatizerFactory = LemmatizerFactory .create(params.getFactory()); model = LemmatizerME.train(params.getLang(), sampleStream, mlParams, lemmatizerFactory); } catch (IOException e) { throw createTerminationIOException(e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry that this can fail } } CmdLineUtil.writeModel("lemmatizer", modelOutFile, model); } }
/**
 * Trains a lemmatizer on the samples in {@code trainFile} and returns the word
 * accuracy the resulting model reaches on the samples in {@code evalFile}.
 *
 * <p>Both sample streams are opened with try-with-resources so the underlying
 * files are released as soon as training or evaluation is done, including when
 * either step throws.
 *
 * @param lang language code handed to {@code LemmatizerME.train}
 * @param trainFile file the training samples are read from
 * @param params training parameters handed to {@code LemmatizerME.train}
 * @param evalFile file the evaluation samples are read from
 * @return the value of {@code LemmatizerEvaluator.getWordAccuracy()} after evaluation
 * @throws IOException if reading, training, evaluating, or closing a stream fails
 */
private double trainAndEval(String lang, File trainFile, TrainingParameters params,
    File evalFile) throws IOException {

  ConlluTagset tagset = ConlluTagset.X;

  LemmatizerModel model;
  try (ObjectStream<LemmaSample> trainSamples = new ConlluLemmaSampleStream(
      new ConlluStream(new MarkableFileInputStreamFactory(trainFile)), tagset)) {
    model = LemmatizerME.train(lang, trainSamples, params, new LemmatizerFactory());
  }

  LemmatizerEvaluator evaluator = new LemmatizerEvaluator(new LemmatizerME(model));

  try (ObjectStream<LemmaSample> evalSamples = new ConlluLemmaSampleStream(
      new ConlluStream(new MarkableFileInputStreamFactory(evalFile)), tagset)) {
    evaluator.evaluate(evalSamples);
  }

  return evaluator.getWordAccuracy();
}
/**
 * Builds a task that trains a lemmatizer model using this object's settings
 * ({@code algorithm}, {@code trainerType}, {@code iterations}, {@code cutoff},
 * {@code numThreads}, {@code beamSize} and {@code language}).
 *
 * <p>If training fails, the task closes the stream returned by
 * {@code getStream()} and rethrows the original failure. A problem raised while
 * closing is attached to that failure as a suppressed exception instead of
 * replacing it, so the real cause of the failed training stays visible.
 *
 * <p>NOTE(review): the stream is not closed here when training succeeds;
 * presumably that is handled elsewhere - confirm against the caller.
 *
 * @return a task that performs the training when called
 */
@Override
public Callable<? extends LemmatizerModel> makeTrainer() {
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ALGORITHM_PARAM, algorithm);
  params.put(TrainingParameters.TRAINER_TYPE_PARAM, trainerType);
  params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iterations));
  params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cutoff));
  params.put(TrainingParameters.THREADS_PARAM, Integer.toString(numThreads));
  params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));

  Callable<LemmatizerModel> trainTask = () -> {
    try {
      return LemmatizerME.train(language, getStream(), params, new LemmatizerFactory());
    } catch (Throwable e) {
      // Clean up, but never let a close() problem hide the training failure.
      try {
        getStream().close();
      } catch (Exception closeFailure) {
        // addSuppressed rejects self-suppression, so guard against the same instance.
        if (closeFailure != e) {
          e.addSuppressed(closeFailure);
        }
      }
      throw e;
    }
  };

  return trainTask;
}
}
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } File modelOutFile = params.getModel(); CmdLineUtil.checkOutputFile("lemmatizer model", modelOutFile); LemmatizerModel model; try { LemmatizerFactory lemmatizerFactory = LemmatizerFactory .create(params.getFactory()); model = LemmatizerME.train(params.getLang(), sampleStream, mlParams, lemmatizerFactory); } catch (IOException e) { throw createTerminationIOException(e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry that this can fail } } CmdLineUtil.writeModel("lemmatizer", modelOutFile, model); } }
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } File modelOutFile = params.getModel(); CmdLineUtil.checkOutputFile("lemmatizer model", modelOutFile); LemmatizerModel model; try { LemmatizerFactory lemmatizerFactory = LemmatizerFactory .create(params.getFactory()); model = LemmatizerME.train(params.getLang(), sampleStream, mlParams, lemmatizerFactory); } catch (IOException e) { throw createTerminationIOException(e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry that this can fail } } CmdLineUtil.writeModel("lemmatizer", modelOutFile, model); } }