/**
 * Builds the lemmatizer sample stream described by the command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they name is
 * checked with {@code CmdLineUtil.checkInputFile}, read line by line in the
 * configured encoding, and wrapped in a {@code LemmaSampleStream}.
 *
 * @param args command-line arguments, parsed with {@code ArgumentParser}
 * @return a {@code LemmaSampleStream} over the lines of the data file
 */
public ObjectStream<LemmaSample> create(String[] args) {
  Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsedArgs.getData());

  InputStreamFactory dataSource =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataSource, parsedArgs.getEncoding());
  } catch (IOException openFailure) {
    // NOTE(review): if this handler ever returns normally, `lines` is still null
    // below. Presumably it aborts the tool by throwing -- confirm in CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(openFailure);
  }

  return new LemmaSampleStream(lines);
}
} // end of enclosing class
/**
 * Training on the {@code trial.old-insufficient.tsv} sample is expected to fail
 * with an {@link InsufficientTrainingDataException}.
 *
 * <p>The sample stream is opened in a try-with-resources block so that it is
 * closed even though training is expected to throw.
 *
 * @throws IOException if the sample data cannot be read or the stream cannot be closed
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  params.put(TrainingParameters.CUTOFF_PARAM, 5);

  try (ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(
      new PlainTextByLineStream(
          new MockInputStreamFactory(
              new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv")),
          "UTF-8"))) {
    // The expected exception propagates out of this call; the stream is
    // closed on the way out by try-with-resources.
    LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
  }
}
@Before public void startup() throws IOException { // train the lemmatizer ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); LemmatizerModel lemmatizerModel = LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory()); this.lemmatizer = new LemmatizerME(lemmatizerModel); }
/**
 * Builds the lemmatizer sample stream described by the command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they name is
 * checked with {@code CmdLineUtil.checkInputFile}, read line by line in the
 * configured encoding, and wrapped in a {@code LemmaSampleStream}.
 *
 * @param args command-line arguments, parsed with {@code ArgumentParser}
 * @return a {@code LemmaSampleStream} over the lines of the data file
 */
public ObjectStream<LemmaSample> create(String[] args) {
  Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsedArgs.getData());

  InputStreamFactory dataSource =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataSource, parsedArgs.getEncoding());
  } catch (IOException openFailure) {
    // NOTE(review): if this handler ever returns normally, `lines` is still null
    // below. Presumably it aborts the tool by throwing -- confirm in CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(openFailure);
  }

  return new LemmaSampleStream(lines);
}
} // end of enclosing class
/**
 * Builds the lemmatizer sample stream described by the command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they name is
 * checked with {@code CmdLineUtil.checkInputFile}, read line by line in the
 * configured encoding, and wrapped in a {@code LemmaSampleStream}.
 *
 * @param args command-line arguments, parsed with {@code ArgumentParser}
 * @return a {@code LemmaSampleStream} over the lines of the data file
 */
public ObjectStream<LemmaSample> create(String[] args) {
  Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsedArgs.getData());

  InputStreamFactory dataSource =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataSource, parsedArgs.getEncoding());
  } catch (IOException openFailure) {
    // NOTE(review): if this handler ever returns normally, `lines` is still null
    // below. Presumably it aborts the tool by throwing -- confirm in CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(openFailure);
  }

  return new LemmaSampleStream(lines);
}
} // end of enclosing class