/**
 * Trains a sentence detector model with the parameters returned by
 * {@link ModelUtil#createDefaultTrainingParameters()}.
 *
 * @param languageCode language code forwarded to the delegated overload
 * @param samples training samples
 * @param useTokenEnd flag forwarded unchanged to the delegated overload
 * @param abbreviations abbreviation dictionary forwarded unchanged to the delegated overload
 * @return the model returned by the delegated overload
 * @throws IOException if the delegated overload throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations) throws IOException {
  TrainingParameters defaultParams = ModelUtil.createDefaultTrainingParameters();
  return train(languageCode, samples, useTokenEnd, abbreviations, defaultParams);
}
}
/**
 * Trains a sentence detector model, building the {@link SentenceDetectorFactory} from the
 * legacy flag and dictionary arguments.
 *
 * @param languageCode language code handed both to the factory and to the delegated call
 * @param samples training samples
 * @param useTokenEnd flag forwarded to the {@link SentenceDetectorFactory} constructor
 * @param abbreviations abbreviation dictionary forwarded to the factory constructor
 * @param mlParams training parameters forwarded to the delegated call
 * @return the model returned by the delegated call
 * @throws IOException if the delegated call throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations, TrainingParameters mlParams)
    throws IOException {
  // This overload has no way to supply the factory's fourth constructor argument, hence null.
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
/**
 * Trains a model for language code "eng" on the stream returned by
 * {@code createSampleStream()}, using {@code TrainingParameters.defaultParams()}.
 *
 * @param factory sentence detector factory handed to the training call
 * @return the model returned by {@code SentenceDetectorME.train}
 * @throws IOException if creating the sample stream or training throws it
 */
private static SentenceModel train(SentenceDetectorFactory factory) throws IOException {
  final String languageCode = "eng";
  return SentenceDetectorME.train(
      languageCode,
      createSampleStream(),
      factory,
      TrainingParameters.defaultParams());
}
/**
 * Expects training on the {@code SentencesInsufficient.txt} resource to fail with an
 * {@link InsufficientTrainingDataException}.
 *
 * @throws IOException declared because reading the resource or training may throw it
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  InputStreamFactory resource = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/sentdetect/SentencesInsufficient.txt");

  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 0);

  SentenceDetectorFactory detectorFactory =
      new SentenceDetectorFactory("eng", true, null, null);

  // The sample stream is built last, right before training, as in the inline form.
  SentenceSampleStream sentenceSamples = new SentenceSampleStream(
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8));
  SentenceDetectorME.train("eng", sentenceSamples, detectorFactory, trainingParams);
}
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/**
 * Trains a sentence detector model, building the {@link SentenceDetectorFactory} from the
 * legacy flag and dictionary arguments.
 *
 * @param languageCode language code handed both to the factory and to the delegated call
 * @param samples training samples
 * @param useTokenEnd flag forwarded to the {@link SentenceDetectorFactory} constructor
 * @param abbreviations abbreviation dictionary forwarded to the factory constructor
 * @param mlParams training parameters forwarded to the delegated call
 * @return the model returned by the delegated call
 * @throws IOException if the delegated call throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations, TrainingParameters mlParams)
    throws IOException {
  // This overload has no way to supply the factory's fourth constructor argument, hence null.
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
/**
 * Trains a sentence detector model with the parameters returned by
 * {@link ModelUtil#createDefaultTrainingParameters()}.
 *
 * @param languageCode language code forwarded to the delegated overload
 * @param samples training samples
 * @param useTokenEnd flag forwarded unchanged to the delegated overload
 * @param abbreviations abbreviation dictionary forwarded unchanged to the delegated overload
 * @return the model returned by the delegated overload
 * @throws IOException if the delegated overload throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations) throws IOException {
  TrainingParameters defaultParams = ModelUtil.createDefaultTrainingParameters();
  return train(languageCode, samples, useTokenEnd, abbreviations, defaultParams);
}
}
/**
 * Trains a sentence detector model with the parameters returned by
 * {@link ModelUtil#createDefaultTrainingParameters()}.
 *
 * @param languageCode language code forwarded to the delegated overload
 * @param samples training samples
 * @param useTokenEnd flag forwarded unchanged to the delegated overload
 * @param abbreviations abbreviation dictionary forwarded unchanged to the delegated overload
 * @return the model returned by the delegated overload
 * @throws IOException if the delegated overload throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations) throws IOException {
  TrainingParameters defaultParams = ModelUtil.createDefaultTrainingParameters();
  return train(languageCode, samples, useTokenEnd, abbreviations, defaultParams);
}
}
/**
 * Trains a sentence detector model, building the {@link SentenceDetectorFactory} from the
 * legacy flag and dictionary arguments.
 *
 * @param languageCode language code handed both to the factory and to the delegated call
 * @param samples training samples
 * @param useTokenEnd flag forwarded to the {@link SentenceDetectorFactory} constructor
 * @param abbreviations abbreviation dictionary forwarded to the factory constructor
 * @param mlParams training parameters forwarded to the delegated call
 * @return the model returned by the delegated call
 * @throws IOException if the delegated call throws it
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
    boolean useTokenEnd, Dictionary abbreviations, TrainingParameters mlParams)
    throws IOException {
  // This overload has no way to supply the factory's fourth constructor argument, hence null.
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( params.getFactory(), params.getLang(), true, dict, eos); model = SentenceDetectorME.train(params.getLang(), sampleStream, sdFactory, mlParams); } catch (IOException e) {
/**
 * Trains a sentence detector model for language code "en" from a line-oriented corpus.
 *
 * <p>The corpus is decoded as UTF-8 and wrapped in a {@link SentenceSampleStream}. The sample
 * stream is closed before this method returns, whether or not training succeeds.
 *
 * @param corpus raw training data, read line by line
 * @param iterations value stored under {@code TrainingParameters.ITERATIONS_PARAM}
 * @param cut value stored under {@code TrainingParameters.CUTOFF_PARAM}
 * @return the model returned by {@code SentenceDetectorME.train}
 * @throws IOException if reading the corpus, training, or closing the stream throws it
 */
private static SentenceModel train(InputStream corpus, int iterations, int cut)
    throws IOException {
  // Start from the defaults and apply the caller's settings; before this change both
  // arguments were accepted but never reached the trainer.
  TrainingParameters mlParams = TrainingParameters.defaultParams();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iterations));
  mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cut));

  ObjectStream<String> lineStream =
      new PlainTextByLineStream(corpus, Charset.forName("UTF-8"));
  ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);

  try {
    return SentenceDetectorME.train("en", sampleStream, true, null, mlParams);
  } finally {
    sampleStream.close();
  }
}
SentenceModel sentenceModel = SentenceDetectorME.train(language, samples, sdFactory, mlParams);
SentenceDetectorFactory factory = new SentenceDetectorFactory(language, true, abbreviationDictionary, eosCharacters); return SentenceDetectorME.train(language, stream, factory, params);
mod = SentenceDetectorME.train("en", sampleStream, true, dict, mlParams); } finally { sampleStream.close();
mod = SentenceDetectorME.train("en", sampleStream, sdFactory, mlParams); } finally { sampleStream.close();
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
SentenceModel sentdetectModel = SentenceDetectorME.train( "eng", new SentenceSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams);
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( params.getFactory(), params.getLang(), true, dict, eos); model = SentenceDetectorME.train(params.getLang(), sampleStream, sdFactory, mlParams); } catch (IOException e) {
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( params.getFactory(), params.getLang(), true, dict, eos); model = SentenceDetectorME.train(params.getLang(), sampleStream, sdFactory, mlParams); } catch (IOException e) {