public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory, TrainingParameters mlParams) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<>(); // TODO: Fix the EventStream to throw exceptions when training goes wrong ObjectStream<Event> eventStream = new SDEventStream(samples, sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner()); EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); MaxentModel sentModel = trainer.train(eventStream); return new SentenceModel(languageCode, sentModel, manifestInfoEntries, sdFactory); }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory, TrainingParameters mlParams) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<>(); // TODO: Fix the EventStream to throw exceptions when training goes wrong ObjectStream<Event> eventStream = new SDEventStream(samples, sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner()); EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); MaxentModel sentModel = trainer.train(eventStream); return new SentenceModel(languageCode, sentModel, manifestInfoEntries, sdFactory); }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory, TrainingParameters mlParams) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<>(); // TODO: Fix the EventStream to throw exceptions when training goes wrong ObjectStream<Event> eventStream = new SDEventStream(samples, sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner()); EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); MaxentModel sentModel = trainer.train(eventStream); return new SentenceModel(languageCode, sentModel, manifestInfoEntries, sdFactory); }
@Test public void testEventOutcomes() throws IOException { // Sample with two sentences SentenceSample sample = new SentenceSample("Test sent. one. Test sent. 2?", new Span(0, 15), new Span(16, 29)); ObjectStream<SentenceSample> sampleStream = ObjectStreamUtils.createObjectStream(sample); Factory factory = new Factory(); ObjectStream<Event> eventStream = new SDEventStream(sampleStream, factory.createSentenceContextGenerator("eng"), factory.createEndOfSentenceScanner("eng")); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertNull(eventStream.read()); } }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }