@Test public void testMaxentOnPrepAttachData() throws IOException { testDataIndexer.index(PrepAttachDataUtil.createTrainingStream()); // this shows why the GISTrainer should be a AbstractEventTrainer. // TODO: make sure that the trainingParameter cutoff and the // cutoff value passed here are equal. AbstractModel model = new GISTrainer(true).trainModel(100, testDataIndexer, new UniformPrior(), 1); PrepAttachDataUtil.testModel(model, 0.7997028967566229); }
/**
 * Same training run as {@code testMaxentOnPrepAttachData}, but with two worker
 * threads; the resulting model accuracy must be identical to the single-threaded run.
 */
@Test
public void testMaxentOnPrepAttachData2Threads() throws IOException {
  testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());
  final int iterations = 100;
  final int threads = 2;
  AbstractModel model =
      new GISTrainer(true).trainModel(iterations, testDataIndexer, new UniformPrior(), threads);
  PrepAttachDataUtil.testModel(model, 0.7997028967566229);
}
// NOTE(review): incomplete fragment — the method body continues beyond this view
// (the try-with-resources block opens but never closes here). Presumably a model
// trained on real-valued event weights is compared against one trained with
// repeated events further down — TODO confirm against the full file.
@Test public void testRealValuedWeightsVsRepeatWeighting() throws IOException { GISModel realModel; GISTrainer gisTrainer = new GISTrainer(); try (RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt")) {
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }
// NOTE(review): incomplete fragment — the method signature begins before this view
// and its body continues past it. What is visible constructs a GISTrainer and
// configures smoothing; presumably the enclosing method then delegates training
// to this trainer using the cutoff/threads parameters — TODO confirm.
boolean printMessagesWhileTraining, boolean smoothing, Prior modelPrior, int cutoff, int threads) { GISTrainer trainer = new GISTrainer(printMessagesWhileTraining); trainer.setSmoothing(smoothing); trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);