/**
 * Trains a GIS model with the default settings: 100 iterations and a
 * cutoff of 0 (i.e. no cutoff).
 *
 * @param eventStream
 *          The stream of events the model is trained on.
 * @return The newly trained model, which can be used immediately or saved to
 *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 * @throws IOException
 *           Propagated from the delegated training call.
 */
public GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
  final int defaultIterations = 100;
  final int noCutoff = 0;
  return trainModel(eventStream, defaultIterations, noCutoff);
}
double prevLL = 0.0; double currLL; display("Performing " + iterations + " iterations.\n"); for (int i = 1; i <= iterations; i++) { if (i < 10) { display(" " + i + ": "); } else if (i < 100) { display(" " + i + ": "); } else { display(i + ": "); currLL = nextIteration(correctionConstant, completionService); if (i > 1) { if (prevLL > currLL) {
display("Incorporating indexed data for training... \n"); contexts = di.getContexts(); values = di.getValues(); display("done.\n"); numPreds = predLabels.length; display("\tNumber of Event Tokens: " + numUniqueEvents + "\n"); display("\t Number of Outcomes: " + numOutcomes + "\n"); display("\t Number of Predicates: " + numPreds + "\n"); display("...done.\n"); display("Computing model parameters ...\n"); } else { display("Computing model parameters in " + threads + " threads...\n"); findParameters(iterations, correctionConstant);
/**
 * Trains a model from already indexed data. The iteration count comes from
 * {@code getIterations()} and the thread count from the
 * {@code TrainingParameters.THREADS_PARAM} training parameter, falling back
 * to 1 when that parameter is not set.
 *
 * @param indexer The indexed training data handed on to {@code trainModel}.
 * @return The model produced by {@code trainModel}.
 * @throws IOException Declared by the overridden method's signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  final int iterationCount = getIterations();
  final int threadCount =
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
  return trainModel(iterationCount, indexer, threadCount);
}
display("."); display("."); for (int aoi = 0; aoi < activeOutcomes.length; aoi++) { if (useGaussianSmoothing) { params[pi].updateParameter(aoi, gaussianUpdate(pi, aoi, correctionConstant)); } else { if (model[aoi] == 0) { display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n");
@Test public void testMaxentOnPrepAttachData() throws IOException { testDataIndexer.index(PrepAttachDataUtil.createTrainingStream()); // this shows why the GISTrainer should be a AbstractEventTrainer. // TODO: make sure that the trainingParameter cutoff and the // cutoff value passed here are equal. AbstractModel model = new GISTrainer(true).trainModel(100, testDataIndexer, new UniformPrior(), 1); PrepAttachDataUtil.testModel(model, 0.7997028967566229); }
boolean printMessagesWhileTraining, boolean smoothing, Prior modelPrior, int cutoff, int threads) { GISTrainer trainer = new GISTrainer(printMessagesWhileTraining); trainer.setSmoothing(smoothing); trainer.setSmoothingObservation(SMOOTHING_OBSERVATION); if (modelPrior == null) { modelPrior = new UniformPrior(); return trainer.trainModel(iterations, indexer, modelPrior, cutoff, threads);
/**
 * Obtains a GISTrainer from the TrainerFactory using default training
 * parameters overridden with 10 iterations and a cutoff of 1, sets a
 * Gaussian sigma of 0.01, and checks that training on the event stream
 * yields a non-null model.
 */
@Test
public void testGISTrainSignature4() throws IOException {
  try (ObjectStream<Event> events = createEventStream()) {
    TrainingParameters trainingParams = ModelUtil.createDefaultTrainingParameters();
    trainingParams.put(AbstractTrainer.ITERATIONS_PARAM, 10);
    trainingParams.put(AbstractTrainer.CUTOFF_PARAM, 1);

    GISTrainer gisTrainer =
        (GISTrainer) TrainerFactory.getEventTrainer(trainingParams, null);
    gisTrainer.setGaussianSigma(0.01);

    Assert.assertNotNull(gisTrainer.trainModel(events));
  }
}
// Emits a deprecation warning that names the old parameter (OLD_LL_THRESHOLD_PARAM).
// NOTE(review): the text after "Please use" is LOG_LIKELIHOOD_THRESHOLD_DEFAULT, whose
// name suggests a default value rather than the replacement parameter's name - confirm
// whether the corresponding parameter-name constant was intended here.
display("WARNING: the training parameter: " + OLD_LL_THRESHOLD_PARAM + " has been deprecated. Please use " + LOG_LIKELIHOOD_THRESHOLD_DEFAULT + " instead");
/**
 * Same scenario as the single-threaded prep-attach test, but the last
 * argument to {@code trainModel} is 2 instead of 1; the expected value
 * passed to {@code PrepAttachDataUtil.testModel} is unchanged.
 */
@Test
public void testMaxentOnPrepAttachData2Threads() throws IOException {
  testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());

  final GISTrainer trainer = new GISTrainer(true);
  final AbstractModel trained =
      trainer.trainModel(100, testDataIndexer, new UniformPrior(), 2);

  PrepAttachDataUtil.testModel(trained, 0.7997028967566229);
}
/**
 * Trains a model from already indexed data. The iteration count comes from
 * {@code getIterations()} and the thread count from the
 * {@code TrainingParameters.THREADS_PARAM} training parameter, falling back
 * to 1 when that parameter is not set.
 *
 * @param indexer The indexed training data handed on to {@code trainModel}.
 * @return The model produced by {@code trainModel}.
 * @throws IOException Declared by the overridden method's signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  final int iterationCount = getIterations();
  final int threadCount =
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
  return trainModel(iterationCount, indexer, threadCount);
}
display("."); display("."); for (int aoi = 0; aoi < activeOutcomes.length; aoi++) { if (useGaussianSmoothing) { params[pi].updateParameter(aoi, gaussianUpdate(pi, aoi, correctionConstant)); } else { if (model[aoi] == 0) { display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n");
// Emits a deprecation warning that names the old parameter (OLD_LL_THRESHOLD_PARAM).
// NOTE(review): the text after "Please use" is LOG_LIKELIHOOD_THRESHOLD_DEFAULT, whose
// name suggests a default value rather than the replacement parameter's name - confirm
// whether the corresponding parameter-name constant was intended here.
display("WARNING: the training parameter: " + OLD_LL_THRESHOLD_PARAM + " has been deprecated. Please use " + LOG_LIKELIHOOD_THRESHOLD_DEFAULT + " instead");
@Test public void testRealValuedWeightsVsRepeatWeighting() throws IOException { GISModel realModel; GISTrainer gisTrainer = new GISTrainer(); try (RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt")) { testDataIndexer.index(rvfes1); realModel = gisTrainer.trainModel(100, testDataIndexer); "src/test/resources/data/opennlp/maxent/repeat-weighting-training-data.txt")) { testDataIndexer.index(rvfes2); repeatModel = gisTrainer.trainModel(100,testDataIndexer);
/**
 * Trains a GIS model from indexed data, delegating to the four-argument
 * overload with a fresh {@link UniformPrior} and a final argument of 1.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di) {
  final UniformPrior uniformPrior = new UniformPrior();
  return trainModel(iterations, di, uniformPrior, 1);
}
/**
 * Trains a model from already indexed data. The iteration count comes from
 * {@code getIterations()} and the thread count from the
 * {@code TrainingParameters.THREADS_PARAM} training parameter, falling back
 * to 1 when that parameter is not set.
 *
 * @param indexer The indexed training data handed on to {@code trainModel}.
 * @return The model produced by {@code trainModel}.
 * @throws IOException Declared by the overridden method's signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  final int iterationCount = getIterations();
  final int threadCount =
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
  return trainModel(iterationCount, indexer, threadCount);
}
double prevLL = 0.0; double currLL; display("Performing " + iterations + " iterations.\n"); for (int i = 1; i <= iterations; i++) { if (i < 10) { display(" " + i + ": "); } else if (i < 100) { display(" " + i + ": "); } else { display(i + ": "); currLL = nextIteration(correctionConstant, completionService); if (i > 1) { if (prevLL > currLL) {
display("."); display("."); for (int aoi = 0; aoi < activeOutcomes.length; aoi++) { if (useGaussianSmoothing) { params[pi].updateParameter(aoi, gaussianUpdate(pi, aoi, correctionConstant)); } else { if (model[aoi] == 0) { display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n");
display("Incorporating indexed data for training... \n"); contexts = di.getContexts(); values = di.getValues(); display("done.\n"); numPreds = predLabels.length; display("\tNumber of Event Tokens: " + numUniqueEvents + "\n"); display("\t Number of Outcomes: " + numOutcomes + "\n"); display("\t Number of Predicates: " + numPreds + "\n"); display("...done.\n"); display("Computing model parameters ...\n"); } else { display("Computing model parameters in " + threads + " threads...\n"); findParameters(iterations, correctionConstant);
// Emits a deprecation warning that names the old parameter (OLD_LL_THRESHOLD_PARAM).
// NOTE(review): the text after "Please use" is LOG_LIKELIHOOD_THRESHOLD_DEFAULT, whose
// name suggests a default value rather than the replacement parameter's name - confirm
// whether the corresponding parameter-name constant was intended here.
display("WARNING: the training parameter: " + OLD_LL_THRESHOLD_PARAM + " has been deprecated. Please use " + LOG_LIKELIHOOD_THRESHOLD_DEFAULT + " instead");