/**
 * Trains a model from the events held by the given indexer.
 *
 * <p>The trainer is validated first; training is then refused unless the indexer
 * reports at least two outcome labels. After the model has been built, the trainer
 * type is recorded in the report.
 *
 * @param indexer the indexed training data
 * @return the model produced by {@code doTrain}
 * @throws InsufficientTrainingDataException if the indexer reports one outcome label or fewer
 * @throws IOException declared for I/O failures during training
 */
public final MaxentModel train(DataIndexer indexer) throws IOException {
  validate();

  final String[] labels = indexer.getOutcomeLabels();
  if (labels.length < 2) {
    throw new InsufficientTrainingDataException("Training data must contain more than one outcome");
  }

  final MaxentModel trained = doTrain(indexer);
  addToReport(AbstractTrainer.TRAINER_TYPE_PARAM, EventTrainer.EVENT_VALUE);
  return trained;
}
/**
 * Checks whether two double arrays match element-wise within a tolerance.
 *
 * <p>{@code actual} is first re-ordered with {@code alignDoubleArrayForTestData}
 * using the indexer's predicate and outcome labels, so that it can be compared
 * position by position with {@code expected}.
 *
 * <p>Two elements match when they are identical (this includes NaN paired with NaN
 * and equal infinities) or when their absolute difference is at most
 * {@code tolerance}. A NaN on only one side is always a mismatch.
 *
 * @param expected the reference values
 * @param actual the values to check, in the indexer's own ordering
 * @param indexer supplies the predicate and outcome labels used for the alignment
 * @param tolerance the largest absolute difference that still counts as equal
 * @return {@code true} if the lengths agree and every element pair matches
 */
private boolean compareDoubleArray(double[] expected, double[] actual,
    DataIndexer indexer, double tolerance) {
  double[] alignedActual = alignDoubleArrayForTestData(
      actual, indexer.getPredLabels(), indexer.getOutcomeLabels());

  if (expected.length != alignedActual.length) {
    return false;
  }

  for (int i = 0; i < alignedActual.length; i++) {
    // Identical values match outright; this also covers NaN/NaN and equal
    // infinities, whose difference would itself be NaN.
    if (Double.compare(expected[i], alignedActual[i]) == 0) {
      continue;
    }
    // Written as a negated "<=" on purpose: if the difference is NaN the
    // comparison is false, so the pair is reported as a mismatch instead of
    // slipping through the way "diff > tolerance" would let it.
    if (!(Math.abs(alignedActual[i] - expected[i]) <= tolerance)) {
      return false;
    }
  }
  return true;
}
}
int nPredLabels = predLabels.length; String[] outcomeNames = indexer.getOutcomeLabels(); int nOutcomes = outcomeNames.length;
/**
 * Builds the objective function from the data held by the given indexer.
 *
 * @param indexer the indexed training data; its feature values are only used
 *                when it is a {@code OnePassRealValueDataIndexer}
 */
public NegLogLikelihood(DataIndexer indexer) {
  // Feature values are taken only from a real-valued indexer; otherwise none are kept.
  this.values = (indexer instanceof OnePassRealValueDataIndexer) ? indexer.getValues() : null;

  // Event data as exposed by the indexer.
  this.contexts = indexer.getContexts();
  this.outcomeList = indexer.getOutcomeList();
  this.numTimesEventsSeen = indexer.getNumTimesEventsSeen();

  // Problem sizes.
  this.numOutcomes = indexer.getOutcomeLabels().length;
  this.numFeatures = indexer.getPredLabels().length;
  this.numContexts = this.contexts.length;
  this.dimension = numOutcomes * numFeatures;

  // Buffers sized from the problem sizes above.
  this.expectation = new double[numOutcomes];
  this.tempSums = new double[numOutcomes];
  this.gradient = new double[dimension];
}
/**
 * Indexes the "ok" and then the "broken" data file and checks that the indexer
 * reports exactly one outcome label after each run.
 */
@Test
public void testLastLineBug() throws IOException {
  final String okData =
      "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-ok.txt";
  final String brokenData =
      "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-broken.txt";

  try (RealValueFileEventStream okStream = new RealValueFileEventStream(okData)) {
    indexer.index(okStream);
  }
  Assert.assertEquals(1, indexer.getOutcomeLabels().length);

  try (RealValueFileEventStream brokenStream = new RealValueFileEventStream(brokenData)) {
    indexer.index(brokenStream);
  }
  Assert.assertEquals(1, indexer.getOutcomeLabels().length);
}
}
/**
 * Evaluates the model described by {@code parameters} on the indexed training data.
 *
 * <p>For every indexed event the outcome scores are computed with
 * {@code QNModel.eval}, and the index returned by {@code ArrayMath.argmax} is
 * compared with the event's recorded outcome. Each event is weighted by the number
 * of times it was seen.
 *
 * @param parameters the model parameters to evaluate
 * @return the weighted count of correctly predicted events divided by the weighted
 *         count of all events
 */
@Override
public double evaluate(double[] parameters) {
  final int[][] contexts = indexer.getContexts();
  final float[][] values = indexer.getValues();
  final int[] timesSeen = indexer.getNumTimesEventsSeen();
  final int[] outcomes = indexer.getOutcomeList();
  final int outcomeCount = indexer.getOutcomeLabels().length;
  final int predCount = indexer.getPredLabels().length;

  int correct = 0;
  int total = 0;

  for (int eventIdx = 0; eventIdx < contexts.length; eventIdx++) {
    // Feature values are optional; without them eval receives null.
    float[] eventValues = null;
    if (values != null) {
      eventValues = values[eventIdx];
    }

    // A fresh buffer per event, exactly as many entries as there are outcomes.
    final double[] scores = new double[outcomeCount];
    QNModel.eval(contexts[eventIdx], eventValues, scores, outcomeCount, predCount, parameters);

    final int predicted = ArrayMath.argmax(scores);
    if (predicted == outcomes[eventIdx]) {
      correct += timesSeen[eventIdx];
    }
    total += timesSeen[eventIdx];
  }

  return (double) correct / total;
}
}
@Test public void testDomainDimensionSanity() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when int correctDomainDimension = testDataIndexer.getPredLabels().length * testDataIndexer.getOutcomeLabels().length; // then Assert.assertEquals(correctDomainDimension, objectFunction.getDimension()); }
outcomeLabels = di.getOutcomeLabels(); omap = new HashMap<>(); for (int oli = 0; oli < outcomeLabels.length; oli++) {
Assert.assertEquals("opennlp.tools.ml.model.OnePassDataIndexer", di.getClass().getName()); Assert.assertEquals(3, di.getNumEvents()); Assert.assertEquals(2, di.getOutcomeLabels().length); Assert.assertEquals(6, di.getPredLabels().length);
di.index(eventStream); Assert.assertEquals(3, di.getNumEvents()); Assert.assertEquals(2, di.getOutcomeLabels().length); Assert.assertEquals(6, di.getPredLabels().length); di.index(eventStream); Assert.assertEquals(3, di.getNumEvents()); Assert.assertEquals(2, di.getOutcomeLabels().length); Assert.assertEquals(6, di.getPredLabels().length);
@Test public void testValueAtNonInitialPoint02() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when double[] nonInitialPoint = new double[] { 3, 2, 3, 2, 3, 2, 3, 2, 3, 2 }; double value = objectFunction.valueAt(dealignDoubleArrayForTestData(nonInitialPoint, testDataIndexer.getPredLabels(), testDataIndexer.getOutcomeLabels())); double expectedValue = 53.163219721099026; // then Assert.assertEquals(expectedValue, value, TOLERANCE02); }
/**
 * Trains a naive Bayes model from the given indexed data.
 *
 * <p>The indexer's contents are stored in this trainer's fields, a short summary
 * is reported through {@code display}, and the parameters are then computed by
 * {@code findParameters()}.
 *
 * @param di the indexed training data
 * @return a {@code NaiveBayesModel} built from the computed parameters and the
 *         indexer's predicate and outcome labels
 */
public AbstractModel trainModel(DataIndexer di) {
  display("Incorporating indexed data for training... \n");

  // Events.
  contexts = di.getContexts();
  values = di.getValues();
  numTimesEventsSeen = di.getNumTimesEventsSeen();
  numEvents = di.getNumEvents();
  numUniqueEvents = contexts.length;

  // Outcomes.
  outcomeLabels = di.getOutcomeLabels();
  numOutcomes = outcomeLabels.length;
  outcomeList = di.getOutcomeList();

  // Predicates.
  predLabels = di.getPredLabels();
  numPreds = predLabels.length;

  display("done.\n");

  display("\tNumber of Event Tokens: " + numUniqueEvents + "\n");
  display("\t Number of Outcomes: " + numOutcomes + "\n");
  display("\t Number of Predicates: " + numPreds + "\n");

  display("Computing model parameters...\n");
  final MutableContext[] learned = findParameters();
  display("...done.\n");

  // Wrap the learned parameters and the label tables in the model.
  return new NaiveBayesModel(learned, predLabels, outcomeLabels);
}
/**
 * Trains a perceptron model from the given indexed data.
 *
 * <p>The indexer's contents are stored in this trainer's fields, a short summary
 * is reported through {@code display}, and the parameters are then computed by
 * {@code findParameters(iterations, useAverage)}.
 *
 * @param iterations passed on to {@code findParameters}
 * @param di the indexed training data
 * @param cutoff not read by this method
 * @param useAverage passed on to {@code findParameters}
 * @return a {@code PerceptronModel} built from the computed parameters and the
 *         indexer's predicate and outcome labels
 */
public AbstractModel trainModel(int iterations, DataIndexer di, int cutoff, boolean useAverage) {
  display("Incorporating indexed data for training... \n");

  // Events.
  contexts = di.getContexts();
  values = di.getValues();
  numTimesEventsSeen = di.getNumTimesEventsSeen();
  numEvents = di.getNumEvents();
  numUniqueEvents = contexts.length;

  // Outcomes.
  outcomeLabels = di.getOutcomeLabels();
  numOutcomes = outcomeLabels.length;
  outcomeList = di.getOutcomeList();

  // Predicates.
  predLabels = di.getPredLabels();
  numPreds = predLabels.length;

  display("done.\n");

  display("\tNumber of Event Tokens: " + numUniqueEvents + "\n");
  display("\t Number of Outcomes: " + numOutcomes + "\n");
  display("\t Number of Predicates: " + numPreds + "\n");

  display("Computing model parameters...\n");
  final MutableContext[] learned = findParameters(iterations, useAverage);
  display("...done.\n");

  // Wrap the learned parameters and the label tables in the model.
  return new PerceptronModel(learned, predLabels, outcomeLabels);
}
@Test public void testGradientAtNonInitialPoint() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when double[] nonInitialPoint = new double[] { 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5 }; double[] gradientAtNonInitialPoint = objectFunction.gradientAt(dealignDoubleArrayForTestData(nonInitialPoint, testDataIndexer.getPredLabels(), testDataIndexer.getOutcomeLabels())); double[] expectedGradient = new double[] { -12.755042847945553, -21.227127506102434, -72.57790706276435, 38.03525795198456, 15.348650889354925, 12.755042847945557, 21.22712750610244, 72.57790706276438, -38.03525795198456, -15.348650889354925 }; // then Assert.assertTrue(compareDoubleArray(expectedGradient, gradientAtNonInitialPoint, testDataIndexer, TOLERANCE01)); }
outcomeLabels = di.getOutcomeLabels(); outcomeList = di.getOutcomeList(); numOutcomes = outcomeLabels.length;
/**
 * Trains a model from the events held by the given indexer.
 *
 * <p>The trainer is validated first; training is then refused unless the indexer
 * reports at least two outcome labels. After the model has been built, the trainer
 * type is recorded in the report.
 *
 * @param indexer the indexed training data
 * @return the model produced by {@code doTrain}
 * @throws InsufficientTrainingDataException if the indexer reports one outcome label or fewer
 * @throws IOException declared for I/O failures during training
 */
public final MaxentModel train(DataIndexer indexer) throws IOException {
  validate();

  final String[] labels = indexer.getOutcomeLabels();
  if (labels.length < 2) {
    throw new InsufficientTrainingDataException("Training data must contain more than one outcome");
  }

  final MaxentModel trained = doTrain(indexer);
  addToReport(AbstractTrainer.TRAINER_TYPE_PARAM, EventTrainer.EVENT_VALUE);
  return trained;
}
Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels()); Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels()); Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels()); Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels()); Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());