public DataIndexer getDataIndexer(ObjectStream<Event> events) throws IOException { trainingParameters.put(AbstractDataIndexer.SORT_PARAM, isSortAndMerge()); // If the cutoff was set, don't overwrite the value. if (trainingParameters.getIntParameter(CUTOFF_PARAM, -1) == -1) { trainingParameters.put(CUTOFF_PARAM, 5); } DataIndexer indexer = DataIndexerFactory.getDataIndexer(trainingParameters, reportMap); indexer.index(events); return indexer; }
/**
 * Indexes the "ok" and then the "broken" real-valued event file with the
 * shared {@code indexer}, asserting after each run that exactly one outcome
 * label is reported.
 */
@Test
public void testLastLineBug() throws IOException {
  final String okData =
      "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-ok.txt";
  final String brokenData =
      "src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-broken.txt";

  // First pass: the file that is expected to be well-formed.
  try (RealValueFileEventStream okStream = new RealValueFileEventStream(okData)) {
    indexer.index(okStream);
  }
  Assert.assertEquals(1, indexer.getOutcomeLabels().length);

  // Second pass: the file named "broken" must give the same label count.
  try (RealValueFileEventStream brokenStream = new RealValueFileEventStream(brokenData)) {
    indexer.index(brokenStream);
  }
  Assert.assertEquals(1, indexer.getOutcomeLabels().length);
}
}
@Test public void testTrainModelReturnsAQNModel() throws Exception { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); testDataIndexer.index(rvfes1); // when QNModel trainedModel = new QNTrainer(false).trainModel(ITERATIONS, testDataIndexer); // then Assert.assertNotNull(trainedModel); }
@Test public void testValueAtNonInitialPoint01() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when double[] nonInitialPoint = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; double value = objectFunction.valueAt(nonInitialPoint); double expectedValue = 13.862943611198894; // then Assert.assertEquals(expectedValue, value, TOLERANCE01); }
@Test public void testMaxentOnPrepAttachData() throws IOException { testDataIndexer.index(PrepAttachDataUtil.createTrainingStream()); // this shows why the GISTrainer should be a AbstractEventTrainer. // TODO: make sure that the trainingParameter cutoff and the // cutoff value passed here are equal. AbstractModel model = new GISTrainer(true).trainModel(100, testDataIndexer, new UniformPrior(), 1); PrepAttachDataUtil.testModel(model, 0.7997028967566229); }
/**
 * Trains a naive Bayes model on the stream from {@code createTrainingStream()}
 * and checks it against a "politics" event with an empty context; the
 * expected value passed to {@code testModel} is 7/12.
 */
@Test
public void testNaiveBayes4() throws IOException {
  // given
  testDataIndexer.index(createTrainingStream());
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final NaiveBayesModel trained = (NaiveBayesModel) trainer.trainModel(testDataIndexer);

  // when: an event that carries no context features at all
  final Event emptyContextEvent = new Event("politics", new String[] {});

  // then
  final double expected = 7.0 / 12.0;
  testModel(trained, emptyContextEvent, expected);
}
/**
 * Same scenario as the single-argument-{@code 1} GIS test, but passes
 * {@code 2} as the last {@code trainModel} argument; the expected score
 * handed to {@code PrepAttachDataUtil.testModel} is unchanged.
 */
@Test
public void testMaxentOnPrepAttachData2Threads() throws IOException {
  // given
  testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());

  // when
  final GISTrainer trainer = new GISTrainer(true);
  final UniformPrior prior = new UniformPrior();
  final AbstractModel trained = trainer.trainModel(100, testDataIndexer, prior, 2);

  // then
  final double expectedScore = 0.7997028967566229;
  PrepAttachDataUtil.testModel(trained, expectedScore);
}
@Test public void testNaiveBayes2() throws IOException { testDataIndexer.index(createTrainingStream()); NaiveBayesModel model = (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer); String label = "sports"; String[] context = {"bow=manchester", "bow=united"}; Event event = new Event(label, context); // testModel(model, event, 1.0); // Expected value without smoothing testModel(model, event, 0.9658833555831029); // Expected value with smoothing }
@Test public void testNaiveBayes3() throws IOException { testDataIndexer.index(createTrainingStream()); NaiveBayesModel model = (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer); String label = "politics"; String[] context = {"bow=united"}; Event event = new Event(label, context); //testModel(model, event, 2.0/3.0); // Expected value without smoothing testModel(model, event, 0.6655036407766989); // Expected value with smoothing }
/**
 * Indexes the prepositional-attachment training stream, trains with
 * {@link NaiveBayesTrainer}, asserts the result is a {@link NaiveBayesModel},
 * and hands it to {@code PrepAttachDataUtil.testModel} with the expected score.
 */
@Test
public void testNaiveBayesOnPrepAttachData() throws IOException {
  // given
  testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());

  // when
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final MaxentModel trained = trainer.trainModel(testDataIndexer);

  // then
  final boolean isNaiveBayes = trained instanceof NaiveBayesModel;
  Assert.assertTrue(isNaiveBayes);
  final double expectedScore = 0.7897994553107205;
  PrepAttachDataUtil.testModel(trained, expectedScore);
}
@Test public void testNaiveBayes1() throws IOException { testDataIndexer.index(createTrainingStream()); NaiveBayesModel model = (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer); String label = "politics"; String[] context = {"bow=united", "bow=nations"}; Event event = new Event(label, context); // testModel(model, event, 1.0); // Expected value without smoothing testModel(model, event, 0.9681650180264167); // Expected value with smoothing }
@Test public void testModel() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); testDataIndexer.index(rvfes1); // when QNModel trainedModel = new QNTrainer(15, true).trainModel( ITERATIONS, testDataIndexer); Assert.assertFalse(trainedModel.equals(null)); }
@Test public void testValueAtInitialPoint() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when double value = objectFunction.valueAt(objectFunction.getInitialPoint()); double expectedValue = 13.86294361; // then Assert.assertEquals(expectedValue, value, TOLERANCE01); }
@Test public void testGradientSanity() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when double[] initial = objectFunction.getInitialPoint(); double[] gradientAtInitial = objectFunction.gradientAt(initial); // then Assert.assertNotNull(gradientAtInitial); }
/**
 * Trains a naive Bayes model, obtains a second model from
 * {@code persistedModel}, and compares both via {@code testModelOutcome} on
 * a "politics" event with context {@code bow=united}, {@code bow=nations}.
 */
@Test
public void testNaiveBayes1() throws IOException {
  // given
  testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final NaiveBayesModel trained = (NaiveBayesModel) trainer.trainModel(testDataIndexer);
  final NaiveBayesModel persisted = persistedModel(trained);

  // when
  final Event politicsEvent =
      new Event("politics", new String[] {"bow=united", "bow=nations"});

  // then
  testModelOutcome(trained, persisted, politicsEvent);
}
/**
 * Builds a {@link TwoPassDataIndexer} with cutoff {@code 1} and sorting
 * disabled, indexes the prepositional-attachment training stream, trains
 * with {@link QNTrainer}, and hands the model to
 * {@code PrepAttachDataUtil.testModel} with the expected score.
 */
@Test
public void testQNOnPrepAttachData() throws IOException {
  // given: indexing parameters local to this test
  final TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractDataIndexer.SORT_PARAM, false);

  final DataIndexer twoPassIndexer = new TwoPassDataIndexer();
  twoPassIndexer.init(params, new HashMap<>());
  twoPassIndexer.index(PrepAttachDataUtil.createTrainingStream());

  // when
  final QNTrainer trainer = new QNTrainer(true);
  final AbstractModel trained = trainer.trainModel(100, twoPassIndexer);

  // then
  final double expectedScore = 0.8155484030700668;
  PrepAttachDataUtil.testModel(trained, expectedScore);
}
/**
 * Trains a naive Bayes model, obtains a second model from
 * {@code persistedModel}, and compares both via {@code testModelOutcome} on
 * a "sports" event with context {@code bow=manchester}, {@code bow=united}.
 */
@Test
public void testNaiveBayes2() throws IOException {
  // given
  testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final NaiveBayesModel trained = (NaiveBayesModel) trainer.trainModel(testDataIndexer);
  final NaiveBayesModel persisted = persistedModel(trained);

  // when
  final Event sportsEvent =
      new Event("sports", new String[] {"bow=manchester", "bow=united"});

  // then
  testModelOutcome(trained, persisted, sportsEvent);
}
/**
 * Trains a naive Bayes model, obtains a second model from
 * {@code persistedModel}, and compares both via {@code testModelOutcome} on
 * a "politics" event whose only context feature is {@code bow=united}.
 */
@Test
public void testNaiveBayes3() throws IOException {
  // given
  testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final NaiveBayesModel trained = (NaiveBayesModel) trainer.trainModel(testDataIndexer);
  final NaiveBayesModel persisted = persistedModel(trained);

  // when
  final Event politicsEvent = new Event("politics", new String[] {"bow=united"});

  // then
  testModelOutcome(trained, persisted, politicsEvent);
}
/**
 * Trains a naive Bayes model, obtains a second model from
 * {@code persistedModel}, and compares both via {@code testModelOutcome} on
 * a "politics" event with an empty context.
 */
@Test
public void testNaiveBayes4() throws IOException {
  // given
  testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
  final NaiveBayesTrainer trainer = new NaiveBayesTrainer();
  final NaiveBayesModel trained = (NaiveBayesModel) trainer.trainModel(testDataIndexer);
  final NaiveBayesModel persisted = persistedModel(trained);

  // when: an event that carries no context features at all
  final Event emptyContextEvent = new Event("politics", new String[] {});

  // then
  testModelOutcome(trained, persisted, emptyContextEvent);
}
@Test public void testDomainDimensionSanity() throws IOException { // given RealValueFileEventStream rvfes1 = new RealValueFileEventStream( "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt", "UTF-8"); testDataIndexer.index(rvfes1); NegLogLikelihood objectFunction = new NegLogLikelihood(testDataIndexer); // when int correctDomainDimension = testDataIndexer.getPredLabels().length * testDataIndexer.getOutcomeLabels().length; // then Assert.assertEquals(correctDomainDimension, objectFunction.getDimension()); }