indexer = new TwoPassDataIndexer(); break;
// Two-pass indexing: pass one streams events to a temp file while counting
// predicate occurrences; pass two re-reads the file to build the contexts.
display("Indexing events with TwoPass using cutoff of " + cutoff + "\n\n");
display("\tComputing event counts... ");
// Wrap the source so a hash over all written events can be computed.
HashSumEventStream writeEventStream = new HashSumEventStream(eventStream); // do not close.
try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmp)))) {
  numEvents = computeEventCounts(writeEventStream, dos, predicateIndex, cutoff);
  display("done. " + numEvents + " events\n");
  display("\tIndexing... ");
  eventsToCompare = index(readStream, predicateIndex);
  readHash = readStream.calculateHashSum();
  // BUGFIX: the exception was thrown unconditionally (making the statements
  // below unreachable). Only fail when the hash of the events read back
  // differs from the hash of the events written out.
  if (!writeEventStream.calculateHashSum().equals(readHash)) {
    throw new IOException("Event hash for writing and reading events did not match.");
  }
  display("done.\n");
  display("Sorting and merging events... ");
  display("Collecting events... ");
  sortAndMerge(eventsToCompare,sort);
  display(String.format("Done indexing in %.2f s.\n", (System.currentTimeMillis() - start) / 1000d));
// Tally this event's context predicates, then persist each context string
// to the event store in modified-UTF format for the second indexing pass.
update(ec, counter); for (String ctxString : ec) eventStore.writeUTF(ctxString);
/**
 * Trains a perceptron model on the prep-attachment data via a
 * {@link TwoPassDataIndexer} (cutoff 1, sorting disabled) and checks
 * that it reaches the expected accuracy.
 */
@Test
public void testPerceptronOnPrepAttachData() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractDataIndexer.SORT_PARAM, false);

  TwoPassDataIndexer indexer = new TwoPassDataIndexer();
  indexer.init(params, new HashMap<>());
  indexer.index(PrepAttachDataUtil.createTrainingStream());

  MaxentModel model = new PerceptronTrainer().trainModel(400, indexer, 1);
  PrepAttachDataUtil.testModel(model, 0.7650408516959644);
}
// First pass: count events/predicates while writing events to the temp file
// as UTF-8 text; second pass re-reads them (via 'fes') to build the index.
display("Indexing events with TwoPass using cutoff of " + cutoff + "\n\n");
display("\tComputing event counts... ");
try (Writer osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp), StandardCharsets.UTF_8))) {
  numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff);
  display("done. " + numEvents + " events\n");
  display("\tIndexing... ");
  // NOTE(review): 'fes' (the file-backed event stream over 'tmp') is created
  // outside this excerpt — confirm it is opened before this point.
  eventsToCompare = index(fes, predicateIndex);
  display("done.\n");
  display("Sorting and merging events... ");
  display("Collecting events... ");
  sortAndMerge(eventsToCompare,sort);
  display(String.format("Done indexing in %.2f s.\n", (System.currentTimeMillis() - start) / 1000d));
/**
 * Creates a fresh {@link TwoPassDataIndexer} before each test,
 * configured with a cutoff of 1 and sorting disabled.
 */
@Before
public void initIndexer() {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(params, new HashMap<>());
}
// Serialize the event as one text line into the event store, then tally its
// context predicates; 'counter' accumulates per-predicate occurrence counts.
eventStore.write(FileEventStream.toLine(ev)); String[] ec = ev.getContext(); update(ec, counter);
// Two-pass indexing: pass one streams events to a temp file while counting
// predicate occurrences; pass two re-reads the file to build the contexts.
display("Indexing events with TwoPass using cutoff of " + cutoff + "\n\n");
display("\tComputing event counts... ");
// Wrap the source so a hash over all written events can be computed.
HashSumEventStream writeEventStream = new HashSumEventStream(eventStream); // do not close.
try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmp)))) {
  numEvents = computeEventCounts(writeEventStream, dos, predicateIndex, cutoff);
  display("done. " + numEvents + " events\n");
  display("\tIndexing... ");
  eventsToCompare = index(readStream, predicateIndex);
  readHash = readStream.calculateHashSum();
  // BUGFIX: the exception was thrown unconditionally (making the statements
  // below unreachable). Only fail when the hash of the events read back
  // differs from the hash of the events written out.
  if (!writeEventStream.calculateHashSum().equals(readHash)) {
    throw new IOException("Event hash for writing and reading events did not match.");
  }
  display("done.\n");
  display("Sorting and merging events... ");
  display("Collecting events... ");
  sortAndMerge(eventsToCompare,sort);
  display(String.format("Done indexing in %.2f s.\n", (System.currentTimeMillis() - start) / 1000d));
/**
 * Creates a fresh {@link TwoPassDataIndexer} before each test,
 * configured with a cutoff of 1 and sorting disabled.
 */
// FIX: removed a stray empty statement (';;') after the SORT_PARAM line.
@Before
public void initIndexer() {
  TrainingParameters trainingParameters = new TrainingParameters();
  trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
  trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(trainingParameters, new HashMap<>());
}
// Tally this event's context predicates, then persist each context string
// to the event store in modified-UTF format for the second indexing pass.
update(ec, counter); for (String ctxString : ec) eventStore.writeUTF(ctxString);
/**
 * Creates a fresh {@link TwoPassDataIndexer} before each test,
 * configured with a cutoff of 1 and sorting disabled.
 */
// FIX: removed a stray empty statement (';;') after the SORT_PARAM line.
@Before
public void initIndexer() {
  TrainingParameters trainingParameters = new TrainingParameters();
  trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
  trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(trainingParameters, new HashMap<>());
}
/**
 * Creates a fresh {@link TwoPassDataIndexer} before each test,
 * configured with a cutoff of 1 and sorting disabled.
 */
// FIX: removed a stray empty statement (';;') after the SORT_PARAM line.
@Before
public void initIndexer() {
  TrainingParameters trainingParameters = new TrainingParameters();
  trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
  trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(trainingParameters, new HashMap<>());
}
/**
 * Builds the indexer under test before each test case: a
 * {@link TwoPassDataIndexer} with cutoff 1 and sorting turned off.
 */
@Before
public void initIndexer() {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(params, new HashMap<>());
}
/**
 * Verifies that indexing events generated from a sentence containing an
 * embedded newline token yields the expected number of contexts.
 */
@Test
public void testIndexWithNewline() throws IOException {
  String[] tokens = "He belongs to Apache \n Software Foundation .".split(" ");
  NameSample sample = new NameSample(tokens, new Span[] { new Span(3, 7) }, false);
  NameContextGenerator contextGen =
      new DefaultNameContextGenerator((AdaptiveFeatureGenerator[]) null);
  ObjectStream<Event> events = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(sample), "org", contextGen, null);

  DataIndexer indexer = new TwoPassDataIndexer();
  indexer.init(new TrainingParameters(Collections.emptyMap()), null);
  indexer.index(events);
  Assert.assertEquals(5, indexer.getContexts().length);
}
}
/**
 * Trains a quasi-Newton (QN) model on the prep-attachment data via a
 * {@link TwoPassDataIndexer} (cutoff 1, sorting disabled) and checks
 * that it reaches the expected accuracy.
 */
@Test
public void testQNOnPrepAttachData() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractDataIndexer.SORT_PARAM, false);

  DataIndexer indexer = new TwoPassDataIndexer();
  indexer.init(params, new HashMap<>());
  indexer.index(PrepAttachDataUtil.createTrainingStream());

  AbstractModel model = new QNTrainer(true).trainModel(100, indexer);
  PrepAttachDataUtil.testModel(model, 0.8155484030700668);
}
/**
 * Builds the {@link DataIndexer} selected by the {@code DATA_INDEXER_PARAM}
 * training parameter (two-pass by default) over the given event stream.
 *
 * @param events the events to index
 * @return the configured indexer, already populated from {@code events}
 * @throws IOException if reading the event stream fails
 * @throws IllegalStateException if the configured indexer name is unknown
 */
@Override
public DataIndexer getDataIndexer(ObjectStream<Event> events) throws IOException {
  String name = getStringParam(DATA_INDEXER_PARAM, DATA_INDEXER_TWO_PASS_VALUE);
  int cutoff = getCutoff();
  boolean sortAndMerge = isSortAndMerge();
  if (DATA_INDEXER_ONE_PASS_VALUE.equals(name)) {
    return new OnePassDataIndexer(events, cutoff, sortAndMerge);
  }
  if (DATA_INDEXER_TWO_PASS_VALUE.equals(name)) {
    return new TwoPassDataIndexer(events, cutoff, sortAndMerge);
  }
  if (DATA_INDEXER_CHI_SQUARED.equals(name)) {
    return new ChiSquaredDataIndexer(events, cutoff, sortAndMerge);
  }
  throw new IllegalStateException("Unexpected data indexer name: " + name);
}
// Switch-case arm: select the two-pass indexer (writes events to a temp file
// on a first pass, indexes on a second pass; lower memory footprint than one-pass).
indexer = new TwoPassDataIndexer(); break;
// Switch-case arm: select the two-pass indexer (writes events to a temp file
// on a first pass, indexes on a second pass; lower memory footprint than one-pass).
indexer = new TwoPassDataIndexer(); break;
// NOTE(review): fragment starts mid-builder-chain; the builder producing
// 'eventStream' is defined above this excerpt.
.build();
// Index the built event stream with a two-pass indexer using default parameters.
DataIndexer indexer = new TwoPassDataIndexer(); indexer.init(new TrainingParameters(Collections.emptyMap()), null); indexer.index(eventStream);