/**
 * Builds an in-memory event stream by pairing the entry of {@code outputs} with the
 * entry of {@code cntx} found at the same index.
 *
 * @return a stream over one {@link Event} per entry of {@code cntx}, in index order
 */
private ObjectStream<Event> createEventStream() {
  final int eventCount = cntx.length;
  final List<Event> pairedEvents = new ArrayList<>(eventCount);

  int index = 0;
  while (index < eventCount) {
    pairedEvents.add(new Event(outputs[index], cntx[index]));
    index++;
  }

  return ObjectStreamUtils.createObjectStream(pairedEvents);
}
/**
 * Creates a stream over the events returned by {@code readPpaFile("training")}.
 *
 * @return a stream wrapping the list produced by {@code readPpaFile}
 * @throws IOException if {@code readPpaFile} fails
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
  return ObjectStreamUtils.createObjectStream(readPpaFile("training"));
}
@Test public void buildStreamTest() throws IOException { String[] data = {"dog","cat","pig","frog"}; // make a stream out of the data array... ObjectStream<String> stream = ObjectStreamUtils.createObjectStream(data); compare(stream, data); // make a stream out of a list... List<String> dataList = Arrays.asList(data); stream = ObjectStreamUtils.createObjectStream(Arrays.asList(data)); compare(stream, data); // make a stream out of a set... // A treeSet will order the set in Alphabetical order, so // we can compare it with the sorted Array, but this changes the // array. so it must be checked last. Arrays.sort(data); stream = ObjectStreamUtils.createObjectStream(new TreeSet<>(dataList)); compare(stream, data); }
/**
 * Indexes the given event stream in three reported steps: computing event counts,
 * indexing the resulting events against the predicate index, and sorting/merging the
 * comparable events. Progress and the elapsed time are reported through
 * {@code display}.
 *
 * <p>The cutoff and sort settings are read from {@code trainingParameters}, falling
 * back to {@code CUTOFF_DEFAULT} and {@code SORT_DEFAULT}.
 *
 * @param eventStream the events to index
 * @throws IOException declared by this method; presumably raised while reading the
 *         stream (the helpers are not visible here)
 */
@Override
public void index(ObjectStream<Event> eventStream) throws IOException {
  final int minCount = trainingParameters.getIntParameter(CUTOFF_PARAM, CUTOFF_DEFAULT);
  final boolean sortEvents = trainingParameters.getBooleanParameter(SORT_PARAM, SORT_DEFAULT);
  final long startedAt = System.currentTimeMillis();

  display("Indexing events with OnePass using cutoff of " + minCount + "\n\n");

  // Step 1: count events; the predicate index map is filled in by the helper.
  display("\tComputing event counts... ");
  final Map<String, Integer> predicateIndex = new HashMap<>();
  final List<Event> countedEvents = computeEventCounts(eventStream, predicateIndex, minCount);
  display("done. " + countedEvents.size() + " events\n");

  // Step 2: index the retained events, re-exposed as an in-memory stream.
  display("\tIndexing... ");
  final ObjectStream<Event> countedStream = ObjectStreamUtils.createObjectStream(countedEvents);
  final List<ComparableEvent> comparableEvents = index(countedStream, predicateIndex);
  display("done.\n");

  // Step 3: sort and merge.
  display("Sorting and merging events... ");
  sortAndMerge(comparableEvents, sortEvents);

  final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000d;
  display(String.format("Done indexing in %.2f s.\n", elapsedSeconds));
}
/**
 * Creates a fixed four-event training stream: two events with outcome
 * {@code "politics"} followed by two with outcome {@code "sports"}, each with a small
 * set of {@code bow=...} context strings.
 *
 * @return a stream over the four hard-coded events, in insertion order
 * @throws IOException declared by the signature; nothing in this body raises it directly
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
  List<Event> samples = new ArrayList<>();

  samples.add(new Event("politics",
      new String[] {"bow=the", "bow=united", "bow=nations"}));
  samples.add(new Event("politics",
      new String[] {"bow=the", "bow=united", "bow=states", "bow=and"}));
  samples.add(new Event("sports",
      new String[] {"bow=manchester", "bow=united"}));
  samples.add(new Event("sports",
      new String[] {"bow=manchester", "bow=and", "bow=barca"}));

  return ObjectStreamUtils.createObjectStream(samples);
}
@Test public void testWithNameTypeAndInvalidData() { try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream("<START:> Name <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream( "<START:street> <START:person> Name <END> <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } }
/**
 * Trains a document categorizer on a stream holding a single sample and expects the
 * training call to be rejected with {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
  TrainingParameters trainingSettings = new TrainingParameters();
  trainingSettings.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingSettings.put(TrainingParameters.CUTOFF_PARAM, 0);

  // One sample only.
  DocumentSample onlySample = new DocumentSample("1", new String[]{"a", "b", "c"});
  ObjectStream<DocumentSample> singleSampleStream =
      ObjectStreamUtils.createObjectStream(onlySample);

  DocumentCategorizerME.train("x-unspecified", singleSampleStream, trainingSettings,
      new DoccatFactory());
}
/**
 * Indexes the events generated for a sentence whose tokens include a bare newline
 * ({@code "\n"}) and checks that the indexer ends up with five contexts.
 *
 * <p>The sentence is split on single spaces, so the newline becomes a token of its
 * own; the name span covers tokens 3 (inclusive) to 7 (exclusive).
 *
 * @throws IOException if reading, indexing or closing the event stream fails
 */
@Test
public void testIndexWithNewline() throws IOException {
  String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");

  NameContextGenerator contextGenerator = new DefaultNameContextGenerator(
      (AdaptiveFeatureGenerator[]) null);

  NameSample nameSample = new NameSample(sentence,
      new Span[] { new Span(3, 7) }, false);

  // try-with-resources so the stream is closed even if indexing or the assertion fails.
  try (ObjectStream<Event> eventStream = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(nameSample), "org", contextGenerator, null)) {

    DataIndexer indexer = new TwoPassDataIndexer();
    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
    indexer.index(eventStream);

    Assert.assertEquals(5, indexer.getContexts().length);
  }
}
}
/**
 * Reads the first parse from a {@link ConstitParseSampleStream} built over
 * {@code getSample1()} and checks that the covered text of its tag nodes equals
 * {@code sample1Tokens}, element by element.
 */
@Test
public void testTokensAreCorrect() throws IOException {
  try (ObjectStream<Parse> parseStream = new ConstitParseSampleStream(
      ObjectStreamUtils.createObjectStream(getSample1()))) {

    Parse firstParse = parseStream.read();
    Parse[] tagNodes = firstParse.getTagNodes();

    // Collect the covered text of each tag node, preserving node order.
    String[] actualTokens = new String[tagNodes.length];
    int position = 0;
    for (Parse tagNode : tagNodes) {
      actualTokens[position++] = tagNode.getCoveredText();
    }

    Assert.assertArrayEquals(sample1Tokens, actualTokens);
  }
}
}
/**
 * Checks that a {@link ParagraphStream} groups lines into paragraphs at empty lines.
 * Two inputs are tried, one without and one with a trailing empty line; both must
 * yield {@code "1\n2\n"}, then {@code "4\n5\n"}, then {@code null}.
 */
@Test
public void testSimpleReading() throws IOException {
  String[][] lineInputs = {
      {"1", "2", "", "", "4", "5"},
      {"1", "2", "", "", "4", "5", ""}
  };

  for (String[] lines : lineInputs) {
    try (ParagraphStream paragraphs = new ParagraphStream(
        ObjectStreamUtils.createObjectStream(lines))) {
      Assert.assertEquals("1\n2\n", paragraphs.read());
      Assert.assertEquals("4\n5\n", paragraphs.read());
      Assert.assertNull(paragraphs.read());
    }
  }
}
/**
 * Checks that the stream built over {@code getSample1()} yields exactly one parse:
 * the first read returns a non-null value and the next two reads return {@code null}.
 */
@Test
public void testThereIsExactlyOneSent() throws IOException {
  try (ObjectStream<Parse> parseStream = new ConstitParseSampleStream(
      ObjectStreamUtils.createObjectStream(getSample1()))) {

    Assert.assertNotNull(parseStream.read());

    // Reading past the end must keep returning null.
    for (int extraRead = 0; extraRead < 2; extraRead++) {
      Assert.assertNull(parseStream.read());
    }
  }
}
/**
 * Checks that {@code reset()} on a {@link ParagraphStream} rewinds it: after reading
 * the first paragraph and resetting, the same paragraph is returned again, followed by
 * the second paragraph and then {@code null}.
 */
@Test
public void testReset() throws IOException {
  final String firstParagraph = "1\n2\n";
  final String secondParagraph = "4\n5\n";

  try (ParagraphStream paragraphs = new ParagraphStream(
      ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {

    Assert.assertEquals(firstParagraph, paragraphs.read());

    paragraphs.reset();

    // After the reset the stream starts over from the first paragraph.
    Assert.assertEquals(firstParagraph, paragraphs.read());
    Assert.assertEquals(secondParagraph, paragraphs.read());
    Assert.assertNull(paragraphs.read());
  }
}
}
@Test public void testSimpleTraining() throws IOException { ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream( new DocumentSample("1", new String[]{"a", "b", "c"}), new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), new DocumentSample("0", new String[]{"x", "y", "z"}), new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"})); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 0); DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples, params, new DoccatFactory()); DocumentCategorizer doccat = new DocumentCategorizerME(model); double[] aProbs = doccat.categorize(new String[]{"a"}); Assert.assertEquals("1", doccat.getBestCategory(aProbs)); double[] bProbs = doccat.categorize(new String[]{"x"}); Assert.assertEquals("0", doccat.getBestCategory(bProbs)); //test to make sure sorted map's last key is cat 1 because it has the highest score. SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"}); Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey()); Assert.assertEquals(1, cat.size()); }
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by a whitespace.
 *
 * <p>The sample {@code "Slave to the wage"} is expected to yield four token spans
 * covering {@code "Slave"}, {@code "to"}, {@code "the"} and {@code "wage"}.
 *
 * @throws IOException if reading from or closing the sample stream fails
 */
@Test
public void testParsingWhitespaceSeparatedTokens() throws IOException {
  String sampleTokens = "Slave to the wage";

  // try-with-resources so the stream is closed even when an assertion fails.
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {

    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();

    Assert.assertEquals(4, tokenSpans.length);

    Assert.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
    Assert.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
    Assert.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
    Assert.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
  }
}
/**
 * Verifies the outcome sequence produced for {@code SENTENCE} with one
 * {@code "person"} span over tokens 0 to 2: a start outcome, a continue outcome, ten
 * "other" outcomes, and then the end of the stream.
 */
@Test
public void testOutcomesForSingleTypeSentence() throws IOException {
  Span personSpan = new Span(0, 2, "person");
  NameSample nameSample = new NameSample(SENTENCE, new Span[]{personSpan}, false);

  try (ObjectStream<Event> events = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(nameSample))) {

    // The two tokens inside the span.
    Assert.assertEquals("person-" + NameFinderME.START, events.read().getOutcome());
    Assert.assertEquals("person-" + NameFinderME.CONTINUE, events.read().getOutcome());

    // The next ten events lie outside the span.
    int remaining = 10;
    while (remaining > 0) {
      Assert.assertEquals(NameFinderME.OTHER, events.read().getOutcome());
      remaining--;
    }

    Assert.assertNull(events.read());
  }
}
/**
 * Parses the tagged sentence {@code "That_DT sounds_VBZ good_JJ ._."} and verifies
 * that the event stream yields the outcomes {@code DT}, {@code VBZ}, {@code JJ} and
 * {@code .} in that order, followed by the end of the stream.
 */
@Test
public void testOutcomesForSingleSentence() throws Exception {
  POSSample taggedSentence = POSSample.parse("That_DT sounds_VBZ good_JJ ._.");
  String[] expectedTags = {"DT", "VBZ", "JJ", "."};

  try (ObjectStream<Event> events = new POSSampleEventStream(
      ObjectStreamUtils.createObjectStream(taggedSentence))) {

    for (String expectedTag : expectedTags) {
      Assert.assertEquals(expectedTag, events.read().getOutcome());
    }

    Assert.assertNull(events.read());
  }
}
}
/**
 * Tests the outcomes generated for a test sentence when a type ({@code "XYZ"}) is
 * passed to the event stream while the span itself declares the type
 * {@code "person"}.
 *
 * <p>The assertions expect the first two outcomes to carry the {@code "XYZ-"} prefix,
 * followed by ten "other" outcomes and then the end of the stream.
 * NOTE(review): the method name and the earlier description ("passing the type to
 * event stream has no effect") suggest the opposite expectation. Confirm which one is
 * intended.
 *
 * @throws IOException if reading from or closing the event stream fails
 */
@Test
public void testOutcomesTypeCantOverride() throws IOException {
  String type = "XYZ";

  NameSample nameSample = new NameSample(SENTENCE,
      new Span[] { new Span(0, 2, "person") }, false);

  // try-with-resources replaces the trailing close() call, which was skipped
  // whenever one of the assertions failed.
  try (ObjectStream<Event> eventStream = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(nameSample), type, CG, null)) {

    String prefix = type + "-";
    Assert.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
    Assert.assertEquals(prefix + NameFinderME.CONTINUE, eventStream.read().getOutcome());

    for (int i = 0; i < 10; i++) {
      Assert.assertEquals(NameFinderME.OTHER, eventStream.read().getOutcome());
    }

    Assert.assertNull(eventStream.read());
  }
}
/**
 * Verifies that a {@link TokenSample} read from {@code "a b<SPLIT>c d<SPLIT>e"} has
 * five token spans, i.e. that tokens are separated both at whitespace and at the
 * {@code <SPLIT>} marker, and that the spans cover {@code a}, {@code b}, {@code c},
 * {@code d} and {@code e} in the sample's text.
 */
@Test
public void testParsingWhitespaceAndSeparatedString() throws IOException {
  String annotatedLine = "a b<SPLIT>c d<SPLIT>e";
  String[] expectedTokens = {"a", "b", "c", "d", "e"};

  try (ObjectStream<TokenSample> samples = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(annotatedLine))) {

    TokenSample tokenSample = samples.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();

    Assert.assertEquals(5, tokenSpans.length);

    // Spans are resolved against the sample's own text, not the annotated input line.
    for (int i = 0; i < expectedTokens.length; i++) {
      Assert.assertEquals(expectedTokens[i],
          tokenSpans[i].getCoveredText(tokenSample.getText()));
    }
  }
}
}
/**
 * Verifies the outcomes of a {@link TokSpanEventStream} built over the sample
 * {@code "<SPLIT>out<SPLIT>.<SPLIT>"} (each quote, {@code out} and the period marked
 * as separate tokens): SPLIT, NO_SPLIT, NO_SPLIT, SPLIT, SPLIT, after which every
 * further read returns {@code null}.
 */
@Test
public void testEventOutcomes() throws IOException {
  try (ObjectStream<Event> events = new TokSpanEventStream(
      new TokenSampleStream(
          ObjectStreamUtils.createObjectStream("\"<SPLIT>out<SPLIT>.<SPLIT>\"")),
      false)) {

    Assert.assertEquals(TokenizerME.SPLIT, events.read().getOutcome());
    Assert.assertEquals(TokenizerME.NO_SPLIT, events.read().getOutcome());
    Assert.assertEquals(TokenizerME.NO_SPLIT, events.read().getOutcome());
    Assert.assertEquals(TokenizerME.SPLIT, events.read().getOutcome());
    Assert.assertEquals(TokenizerME.SPLIT, events.read().getOutcome());

    // The exhausted stream must keep returning null on repeated reads.
    for (int extraRead = 0; extraRead < 2; extraRead++) {
      Assert.assertNull(events.read());
    }
  }
}
}
@Test public void testEventOutcomes() throws IOException { // Sample with two sentences SentenceSample sample = new SentenceSample("Test sent. one. Test sent. 2?", new Span(0, 15), new Span(16, 29)); ObjectStream<SentenceSample> sampleStream = ObjectStreamUtils.createObjectStream(sample); Factory factory = new Factory(); ObjectStream<Event> eventStream = new SDEventStream(sampleStream, factory.createSentenceContextGenerator("eng"), factory.createEndOfSentenceScanner("eng")); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertNull(eventStream.read()); } }