/**
 * Builds an in-memory event stream by pairing each entry of {@code outputs}
 * with the entry of {@code cntx} found at the same index.
 *
 * @return a stream over one {@link Event} per entry of {@code cntx}
 */
private ObjectStream<Event> createEventStream() {
  final int eventCount = cntx.length;
  final List<Event> pairedEvents = new ArrayList<>(eventCount);

  int index = 0;
  while (index < eventCount) {
    pairedEvents.add(new Event(outputs[index], cntx[index]));
    index++;
  }

  return ObjectStreamUtils.createObjectStream(pairedEvents);
}
// Case 1: concatenate the streams created from data1 and data2, handing them
// over as two separate arguments, and check the combined stream against expected.
ObjectStream<String> stream = ObjectStreamUtils.concatenateObjectStream(
    ObjectStreamUtils.createObjectStream(data1),
    ObjectStreamUtils.createObjectStream(data2));
compare(stream, expected);

// Case 2: same check, but the two streams are collected in listOfStreams first
// (declared outside this excerpt) and concatenated from that collection.
listOfStreams.add(ObjectStreamUtils.createObjectStream(data1) );
listOfStreams.add(ObjectStreamUtils.createObjectStream(data2) );
stream = ObjectStreamUtils.concatenateObjectStream(listOfStreams);
compare(stream, expected);

// Case 3: the streams are collected in streamSet (declared outside this excerpt;
// presumably a Set - TODO confirm) before being concatenated. The comparison for
// this last case is not part of the visible excerpt.
streamSet.add(ObjectStreamUtils.createObjectStream(data1) );
streamSet.add(ObjectStreamUtils.createObjectStream(data2) );
stream = ObjectStreamUtils.concatenateObjectStream(streamSet);
/**
 * Creates one {@link LeipzigDoccatSampleStream} per sentences file found in the
 * configured directory and returns all of them concatenated into a single stream.
 *
 * <p>A file is picked up when its name contains {@code "sentences"} and ends with
 * {@code ".txt"}. The first three characters of the file name are handed to the
 * sample stream as its first argument (presumably the language/category label -
 * TODO confirm against {@code LeipzigDoccatSampleStream}).
 *
 * @param args the command line arguments, parsed into {@code Parameters}
 * @return the concatenation of the sample streams of all matching files
 * @throws TerminateToolException if the sentences directory cannot be listed
 *         or if one of the files cannot be opened
 */
public ObjectStream<DocumentSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  File sentencesFileDir = params.getSentencesDir();

  File[] sentencesFiles = sentencesFileDir.listFiles(new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      return name.contains("sentences") && name.endsWith(".txt");
    }
  });

  // File.listFiles returns null (not an empty array) when the path is not a
  // directory or an I/O error occurs; fail with a clear message instead of a
  // NullPointerException further down.
  if (sentencesFiles == null) {
    throw new TerminateToolException(-1,
        "Unable to list sentences files in directory: " + sentencesFileDir);
  }

  // Generic array creation is not possible, hence the unchecked raw array.
  @SuppressWarnings("unchecked")
  ObjectStream<DocumentSample>[] sampleStreams = new ObjectStream[sentencesFiles.length];

  for (int i = 0; i < sentencesFiles.length; i++) {
    try {
      // NOTE(review): 20 is passed as the second constructor argument; presumably
      // the number of sentences per document - confirm.
      sampleStreams[i] = new LeipzigDoccatSampleStream(
          sentencesFiles[i].getName().substring(0, 3), 20,
          CmdLineUtil.createInputStreamFactory(sentencesFiles[i]));
    } catch (IOException e) {
      throw new TerminateToolException(-1,
          "IO error while opening sample data: " + e.getMessage(), e);
    }
  }

  return ObjectStreamUtils.concatenateObjectStream(sampleStreams);
}
// Merge every stream held in categoryStreams (declared outside this excerpt)
// into one document sample stream.
ObjectStream<DocumentSample> combinedDocumentSampleStream =
    ObjectStreamUtils.concatenateObjectStream(categoryStreams);

// Register the current name sample stream, then merge everything collected in
// nameStreams into one name sample stream.
nameStreams.add(nameSampleStream);
ObjectStream<NameSample> combinedNameSampleStream =
    ObjectStreamUtils.concatenateObjectStream(nameStreams);
/**
 * Exposes the events returned by {@code readPpaFile("training")} as a stream.
 *
 * @return a stream over the training events
 * @throws IOException declared for callers; propagated from {@code readPpaFile}
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
  return ObjectStreamUtils.createObjectStream(readPpaFile("training"));
}
@Test public void buildStreamTest() throws IOException { String[] data = {"dog","cat","pig","frog"}; // make a stream out of the data array... ObjectStream<String> stream = ObjectStreamUtils.createObjectStream(data); compare(stream, data); // make a stream out of a list... List<String> dataList = Arrays.asList(data); stream = ObjectStreamUtils.createObjectStream(Arrays.asList(data)); compare(stream, data); // make a stream out of a set... // A treeSet will order the set in Alphabetical order, so // we can compare it with the sorted Array, but this changes the // array. so it must be checked last. Arrays.sort(data); stream = ObjectStreamUtils.createObjectStream(new TreeSet<>(dataList)); compare(stream, data); }
/**
 * Indexes the given events in three reported steps: counting events (applying
 * the configured cutoff), indexing the retained events against the predicate
 * index, and finally sorting/merging the comparable events.
 *
 * @param eventStream the events to index
 * @throws IOException declared for callers; propagated from the helpers that
 *         consume the stream
 */
@Override
public void index(ObjectStream<Event> eventStream) throws IOException {
  final int minCount = trainingParameters.getIntParameter(CUTOFF_PARAM, CUTOFF_DEFAULT);
  final boolean sortEvents = trainingParameters.getBooleanParameter(SORT_PARAM, SORT_DEFAULT);
  final long startMillis = System.currentTimeMillis();

  display("Indexing events with OnePass using cutoff of " + minCount + "\n\n");

  // Step 1: count the events and fill the predicate index.
  display("\tComputing event counts... ");
  final Map<String, Integer> predicateToIndex = new HashMap<>();
  final List<Event> retainedEvents = computeEventCounts(eventStream, predicateToIndex, minCount);
  display("done. " + retainedEvents.size() + " events\n");

  // Step 2: index the retained events, replayed from memory.
  display("\tIndexing... ");
  final ObjectStream<Event> retainedStream = ObjectStreamUtils.createObjectStream(retainedEvents);
  final List<ComparableEvent> comparableEvents = index(retainedStream, predicateToIndex);
  display("done.\n");

  // Step 3: sort and merge.
  display("Sorting and merging events... ");
  sortAndMerge(comparableEvents, sortEvents);

  final double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000d;
  display(String.format("Done indexing in %.2f s.\n", elapsedSeconds));
}
/**
 * Creates a small fixed training set of four bag-of-words events, two labelled
 * {@code "politics"} and two labelled {@code "sports"}.
 *
 * @return a stream over the four events, in the order politics, politics,
 *         sports, sports
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
  final String politics = "politics";
  final String sports = "sports";

  List<Event> samples = new ArrayList<>();
  samples.add(new Event(politics,
      new String[] {"bow=the", "bow=united", "bow=nations"}));
  samples.add(new Event(politics,
      new String[] {"bow=the", "bow=united", "bow=states", "bow=and"}));
  samples.add(new Event(sports,
      new String[] {"bow=manchester", "bow=united"}));
  samples.add(new Event(sports,
      new String[] {"bow=manchester", "bow=and", "bow=barca"}));

  return ObjectStreamUtils.createObjectStream(samples);
}
/**
 * Feeds two {@code <DOC>} sections as one string into a
 * {@code DocumentSplitterStream} and expects two documents back, followed by
 * {@code null} on every further read.
 */
@Test
public void testSplitTwoDocuments() throws IOException {
  StringBuilder twoDocs = new StringBuilder();
  for (int docNo = 0; docNo < 2; docNo++) {
    twoDocs.append("<DOC>\n")
        .append("test document #").append(docNo).append("\n")
        .append("</DOC>\n");
  }

  final String input = twoDocs.toString();
  // Both sections have the same length, so each one is half of the input.
  final int halfLength = twoDocs.length() / 2;

  try (ObjectStream<String> splitter = new DocumentSplitterStream(
      ObjectStreamUtils.createObjectStream(input))) {

    String first = splitter.read();
    Assert.assertEquals(halfLength, first.length() + 1);
    Assert.assertTrue(first.contains("#0"));

    String second = splitter.read();
    Assert.assertEquals(halfLength, second.length() + 1);
    Assert.assertTrue(second.contains("#1"));

    // The exhausted stream keeps answering null.
    Assert.assertNull(splitter.read());
    Assert.assertNull(splitter.read());
  }
}
}
@Test public void testWithNameTypeAndInvalidData() { try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream("<START:> Name <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream( "<START:street> <START:person> Name <END> <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } }
/** * Checks that invalid spans cause an {@link ObjectStreamException} to be thrown. */ @Test public void testWithoutNameTypeAndInvalidData() { try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream("<START> <START> Name <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream("<START> Name <END> <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } try (NameSampleDataStream sampleStream = new NameSampleDataStream( ObjectStreamUtils.createObjectStream( "<START> <START> Person <END> Street <END>"))) { sampleStream.read(); fail(); } catch (IOException expected) { // the read above is expected to throw an exception } }
/**
 * Training with a single document sample is expected to be rejected with an
 * {@code InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
  TrainingParameters trainingConfig = new TrainingParameters();
  trainingConfig.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingConfig.put(TrainingParameters.CUTOFF_PARAM, 0);

  DocumentSample onlySample = new DocumentSample("1", new String[]{"a", "b", "c"});
  ObjectStream<DocumentSample> singleSampleStream =
      ObjectStreamUtils.createObjectStream(onlySample);

  DocumentCategorizerME.train("x-unspecified", singleSampleStream, trainingConfig,
      new DoccatFactory());
}
/**
 * Indexes the events generated from a sentence that contains a newline token
 * and expects five contexts from the indexer.
 */
@Test
public void testIndexWithNewline() throws IOException {
  String[] tokens = "He belongs to Apache \n Software Foundation .".split(" ");
  Span[] nameSpans = {new Span(3, 7)};
  NameSample sample = new NameSample(tokens, nameSpans, false);

  NameContextGenerator contextGenerator =
      new DefaultNameContextGenerator((AdaptiveFeatureGenerator[]) null);

  ObjectStream<Event> events = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(sample), "org", contextGenerator, null);

  DataIndexer twoPass = new TwoPassDataIndexer();
  twoPass.init(new TrainingParameters(Collections.emptyMap()), null);
  twoPass.index(events);

  Assert.assertEquals(5, twoPass.getContexts().length);
}
}
/**
 * Reads the first parse of sample 1 and compares the covered text of its tag
 * nodes with {@code sample1Tokens}.
 */
@Test
public void testTokensAreCorrect() throws IOException {
  try (ObjectStream<Parse> parses =
      new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {

    Parse[] leaves = parses.read().getTagNodes();
    String[] actualTokens = new String[leaves.length];

    int position = 0;
    for (Parse leaf : leaves) {
      actualTokens[position++] = leaf.getCoveredText();
    }

    Assert.assertArrayEquals(sample1Tokens, actualTokens);
  }
}
}
/**
 * Reads two paragraphs separated by empty lines, once without and once with a
 * trailing empty line, and expects the same two paragraphs followed by
 * {@code null} in both cases.
 */
@Test
public void testSimpleReading() throws IOException {
  String[][] inputs = {
      {"1", "2", "", "", "4", "5"},
      {"1", "2", "", "", "4", "5", ""}
  };

  for (String[] lines : inputs) {
    try (ParagraphStream paragraphs = new ParagraphStream(
        ObjectStreamUtils.createObjectStream(lines))) {
      Assert.assertEquals("1\n2\n", paragraphs.read());
      Assert.assertEquals("4\n5\n", paragraphs.read());
      Assert.assertNull(paragraphs.read());
    }
  }
}
/**
 * Sample 1 must yield exactly one parse; every read after that returns
 * {@code null}.
 */
@Test
public void testThereIsExactlyOneSent() throws IOException {
  try (ObjectStream<Parse> parses =
      new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {

    Parse onlyParse = parses.read();
    Assert.assertNotNull(onlyParse);

    // the exhausted stream has to keep returning null on repeated reads
    for (int attempt = 0; attempt < 2; attempt++) {
      Assert.assertNull(parses.read());
    }
  }
}
/**
 * Main Senseval reader: returns the Senseval instances of the given word as a
 * stream. The lookup itself is delegated to {@code getSensevalData(wordTag)};
 * this method only wraps its result.
 *
 * <p>NOTE(review): the previous description stated that the reader checks
 * whether data for the word to disambiguate exists in the Senseval folder
 * before extracting it - confirm against {@code getSensevalData}.
 *
 * @param wordTag
 *          The word, of which we are looking for the instances
 * @return the stream of {@link WSDSample} of the word to disambiguate
 */
public ObjectStream<WSDSample> getSensevalDataStream(String wordTag) {
  return ObjectStreamUtils.createObjectStream(getSensevalData(wordTag));
}
/**
 * Semcor reader: returns all instances of a specific word, in the
 * {@link WSDSample} format, as a stream. The lookup itself is delegated to
 * {@code getSemcorData(wordTag)}; this method only wraps its result.
 *
 * <p>NOTE(review): the previous description stated that all Semcor files are
 * read to collect the instances - confirm against {@code getSemcorData}.
 *
 * @param wordTag
 *          The word, of which we are looking for the instances
 * @return the stream of {@link WSDSample} of the word to disambiguate
 */
public ObjectStream<WSDSample> getSemcorDataStream(String wordTag) {
  return ObjectStreamUtils.createObjectStream(getSemcorData(wordTag));
}
/**
 * After reading the first paragraph and calling {@code reset()}, the stream
 * must deliver both paragraphs again from the start, then {@code null}.
 */
@Test
public void testReset() throws IOException {
  ObjectStream<String> lines =
      ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", "");

  try (ParagraphStream paragraphs = new ParagraphStream(lines)) {
    final String firstParagraph = "1\n2\n";

    Assert.assertEquals(firstParagraph, paragraphs.read());

    // rewind and expect the full sequence once more
    paragraphs.reset();

    Assert.assertEquals(firstParagraph, paragraphs.read());
    Assert.assertEquals("4\n5\n", paragraphs.read());
    Assert.assertNull(paragraphs.read());
  }
}
}
@Test public void testSimpleTraining() throws IOException { ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream( new DocumentSample("1", new String[]{"a", "b", "c"}), new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), new DocumentSample("0", new String[]{"x", "y", "z"}), new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"})); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 0); DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples, params, new DoccatFactory()); DocumentCategorizer doccat = new DocumentCategorizerME(model); double[] aProbs = doccat.categorize(new String[]{"a"}); Assert.assertEquals("1", doccat.getBestCategory(aProbs)); double[] bProbs = doccat.categorize(new String[]{"x"}); Assert.assertEquals("0", doccat.getBestCategory(bProbs)); //test to make sure sorted map's last key is cat 1 because it has the highest score. SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"}); Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey()); Assert.assertEquals(1, cat.size()); }