opennlp.tools.util.ObjectStreamUtils java code examples

/**
 * Builds one {@link Event} per row of {@code cntx}, pairing it with the entry
 * of {@code outputs} at the same index, and exposes the events as a stream.
 *
 * @return a stream over the events, in index order
 */
private ObjectStream<Event> createEventStream() {
 List<Event> collected = new ArrayList<>();
 int row = 0;
 while (row < cntx.length) {
  // outcome and context are matched purely by position
  collected.add(new Event(outputs[row], cntx[row]));
  row++;
 }
 return ObjectStreamUtils.createObjectStream(collected);
}

// Variant 1: concatenate two freshly created streams passed as separate
// arguments, then check the combined output against the expected values.
ObjectStream<String> stream = ObjectStreamUtils.concatenateObjectStream(
  ObjectStreamUtils.createObjectStream(data1),
  ObjectStreamUtils.createObjectStream(data2));
compare(stream, expected);
// Variant 2: collect the same two streams in listOfStreams (declared outside
// this excerpt) and concatenate the whole collection in one call.
listOfStreams.add(ObjectStreamUtils.createObjectStream(data1) );
listOfStreams.add(ObjectStreamUtils.createObjectStream(data2) );
stream = ObjectStreamUtils.concatenateObjectStream(listOfStreams);
compare(stream, expected);
// Variant 3: same again, but the streams are gathered in streamSet (declared
// outside this excerpt; presumably a Set - confirm against the full test).
streamSet.add(ObjectStreamUtils.createObjectStream(data1) );
streamSet.add(ObjectStreamUtils.createObjectStream(data2) );
stream = ObjectStreamUtils.concatenateObjectStream(streamSet);

/**
 * Creates a single {@link DocumentSample} stream from all sentences files found
 * in the directory given by the parsed command line arguments.
 * <p>
 * A file is picked up when its name contains {@code "sentences"} and ends with
 * {@code ".txt"}. For each such file a {@link LeipzigDoccatSampleStream} is
 * opened, and all of them are concatenated in the order returned by
 * {@link File#listFiles(FilenameFilter)}.
 *
 * @param args the command line arguments, parsed into {@code Parameters}
 * @return the concatenation of the per-file sample streams
 * @throws TerminateToolException if the sentences directory cannot be listed
 *         or one of the sample files cannot be opened
 */
public ObjectStream<DocumentSample> create(String[] args) {
 Parameters params = ArgumentParser.parse(args, Parameters.class);
 File sentencesFileDir = params.getSentencesDir();
 File[] sentencesFiles = sentencesFileDir.listFiles(new FilenameFilter() {
  @Override
  public boolean accept(File dir, String name) {
   return name.contains("sentences") && name.endsWith(".txt");
  }
 });
 // listFiles returns null (not an empty array) when the path is not a
 // directory or an I/O error occurs; fail with a clear message instead of
 // a NullPointerException further down.
 if (sentencesFiles == null) {
  throw new TerminateToolException(-1,
    "Unable to list sentences files in directory: " + sentencesFileDir);
 }
 // generic array creation is not allowed, hence the raw array and the suppression
 @SuppressWarnings("unchecked")
 ObjectStream<DocumentSample>[] sampleStreams =
   new ObjectStream[sentencesFiles.length];
 for (int i = 0; i < sentencesFiles.length; i++) {
  try {
   // the first three characters of the file name are handed to the sample
   // stream (presumably the language code - confirm); the meaning of the
   // constant 20 is defined by LeipzigDoccatSampleStream
   sampleStreams[i] = new LeipzigDoccatSampleStream(
     sentencesFiles[i].getName().substring(0, 3), 20,
     CmdLineUtil.createInputStreamFactory(sentencesFiles[i]));
  } catch (IOException e) {
   throw new TerminateToolException(-1, "IO error while opening sample data: " + e.getMessage(), e);
  }
 }
 return ObjectStreamUtils.concatenateObjectStream(sampleStreams);
}

// Merge every stream held in categoryStreams into one document sample stream.
ObjectStream<DocumentSample> combinedDocumentSampleStream = ObjectStreamUtils.concatenateObjectStream(categoryStreams);
    // Excerpt from enclosing code that is not shown: queue one more name sample stream.
    nameStreams.add(nameSampleStream);
  // Merge every stream held in nameStreams into one name sample stream.
  ObjectStream<NameSample> combinedNameSampleStream = ObjectStreamUtils.concatenateObjectStream(nameStreams);

/**
 * Reads the events of the {@code "training"} PPA file via {@code readPpaFile}
 * and exposes them as an event stream.
 *
 * @return a stream over the training events
 * @throws IOException declared for callers; see {@code readPpaFile}
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
 final List<Event> ppaEvents = readPpaFile("training");
 final ObjectStream<Event> ppaStream = ObjectStreamUtils.createObjectStream(ppaEvents);
 return ppaStream;
}

/**
 * Checks that streams created from an array, a list and a sorted set each
 * yield the expected elements.
 */
@Test
public void buildStreamTest() throws IOException {
 String[] animals = {"dog","cat","pig","frog"};
 // fixed-size list view backed by the array above
 List<String> animalList = Arrays.asList(animals);

 // 1) stream backed by the array
 ObjectStream<String> stream = ObjectStreamUtils.createObjectStream(animals);
 compare(stream, animals);

 // 2) stream backed by the list
 stream = ObjectStreamUtils.createObjectStream(animalList);
 compare(stream, animals);

 // 3) stream backed by a set: a TreeSet iterates in natural (alphabetical)
 // order, so the reference array is sorted to match. Sorting mutates the
 // array, which is why this case has to run last.
 Arrays.sort(animals);
 stream = ObjectStreamUtils.createObjectStream(new TreeSet<>(animalList));
 compare(stream, animals);
}

/**
 * Indexes the given events: counts them, indexes the counted events against
 * the collected predicate index, then sorts and merges the result. Progress
 * and the total elapsed time are reported through {@code display}.
 *
 * @param eventStream the events to index
 * @throws IOException declared for callers; raised by the steps this method delegates to
 */
@Override
public void index(ObjectStream<Event> eventStream) throws IOException {
 final int minCount = trainingParameters.getIntParameter(CUTOFF_PARAM, CUTOFF_DEFAULT);
 final boolean sortEvents = trainingParameters.getBooleanParameter(SORT_PARAM, SORT_DEFAULT);
 final long startedAt = System.currentTimeMillis();

 display("Indexing events with OnePass using cutoff of " + minCount + "\n\n");

 // Phase 1: count events and fill the predicate index
 display("\tComputing event counts...  ");
 Map<String, Integer> predicateToIndex = new HashMap<>();
 List<Event> countedEvents = computeEventCounts(eventStream, predicateToIndex, minCount);
 display("done. " + countedEvents.size() + " events\n");

 // Phase 2: index the counted events
 display("\tIndexing...  ");
 ObjectStream<Event> countedStream = ObjectStreamUtils.createObjectStream(countedEvents);
 List<ComparableEvent> comparableEvents = index(countedStream, predicateToIndex);
 display("done.\n");

 // Phase 3: sort and merge, then report the elapsed time
 display("Sorting and merging events... ");
 sortAndMerge(comparableEvents, sortEvents);
 double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000d;
 display(String.format("Done indexing in %.2f s.\n", elapsedSeconds));
}

/**
 * Creates a small, hard-coded training set of four events: two labelled
 * {@code "politics"} and two labelled {@code "sports"}, each with a handful of
 * {@code bow=...} context features.
 *
 * @return a stream over the four events, in the order they are defined here
 * @throws IOException declared in the signature; nothing in this body raises it directly
 */
public static ObjectStream<Event> createTrainingStream() throws IOException {
 final String politics = "politics";
 final String sports = "sports";

 List<Event> samples = new ArrayList<>();
 samples.add(new Event(politics,
   new String[] {"bow=the", "bow=united", "bow=nations"}));
 samples.add(new Event(politics,
   new String[] {"bow=the", "bow=united", "bow=states", "bow=and"}));
 samples.add(new Event(sports,
   new String[] {"bow=manchester", "bow=united"}));
 samples.add(new Event(sports,
   new String[] {"bow=manchester", "bow=and", "bow=barca"}));

 return ObjectStreamUtils.createObjectStream(samples);
}

 /**
  * Feeds a string holding two {@code <DOC>} sections to a
  * {@link DocumentSplitterStream} and checks that exactly two documents are
  * read, each containing its own marker text.
  */
 @Test
 public void testSplitTwoDocuments() throws IOException {
  StringBuilder twoDocs = new StringBuilder();
  int docIndex = 0;
  while (docIndex < 2) {
   twoDocs.append("<DOC>\n")
     .append("test document #").append(docIndex).append("\n")
     .append("</DOC>\n");
   docIndex++;
  }

  // both sections have the same length, so each takes half of the input
  final int halfLength = twoDocs.length() / 2;

  try (ObjectStream<String> splitter = new DocumentSplitterStream(
    ObjectStreamUtils.createObjectStream(twoDocs.toString()))) {
   String first = splitter.read();
   Assert.assertEquals(halfLength, first.length() + 1);
   Assert.assertTrue(first.contains("#0"));

   String second = splitter.read();
   Assert.assertEquals(halfLength, second.length() + 1);
   Assert.assertTrue(second.contains("#1"));

   // once exhausted, the stream keeps returning null
   Assert.assertNull(splitter.read());
   Assert.assertNull(splitter.read());
  }
 }
}

/**
 * Checks that malformed typed name tags make reading a sample fail with an
 * {@link IOException}.
 */
@Test
public void testWithNameTypeAndInvalidData() {
 // case 1: START tag with a type separator but no type name
 final String emptyTypeInput = "<START:> Name <END>";
 try (NameSampleDataStream samples = new NameSampleDataStream(
   ObjectStreamUtils.createObjectStream(emptyTypeInput))) {
  samples.read();
  fail();
 } catch (IOException e) {
  // expected: read() must reject the input
 }

 // case 2: a typed START tag nested inside another typed START tag
 final String nestedTypedInput = "<START:street> <START:person> Name <END> <END>";
 try (NameSampleDataStream samples = new NameSampleDataStream(
   ObjectStreamUtils.createObjectStream(nestedTypedInput))) {
  samples.read();
  fail();
 } catch (IOException e) {
  // expected: read() must reject the input
 }
}

/**
 * Checks that untyped name tags with unbalanced or nested START/END markers
 * make reading a sample fail with an {@link IOException}.
 */
@Test
public void testWithoutNameTypeAndInvalidData() {
 // case 1: two START tags but only one END tag
 final String doubleStartInput = "<START> <START> Name <END>";
 try (NameSampleDataStream samples = new NameSampleDataStream(
   ObjectStreamUtils.createObjectStream(doubleStartInput))) {
  samples.read();
  fail();
 } catch (IOException e) {
  // expected: read() must reject the input
 }

 // case 2: one START tag followed by two END tags
 final String doubleEndInput = "<START> Name <END> <END>";
 try (NameSampleDataStream samples = new NameSampleDataStream(
   ObjectStreamUtils.createObjectStream(doubleEndInput))) {
  samples.read();
  fail();
 } catch (IOException e) {
  // expected: read() must reject the input
 }

 // case 3: a START tag opened before the previous one was closed
 final String nestedInput = "<START> <START> Person <END> Street <END>";
 try (NameSampleDataStream samples = new NameSampleDataStream(
   ObjectStreamUtils.createObjectStream(nestedInput))) {
  samples.read();
  fail();
 } catch (IOException e) {
  // expected: read() must reject the input
 }
}

/**
 * Training on a single document sample is expected to be rejected with an
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
 TrainingParameters trainingConfig = new TrainingParameters();
 trainingConfig.put(TrainingParameters.ITERATIONS_PARAM, 100);
 trainingConfig.put(TrainingParameters.CUTOFF_PARAM, 0);

 // the one and only sample in the training stream
 DocumentSample onlySample = new DocumentSample("1", new String[]{"a", "b", "c"});
 ObjectStream<DocumentSample> singleSampleStream =
   ObjectStreamUtils.createObjectStream(onlySample);

 DocumentCategorizerME.train("x-unspecified", singleSampleStream,
   trainingConfig, new DoccatFactory());
}

 /**
  * Indexes the events generated from one name sample whose tokens include a
  * newline and checks the number of contexts the indexer reports.
  */
 @Test
 public void testIndexWithNewline() throws IOException {
  // splitting on single spaces leaves "\n" as a token of its own
  String[] tokens = "He belongs to Apache \n Software Foundation .".split(" ");
  Span[] nameSpans = new Span[] { new Span(3, 7) };
  NameSample sample = new NameSample(tokens, nameSpans, false);

  NameContextGenerator contextGenerator = new DefaultNameContextGenerator(
      (AdaptiveFeatureGenerator[]) null);
  ObjectStream<Event> events = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(sample), "org", contextGenerator, null);

  DataIndexer twoPassIndexer = new TwoPassDataIndexer();
  twoPassIndexer.init(new TrainingParameters(Collections.emptyMap()), null);
  twoPassIndexer.index(events);

  Assert.assertEquals(5, twoPassIndexer.getContexts().length);
 }
}

 /**
  * Reads the first parse from the sample data and checks that the covered
  * text of its tag nodes matches {@code sample1Tokens}.
  */
 @Test
 public void testTokensAreCorrect() throws IOException {
  try (ObjectStream<Parse> parses =
    new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
   Parse firstParse = parses.read();
   Parse[] leaves = firstParse.getTagNodes();

   // collect the covered text of every tag node, preserving order
   String[] actualTokens = new String[leaves.length];
   int slot = 0;
   for (Parse leaf : leaves) {
    actualTokens[slot++] = leaf.getCoveredText();
   }

   Assert.assertArrayEquals(sample1Tokens, actualTokens);
  }
 }
}

/**
 * Checks that a {@link ParagraphStream} groups lines separated by empty lines
 * into paragraphs, with and without a trailing empty line in the input.
 */
@Test
public void testSimpleReading() throws IOException {
 final String firstParagraph = "1\n2\n";
 final String secondParagraph = "4\n5\n";

 // input ends directly after the last paragraph line
 try (ParagraphStream paragraphs = new ParagraphStream(
     ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5"))) {
  Assert.assertEquals(firstParagraph, paragraphs.read());
  Assert.assertEquals(secondParagraph, paragraphs.read());
  Assert.assertNull(paragraphs.read());
 }

 // input ends with an extra empty line
 try (ParagraphStream paragraphs = new ParagraphStream(
      ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
  Assert.assertEquals(firstParagraph, paragraphs.read());
  Assert.assertEquals(secondParagraph, paragraphs.read());
  Assert.assertNull(paragraphs.read());
 }
}

/**
 * Checks that the sample data yields exactly one parse and that the stream
 * keeps returning {@code null} afterwards.
 */
@Test
public void testThereIsExactlyOneSent() throws IOException {
 try (ObjectStream<Parse> parses =
   new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
  // the single expected parse
  Parse onlyParse = parses.read();
  Assert.assertNotNull(onlyParse);

  // every further read signals end of stream
  Assert.assertNull(parses.read());
  Assert.assertNull(parses.read());
 }
}

/**
 * Main Senseval reader: wraps the result of {@code getSensevalData(wordTag)}
 * in an {@link ObjectStream}.
 * <p>
 * NOTE(review): the original description said the lookup checks whether data
 * for the word to disambiguate exists in the folder and extracts it; that
 * logic lives in {@code getSensevalData} and is not visible here - confirm.
 * 
 * @param wordTag
 *          The word, of which we are looking for the instances
 * @return the stream of {@link WSDSample} of the word to disambiguate
 */
public ObjectStream<WSDSample> getSensevalDataStream(String wordTag) {
 return ObjectStreamUtils.createObjectStream(getSensevalData(wordTag));
}

/**
 * Semcor reader: wraps the result of {@code getSemcorData(wordTag)} in an
 * {@link ObjectStream}.
 * <p>
 * NOTE(review): the original description said this reads all the files in
 * Semcor and returns all instances of a specific word as {@link WSDSample};
 * that logic lives in {@code getSemcorData} and is not visible here - confirm.
 * 
 * @param wordTag
 *          The word, of which we are looking for the instances
 * @return the stream of {@link WSDSample} of the word to disambiguate
 */
public ObjectStream<WSDSample> getSemcorDataStream(String wordTag) {
 return ObjectStreamUtils.createObjectStream(getSemcorData(wordTag));
}

 /**
  * Checks that resetting a {@link ParagraphStream} after the first read makes
  * it start again from the first paragraph.
  */
 @Test
 public void testReset() throws IOException {
  final String firstParagraph = "1\n2\n";
  final String secondParagraph = "4\n5\n";

  try (ParagraphStream paragraphs = new ParagraphStream(
      ObjectStreamUtils.createObjectStream("1", "2", "", "", "4", "5", ""))) {
   // consume one paragraph, then rewind
   Assert.assertEquals(firstParagraph, paragraphs.read());
   paragraphs.reset();

   // after the reset the full sequence is readable again
   Assert.assertEquals(firstParagraph, paragraphs.read());
   Assert.assertEquals(secondParagraph, paragraphs.read());
   Assert.assertNull(paragraphs.read());
  }
 }
}

/**
 * Trains a document categorizer on six tiny samples (three per category) and
 * checks the best category for two probe tokens as well as the size of the
 * top-scoring entry of the sorted score map.
 */
@Test
public void testSimpleTraining() throws IOException {
 TrainingParameters trainingConfig = new TrainingParameters();
 trainingConfig.put(TrainingParameters.ITERATIONS_PARAM, 100);
 trainingConfig.put(TrainingParameters.CUTOFF_PARAM, 0);

 // category "1" is built from a/b/c tokens, category "0" from x/y/z tokens
 ObjectStream<DocumentSample> trainingSamples = ObjectStreamUtils.createObjectStream(
   new DocumentSample("1", new String[]{"a", "b", "c"}),
   new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}),
   new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}),
   new DocumentSample("0", new String[]{"x", "y", "z"}),
   new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}),
   new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));

 DoccatModel trainedModel = DocumentCategorizerME.train("x-unspecified", trainingSamples,
     trainingConfig, new DoccatFactory());
 DocumentCategorizer categorizer = new DocumentCategorizerME(trainedModel);

 double[] scoresForA = categorizer.categorize(new String[]{"a"});
 Assert.assertEquals("1", categorizer.getBestCategory(scoresForA));

 double[] scoresForX = categorizer.categorize(new String[]{"x"});
 Assert.assertEquals("0", categorizer.getBestCategory(scoresForX));

 // the entry under the highest score (last key) must hold exactly one category
 SortedMap<Double, Set<String>> scoreToCategories =
   categorizer.sortedScoreMap(new String[]{"a"});
 Set<String> topCategories = scoreToCategories.get(scoreToCategories.lastKey());
 Assert.assertEquals(1, topCategories.size());
}

Most used methods

createObjectStream
Creates an ObjectStream from an array.
concatenateObjectStream
Creates a single concatenated ObjectStream from multiple individual ObjectStreams with the same type

Popular in Java

Running tasks concurrently on multiple threads
onCreateOptionsMenu (Activity)
getSystemService (Context)
getSupportFragmentManager (FragmentActivity)
OutputStream (java.io)
A writable sink for bytes. Most clients will use output streams that write data to the file system (
NumberFormat (java.text)
The abstract base class for all number formats. This class provides the interface for formatting and
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
Runner (org.openjdk.jmh.runner)
From CI to AI: The AI layer in your organization

How to use ObjectStreamUtils in opennlp.tools.util

Best Java code snippets using opennlp.tools.util.ObjectStreamUtils (Showing top 20 results out of 315)

How to use
ObjectStreamUtils
in
opennlp.tools.util