opennlp.tools.formats.ResourceAsStreamFactory.<init> java code examples

/**
 * Opens a test resource located under {@code /opennlp/tools/formats/} as an
 * Evalita name sample stream.
 *
 * @param lang the language handed to {@code EvalitaNameSampleStream}
 * @param name file name appended to the {@code /opennlp/tools/formats/} prefix
 * @return a sample stream created with {@code GENERATE_PERSON_ENTITIES}
 * @throws IOException if building the sample stream fails
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
 String resourcePath = "/opennlp/tools/formats/" + name;
 InputStreamFactory resource =
   new ResourceAsStreamFactory(EvalitaNameSampleStreamTest.class, resourcePath);

 return new EvalitaNameSampleStream(
   lang, resource, EvalitaNameSampleStream.GENERATE_PERSON_ENTITIES);
}

/**
 * Opens a test resource located under {@code /opennlp/tools/formats/} as a
 * CoNLL-03 name sample stream.
 *
 * @param lang the language handed to {@code Conll03NameSampleStream}
 * @param name file name appended to the {@code /opennlp/tools/formats/} prefix
 * @return a sample stream created with
 *         {@code Conll02NameSampleStream.GENERATE_PERSON_ENTITIES}
 * @throws IOException if building the sample stream fails
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
 String resourcePath = "/opennlp/tools/formats/" + name;
 InputStreamFactory resource =
   new ResourceAsStreamFactory(Conll03NameSampleStreamTest.class, resourcePath);

 // The entity-type flag is taken from the Conll02 stream class, as in the original.
 return new Conll03NameSampleStream(
   lang, resource, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}

/**
 * Opens a test resource located under {@code /opennlp/tools/formats/} as a
 * CoNLL-02 name sample stream.
 *
 * @param lang the language handed to {@code Conll02NameSampleStream}
 * @param name file name appended to the {@code /opennlp/tools/formats/} prefix
 * @return a sample stream created with {@code GENERATE_PERSON_ENTITIES}
 * @throws IOException if building the sample stream fails
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
 String resourcePath = "/opennlp/tools/formats/" + name;
 InputStreamFactory resource =
   new ResourceAsStreamFactory(Conll02NameSampleStreamTest.class, resourcePath);

 return new Conll02NameSampleStream(
   lang, resource, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}

/**
 * Opens a test resource located under {@code /opennlp/tools/formats/} as a
 * Census 90 name stream decoded as UTF-8.
 *
 * @param name file name appended to the {@code /opennlp/tools/formats/} prefix
 * @return a stream of {@code StringList} entries backed by the resource
 * @throws IOException if building the name stream fails
 */
private static ObjectStream<StringList> openData(String name) throws IOException {
 String resourcePath = "/opennlp/tools/formats/" + name;
 InputStreamFactory resource =
   new ResourceAsStreamFactory(NameFinderCensus90NameStreamTest.class, resourcePath);

 return new NameFinderCensus90NameStream(resource, StandardCharsets.UTF_8);
}

 /**
  * Creates a language detector sample stream over the
  * {@code /opennlp/tools/doccat/DoccatSample.txt} test resource, read line by
  * line as UTF-8.
  *
  * @return the sample stream wrapping the line stream
  * @throws IOException if the line stream cannot be created
  */
 public static LanguageDetectorSampleStream createSampleStream() throws IOException {
  ResourceAsStreamFactory resource = new ResourceAsStreamFactory(
    LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");

  return new LanguageDetectorSampleStream(new PlainTextByLineStream(resource, "UTF-8"));
 }
}

/**
 * Creates a sentence sample stream over the
 * {@code /opennlp/tools/sentdetect/Sentences.txt} test resource, read line by
 * line as UTF-8.
 *
 * @return the sentence sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<SentenceSample> createSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   SentenceDetectorFactoryTest.class, "/opennlp/tools/sentdetect/Sentences.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new SentenceSampleStream(lines);
}

/**
 * Creates a POS sample stream over the
 * {@code /opennlp/tools/postag/AnnotatedSentences.txt} test resource, read
 * line by line as UTF-8.
 *
 * @return the word/tag sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   POSTaggerFactoryTest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new WordTagSampleStream(lines);
}

/**
 * Creates a parse sample stream over the
 * {@code /opennlp/tools/parser/test.parse} test resource, read line by line
 * as UTF-8.
 *
 * @return the parse sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<Parse> createParseSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   ParseSampleStreamTest.class, "/opennlp/tools/parser/test.parse");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new ParseSampleStream(lines);
}

/**
 * Loads the {@code /opennlp/tools/formats/brown-cluster.txt} test resource
 * into a {@code BrownCluster} and builds the bigram feature generator used by
 * the tests.
 *
 * @throws IOException if the cluster resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
 ResourceAsStreamFactory stream = new ResourceAsStreamFactory(
   getClass(), "/opennlp/tools/formats/brown-cluster.txt");

 // Close the resource stream once the cluster is built; previously it was
 // opened and never closed.
 // NOTE(review): assumes BrownCluster fully consumes the stream inside its
 // constructor and does not keep a reference to it — confirm.
 BrownCluster brownCluster;
 try (java.io.InputStream clusterIn = stream.createInputStream()) {
  brownCluster = new BrownCluster(clusterIn);
 }

 generator = new BrownBigramFeatureGenerator(brownCluster);
}

/**
 * Creates a chunk sample stream over the
 * {@code /opennlp/tools/chunker/test.txt} test resource, read line by line as
 * UTF-8.
 *
 * @return the chunk sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<ChunkSample> createSampleStream() throws IOException {
 ResourceAsStreamFactory resource = new ResourceAsStreamFactory(
   ChunkerFactoryTest.class, "/opennlp/tools/chunker/test.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new ChunkSampleStream(lines);
}

 /**
  * Opens the {@code /opennlp/tools/formats/ad.sample} test resource as an AD
  * sentence stream, read line by line as UTF-8.
  *
  * @return the sentence stream wrapping the line stream
  * @throws IOException if the line stream cannot be created
  */
 private static ADSentenceStream openData() throws IOException {
  InputStreamFactory resource = new ResourceAsStreamFactory(
    ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
  PlainTextByLineStream lines = new PlainTextByLineStream(resource, "UTF-8");

  return new ADSentenceStream(lines);
 }
}

/**
 * Creates a POS sample stream over the
 * {@code /opennlp/tools/postag/AnnotatedSentences.txt} test resource, read
 * line by line as UTF-8.
 *
 * @return the word/tag sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new WordTagSampleStream(lines);
}

/**
 * Creates a token sample stream over the
 * {@code /opennlp/tools/tokenize/token.train} test resource, read line by
 * line as UTF-8.
 *
 * @return the token sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<TokenSample> createSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   TokenizerFactoryTest.class, "/opennlp/tools/tokenize/token.train");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 return new TokenSampleStream(lines);
}

/**
 * Creates a document sample stream over the
 * {@code /opennlp/tools/doccat/DoccatSample.txt} test resource, read line by
 * line as UTF-8.
 *
 * @return the document sample stream
 * @throws IOException if the line stream cannot be created
 */
private static ObjectStream<DocumentSample> createSampleStream() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   DoccatFactoryTest.class, "/opennlp/tools/doccat/DoccatSample.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, "UTF-8");

 return new DocumentSampleStream(lines);
}

/**
 * Reads every name sample from the {@code /opennlp/tools/formats/ad.sample}
 * test resource into {@code samples} before each test. The sample stream is
 * closed when reading finishes.
 *
 * @throws IOException if the resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

 try (ADNameSampleStream nameSamples = new ADNameSampleStream(lines, true)) {
  // read() returning null marks the end of the stream.
  for (NameSample next = nameSamples.read(); next != null; next = nameSamples.read()) {
   samples.add(next);
  }
 }
}

/**
 * Reads every chunk sample from the {@code /opennlp/tools/formats/ad.sample}
 * test resource into {@code samples} before each test. The sample stream is
 * closed when reading finishes.
 *
 * @throws IOException if the resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, "UTF-8");

 try (ADChunkSampleStream chunkSamples = new ADChunkSampleStream(lines)) {
  // read() returning null marks the end of the stream.
  for (ChunkSample next = chunkSamples.read(); next != null; next = chunkSamples.read()) {
   samples.add(next);
  }
 }
}

/**
 * Trains a chunker on the
 * {@code /opennlp/tools/chunker/test-insufficient.txt} resource with 70
 * iterations and a cutoff of 1, and expects
 * {@code InsufficientTrainingDataException} to be thrown.
 *
 * @throws IOException if the training data cannot be read
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
 TrainingParameters trainingParams = new TrainingParameters();
 trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
 trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);

 ResourceAsStreamFactory resource = new ResourceAsStreamFactory(
   getClass(), "/opennlp/tools/chunker/test-insufficient.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
 ObjectStream<ChunkSample> chunkSamples = new ChunkSampleStream(lines);

 ChunkerME.train("eng", chunkSamples, trainingParams, new ChunkerFactory());
}

/**
 * Trains a tokenizer on the
 * {@code /opennlp/tools/tokenize/token-insufficient.train} resource with 100
 * iterations and a cutoff of 5, and expects
 * {@code InsufficientTrainingDataException} to be thrown.
 *
 * @throws IOException if the training data cannot be read
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   TokenizerModel.class, "/opennlp/tools/tokenize/token-insufficient.train");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
 ObjectStream<TokenSample> tokenSamples = new TokenSampleStream(lines);

 TrainingParameters trainingParams = new TrainingParameters();
 trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
 trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);

 TokenizerME.train(
   tokenSamples,
   TokenizerFactory.create(null, "eng", null, true, null),
   trainingParams);
}

/**
 * Trains the shared language detector {@code model} once for the test class,
 * using the {@code /opennlp/tools/doccat/DoccatSample.txt} resource with the
 * NAIVEBAYES algorithm, 100 iterations and a cutoff of 5.
 *
 * @throws Exception if reading the samples or training fails
 */
@BeforeClass
public static void train() throws Exception {
 ResourceAsStreamFactory resource = new ResourceAsStreamFactory(
   LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");
 LanguageDetectorSampleStream samples =
   new LanguageDetectorSampleStream(new PlainTextByLineStream(resource, "UTF-8"));

 TrainingParameters trainingParams = new TrainingParameters();
 trainingParams.put(TrainingParameters.ITERATIONS_PARAM, "100");
 trainingParams.put(TrainingParameters.CUTOFF_PARAM, "5");
 trainingParams.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

 model = LanguageDetectorME.train(samples, trainingParams, new DummyFactory());
}

/**
 * Trains a POS tagger on the
 * {@code /opennlp/tools/postag/AnnotatedSentencesInsufficient.txt} resource
 * with the MAXENT algorithm, 100 iterations and a cutoff of 5, and expects
 * {@code InsufficientTrainingDataException} to be thrown.
 *
 * @throws IOException if the training data cannot be read
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
 InputStreamFactory resource = new ResourceAsStreamFactory(
   POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
 PlainTextByLineStream lines = new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
 ObjectStream<POSSample> posSamples = new WordTagSampleStream(lines);

 TrainingParameters trainingParams = new TrainingParameters();
 trainingParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
 trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
 trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);

 POSTaggerME.train("eng", posSamples, trainingParams, new POSTaggerFactory());
}

Popular methods of ResourceAsStreamFactory

createInputStream

Popular in Java

Updating database using SQL prepared statement
compareTo (BigDecimal)
getContentResolver (Context)
putExtra (Intent)
File (java.io)
An "abstract" representation of a file system entity identified by a pathname. The pathname may be a
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
Stack (java.util)
Stack is a Last-In/First-Out(LIFO) data structure which represents a stack of objects. It enables u
BlockingQueue (java.util.concurrent)
A java.util.Queue that additionally supports operations that wait for the queue to become non-empty
CountDownLatch (java.util.concurrent)
A synchronization aid that allows one or more threads to wait until a set of operations being perfor
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
Best plugins for Eclipse

How to use the opennlp.tools.formats.ResourceAsStreamFactory constructor

Best Java code snippets using opennlp.tools.formats.ResourceAsStreamFactory.<init> (Showing top 20 results out of 315)

How to use
opennlp.tools.formats.ResourceAsStreamFactory
constructor