/**
 * Builds the POS sample stream used by the tests in this class.
 *
 * <p>The resource {@code /opennlp/tools/postag/AnnotatedSentences.txt} is
 * obtained through a {@link ResourceAsStreamFactory} created with
 * {@code POSTaggerFactoryTest.class}, read line by line as UTF-8 and wrapped
 * in a {@link WordTagSampleStream}.
 *
 * @return a stream of {@link POSSample} objects backed by the resource
 * @throws IOException declared by the stream constructors used here
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  final InputStreamFactory resource = new ResourceAsStreamFactory(
      POSTaggerFactoryTest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
  final ObjectStream<String> lines =
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
  return new WordTagSampleStream(lines);
}
new WordTagSampleStream(new CollectionObjectStream<>(sampleString))) { POSSample sample = stream.read(); String[] words = sample.getSentence(); Assert.assertEquals("x6", tags[5]); Assert.assertNull(stream.read()); stream.reset(); Assert.assertNotNull(stream.read());
/**
 * Builds the POS sample stream used by the tests in this class.
 *
 * <p>The resource {@code /opennlp/tools/postag/AnnotatedSentences.txt} is
 * obtained through a {@link ResourceAsStreamFactory} created with
 * {@code POSTaggerMETest.class}, read line by line as UTF-8 and wrapped in a
 * {@link WordTagSampleStream}.
 *
 * @return a stream of {@link POSSample} objects backed by the resource
 * @throws IOException declared by the stream constructors used here
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  final InputStreamFactory resource = new ResourceAsStreamFactory(
      POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
  final ObjectStream<String> lines =
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
  return new WordTagSampleStream(lines);
}
/**
 * Creates a POS sample stream from command-line style arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they
 * name is checked with {@code CmdLineUtil.checkInputFile}, opened as a line
 * stream using the configured encoding, and wrapped in a
 * {@link WordTagSampleStream}.
 *
 * @param args the raw arguments to parse
 * @return a stream of {@link POSSample} objects over the data file
 */
public ObjectStream<POSSample> create(String[] args) {
  final Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsed.getData());

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ex) {
    // Delegate reporting to the command-line utility; if it returns normally
    // the line stream stays null, exactly as before.
    CmdLineUtil.handleCreateObjectStreamError(ex);
    lines = null;
  }

  return new WordTagSampleStream(lines);
}
}
/**
 * Trains a tagger on {@code AnnotatedSentencesInsufficient.txt} and expects
 * the call to fail with {@link InsufficientTrainingDataException}.
 *
 * @throws IOException declared by the stream constructors and by training
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
  final InputStreamFactory resource = new ResourceAsStreamFactory(
      POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
  final ObjectStream<String> lines =
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8);
  final ObjectStream<POSSample> samples = new WordTagSampleStream(lines);

  // Training configuration: MAXENT algorithm, 100 iterations, cutoff of 5.
  final TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);

  POSTaggerME.train("eng", samples, trainingParams, new POSTaggerFactory());
}
if (encoding == null) { if (dict == null) { ss = new POSSampleSequenceStream(new WordTagSampleStream( new InputStreamReader(new FileInputStream(inFile)))); ss = new POSSampleSequenceStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile)))), cg); ss = new POSSampleSequenceStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile), encoding)))); ss = new POSSampleSequenceStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile), encoding))), cg); if (encoding == null) { if (dict == null) { es = new POSSampleEventStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile))))); es = new POSSampleEventStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile)))), cg); es = new POSSampleEventStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile), encoding)))); es = new POSSampleEventStream(new WordTagSampleStream(( new InputStreamReader(new FileInputStream(inFile), encoding))), cg);
/**
 * Creates a POS sample stream from command-line style arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they
 * name is checked with {@code CmdLineUtil.checkInputFile}, opened as a line
 * stream using the configured encoding, and wrapped in a
 * {@link WordTagSampleStream}.
 *
 * @param args the raw arguments to parse
 * @return a stream of {@link POSSample} objects over the data file
 */
public ObjectStream<POSSample> create(String[] args) {
  final Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsed.getData());

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ex) {
    // Delegate reporting to the command-line utility; if it returns normally
    // the line stream stays null, exactly as before.
    CmdLineUtil.handleCreateObjectStreamError(ex);
    lines = null;
  }

  return new WordTagSampleStream(lines);
}
}
/**
 * Creates a POS sample stream from command-line style arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they
 * name is checked with {@code CmdLineUtil.checkInputFile}, opened as a line
 * stream using the configured encoding, and wrapped in a
 * {@link WordTagSampleStream}.
 *
 * @param args the raw arguments to parse
 * @return a stream of {@link POSSample} objects over the data file
 */
public ObjectStream<POSSample> create(String[] args) {
  final Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", parsed.getData());

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ex) {
    // Delegate reporting to the command-line utility; if it returns normally
    // the line stream stays null, exactly as before.
    CmdLineUtil.handleCreateObjectStreamError(ex);
    lines = null;
  }

  return new WordTagSampleStream(lines);
}
}
/**
 * Creates a cross validator configured from the given training parameters.
 *
 * <p>The language, the {@code "TrainSet"} data set, the automatic-dictionary
 * feature setting and the number of folds are read from {@code params} via
 * {@code Flags}. The training data is loaded through
 * {@code InputOutputUtils.readFileIntoMarkableStreamFactory} and wrapped in a
 * {@link WordTagSampleStream}. Finally {@code createPOSFactory} and
 * {@code getEvalListeners} are invoked with the same parameters.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           the io exceptions
 */
public POSCrossValidator(final TrainingParameters params) throws IOException {
  this.lang = Flags.getLanguage(params);

  // Locate the training data and expose it as a stream of POS samples.
  final String trainSetLocation = Flags.getDataSet("TrainSet", params);
  final ObjectStream<String> trainLines =
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainSetLocation);
  this.trainSamples = new WordTagSampleStream(trainLines);

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.folds = Flags.getFolds(params);

  // These helpers run last, after the fields above have been assigned.
  createPOSFactory(params);
  getEvalListeners(params);
}