/**
 * Builds the {@link NameSample} stream described by the given command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the parsed language is stored in the
 * {@code language} field, and the data location and encoding are used to open a line-by-line
 * text stream that is then wrapped in an {@link ADNameSampleStream}.
 *
 * @param args raw command-line arguments, parsed via {@code ArgumentParser}
 * @return an {@link ADNameSampleStream} over the configured data, constructed with the parsed
 *         split-hyphenated-tokens setting
 */
public ObjectStream<NameSample> create(String[] args) {
  final Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);

  language = parsedArgs.getLang();

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsedArgs.getEncoding());
  } catch (IOException ioe) {
    // Error reporting is delegated to CmdLineUtil. NOTE(review): presumably this call aborts
    // the tool; if it ever returns normally, `lines` is still null below -- confirm.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADNameSampleStream(lines, parsedArgs.getSplitHyphenatedTokens());
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * JUnit {@code @Before} fixture: reads every {@link NameSample} from the classpath resource
 * {@code /opennlp/tools/formats/ad.sample} (decoded as UTF-8) and appends it to the
 * {@code samples} field.
 *
 * <p>The stream is opened in a try-with-resources statement, so it is closed once the last
 * sample has been read or if reading fails.
 *
 * @throws IOException if the resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory sampleResource = new ResourceAsStreamFactory(
      ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");

  try (ADNameSampleStream nameStream = new ADNameSampleStream(
      new PlainTextByLineStream(sampleResource, StandardCharsets.UTF_8), true)) {
    // read() returning null marks the end of the stream.
    for (NameSample next = nameStream.read(); next != null; next = nameStream.read()) {
      samples.add(next);
    }
  }
}
/**
 * Runs a 10-fold tokenizer cross validation over token samples derived from the
 * {@code FLORESTA_VIRGEM} name samples and asserts the resulting F-measure.
 *
 * <p>Name samples are converted to token samples through a {@link DictionaryDetokenizer}
 * loaded from {@code lang/pt/tokenizer/pt-detokenizer.xml}. The measured F-measure is printed
 * to standard output and then compared with {@code expectedScore}.
 *
 * @param params training parameters handed to the cross validator
 * @param expectedScore the F-measure the evaluation must produce, within a tolerance of 0.0001
 * @throws IOException if the detokenizer dictionary or the sample data cannot be read, or if
 *         closing the sample streams fails
 */
private void tokenizerCrossEval(TrainingParameters params, double expectedScore)
    throws IOException {

  // Build everything that does not hold an open sample stream first, so a failure here
  // cannot leave a stream open.
  DictionaryDetokenizer detokenizer = new DictionaryDetokenizer(
      new DetokenizationDictionary(new File("lang/pt/tokenizer/pt-detokenizer.xml")));

  TokenizerFactory tokFactory = TokenizerFactory.create(null, LANG, null, true, null);
  TokenizerCrossValidator validator =
      new opennlp.tools.tokenize.TokenizerCrossValidator(params, tokFactory);

  // Both streams are closed when evaluation finishes or throws; previously neither was closed.
  // NOTE(review): if the wrapping stream also closes the stream it wraps, the name-sample
  // stream is closed twice -- assumed harmless, confirm against the stream implementations.
  try (ObjectStream<NameSample> nameSamples =
           new ADNameSampleStream(getLineSample(FLORESTA_VIRGEM), true);
       ObjectStream<TokenSample> samples =
           new NameToTokenSampleStream(detokenizer, nameSamples)) {
    validator.evaluate(samples, 10);
  }

  System.out.println(validator.getFMeasure());
  Assert.assertEquals(expectedScore, validator.getFMeasure().getFMeasure(), 0.0001d);
}
/**
 * Builds the {@link NameSample} stream described by the given command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the parsed language is stored in the
 * {@code language} field, and the data location and encoding are used to open a line-by-line
 * text stream that is then wrapped in an {@link ADNameSampleStream}.
 *
 * @param args raw command-line arguments, parsed via {@code ArgumentParser}
 * @return an {@link ADNameSampleStream} over the configured data, constructed with the parsed
 *         split-hyphenated-tokens setting
 */
public ObjectStream<NameSample> create(String[] args) {
  final Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);

  language = parsedArgs.getLang();

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsedArgs.getEncoding());
  } catch (IOException ioe) {
    // Error reporting is delegated to CmdLineUtil. NOTE(review): presumably this call aborts
    // the tool; if it ever returns normally, `lines` is still null below -- confirm.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADNameSampleStream(lines, parsedArgs.getSplitHyphenatedTokens());
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Builds the {@link NameSample} stream described by the given command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the parsed language is stored in the
 * {@code language} field, and the data location and encoding are used to open a line-by-line
 * text stream that is then wrapped in an {@link ADNameSampleStream}.
 *
 * @param args raw command-line arguments, parsed via {@code ArgumentParser}
 * @return an {@link ADNameSampleStream} over the configured data, constructed with the parsed
 *         split-hyphenated-tokens setting
 */
public ObjectStream<NameSample> create(String[] args) {
  final Parameters parsedArgs = ArgumentParser.parse(args, Parameters.class);

  language = parsedArgs.getLang();

  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsedArgs.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsedArgs.getEncoding());
  } catch (IOException ioe) {
    // Error reporting is delegated to CmdLineUtil. NOTE(review): presumably this call aborts
    // the tool; if it ever returns normally, `lines` is still null below -- confirm.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADNameSampleStream(lines, parsedArgs.getSplitHyphenatedTokens());
}
} // closes the enclosing type, whose header lies outside this excerpt