/**
 * Runs once before the tests of this class and checks both AD corpora
 * ({@code BOSQUE} and {@code FLORESTA_VIRGEM}) against their expected values.
 *
 * <p>Each corpus is read as a sentence sample stream with the second constructor
 * argument set to {@code false} and handed, together with the expected
 * {@link BigInteger}, to the two-argument {@code verifyTrainingData} overload.
 * NOTE(review): the expected numbers look like digests of the training data;
 * the helper that interprets them is not visible here.
 *
 * @throws Exception if reading or verifying either corpus fails
 */
@BeforeClass
public static void verifyTrainingData() throws Exception {
  // Bosque corpus first; the second corpus is only opened once this check has passed.
  ADSentenceSampleStream bosqueSamples =
      new ADSentenceSampleStream(getLineSample(BOSQUE), false);
  BigInteger bosqueExpected = new BigInteger("140568367548727787313497336739085858596");
  verifyTrainingData(bosqueSamples, bosqueExpected);

  // Floresta Virgem corpus.
  ADSentenceSampleStream florestaSamples =
      new ADSentenceSampleStream(getLineSample(FLORESTA_VIRGEM), false);
  BigInteger florestaExpected = new BigInteger("2614161133949079191933514776652602918");
  verifyTrainingData(florestaSamples, florestaExpected);
}
updateMeta(); if (sent == null) { return null; do { if (!isTitle || (isTitle && isIncludeTitles)) { if (hasPunctuation(sent.getText())) { int start = document.length(); document.append(sent.getText()); updateMeta();
/**
 * Test fixture: reads every sentence sample from the bundled
 * {@code /opennlp/tools/formats/ad.sample} resource (decoded as UTF-8) and
 * appends it to {@code samples}.
 *
 * <p>Each sample's document text is echoed to standard output, followed by a
 * {@code <fim>} marker line. The stream is closed when reading finishes or fails.
 *
 * @throws IOException if the resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory resource = new ResourceAsStreamFactory(
      ADSentenceSampleStreamTest.class, "/opennlp/tools/formats/ad.sample");

  try (ADSentenceSampleStream sentences = new ADSentenceSampleStream(
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8), true)) {
    // read() returning null signals the end of the stream.
    for (SentenceSample next = sentences.read(); next != null; next = sentences.read()) {
      System.out.println(next.getDocument());
      System.out.println("<fim>");
      samples.add(next);
    }
  }
}
/**
 * Builds a sentence sample stream from command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the language is stored in
 * the {@code language} field, and the data file is opened as a line stream using
 * the configured encoding.
 *
 * @param args the command-line arguments to parse
 * @return an {@code ADSentenceSampleStream} over the lines of the data file,
 *         created with the parsed include-titles setting
 */
public ObjectStream<SentenceSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);

  language = parsed.getLang();
  boolean withTitles = parsed.getIncludeTitles();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler returns normally, `lines` is still null below;
    // presumably it throws to abort the tool - confirm against CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADSentenceSampleStream(lines, withTitles);
}
} // closes the enclosing class, whose header is outside this view
updateMeta(); if (sent == null) { return null; do { if (!isTitle || (isTitle && isIncludeTitles)) { if (hasPunctuation(sent.getText())) { int start = document.length(); document.append(sent.getText()); updateMeta();
/**
 * Cross-validates a sentence detector on the {@code FLORESTA_VIRGEM} corpus and
 * asserts that the resulting F-measure matches the expected score.
 *
 * <p>The sample stream is opened in a try-with-resources block so that it is
 * closed even when evaluation or the assertion fails; previously it was never
 * closed.
 *
 * @param params        training parameters passed to the cross validator
 * @param expectedScore F-measure the evaluation must produce, compared with a
 *                      tolerance of {@code 0.0001}
 * @throws IOException if reading the samples, evaluating, or closing the stream fails
 */
private void sentenceCrossEval(TrainingParameters params, double expectedScore)
    throws IOException {
  try (ADSentenceSampleStream samples =
      new ADSentenceSampleStream(getLineSample(FLORESTA_VIRGEM), false)) {

    SDCrossValidator cv = new SDCrossValidator(LANG, params,
        new SentenceDetectorFactory(LANG, true, null, new Factory().getEOSCharacters(LANG)));

    // 10 is the second argument of evaluate (the number of folds in SDCrossValidator's API).
    cv.evaluate(samples, 10);

    System.out.println(cv.getFMeasure());
    Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
  }
}
updateMeta(); if (sent == null) { return null; do { if (!isTitle || (isTitle && isIncludeTitles)) { if (hasPunctuation(sent.getText())) { int start = document.length(); document.append(sent.getText()); updateMeta();
// Wraps the sample data, read line by line as ISO-8859-1 text, in an AD sentence
// sample stream. The trailing `true` is the constructor's boolean second argument -
// presumably the include-titles flag; TODO confirm against ADSentenceSampleStream.
ObjectStream<SentenceSample> sampleStream = new ADSentenceSampleStream( new PlainTextByLineStream(sampleDataIn, "ISO-8859-1"), true);
// Wraps the sample data, read line by line as ISO-8859-1 text, in an AD sentence
// sample stream. The trailing `true` is the constructor's boolean second argument -
// presumably the include-titles flag; TODO confirm against ADSentenceSampleStream.
ObjectStream<SentenceSample> sampleStream = new ADSentenceSampleStream( new PlainTextByLineStream(sampleDataIn, "ISO-8859-1"), true);
/**
 * Builds a sentence sample stream from command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the language is stored in
 * the {@code language} field, and the data file is opened as a line stream using
 * the configured encoding.
 *
 * @param args the command-line arguments to parse
 * @return an {@code ADSentenceSampleStream} over the lines of the data file,
 *         created with the parsed include-titles setting
 */
public ObjectStream<SentenceSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);

  language = parsed.getLang();
  boolean withTitles = parsed.getIncludeTitles();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler returns normally, `lines` is still null below;
    // presumably it throws to abort the tool - confirm against CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADSentenceSampleStream(lines, withTitles);
}
} // closes the enclosing class, whose header is outside this view
/**
 * Builds a sentence sample stream from command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the language is stored in
 * the {@code language} field, and the data file is opened as a line stream using
 * the configured encoding.
 *
 * @param args the command-line arguments to parse
 * @return an {@code ADSentenceSampleStream} over the lines of the data file,
 *         created with the parsed include-titles setting
 */
public ObjectStream<SentenceSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);

  language = parsed.getLang();
  boolean withTitles = parsed.getIncludeTitles();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler returns normally, `lines` is still null below;
    // presumably it throws to abort the tool - confirm against CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADSentenceSampleStream(lines, withTitles);
}
} // closes the enclosing class, whose header is outside this view