/**
 * Walks the tree rooted at {@code node} depth-first, in the order the
 * elements are returned by {@code getElements()}, and hands every leaf to
 * {@code processLeaf}. A {@code null} node is silently ignored.
 *
 * @param node     the subtree to traverse; may be {@code null}
 * @param sentence accumulator passed through to {@code processLeaf}
 * @param tags     accumulator passed through to {@code processLeaf}
 */
private void process(Node node, List<String> sentence, List<String> tags) {
  if (node == null) {
    return; // nothing to traverse
  }

  for (TreeElement child : node.getElements()) {
    if (!child.isLeaf()) {
      // Inner node: descend before moving on to the next sibling.
      process((Node) child, sentence, tags);
    } else {
      processLeaf((Leaf) child, sentence, tags);
    }
  }
}
@Test public void testIncludeFeats() throws IOException { // add one sentence with includeFeats = true try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), false, true)) { POSSample sample = stream.read(); Assert.assertEquals(23, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art=DET=M=S", sample.getTags()[2]); Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]); Assert.assertEquals("prop=M=S", sample.getTags()[9]); } }
/**
 * Builds a POS sample stream from command-line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the language is
 * stored in the {@code language} field, and the data file is opened as a
 * line stream using the configured encoding before being wrapped in an
 * {@link ADPOSSampleStream}.
 *
 * @param args the command-line arguments to parse
 * @return a stream of POS samples backed by the configured data file
 */
public ObjectStream<POSSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  language = parsed.getLang();

  InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): presumably this handler throws; if it ever returns
    // normally, 'lines' is still null below -- confirm against CmdLineUtil.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADPOSSampleStream(
      lines, parsed.getExpandME(), parsed.getIncludeFeatures());
}
/**
 * Reads the next sentence from the underlying sentence stream and converts
 * it into a {@link POSSample}.
 *
 * @return the sample built from the next sentence, or {@code null} when the
 *         underlying stream returns {@code null}
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Sentence paragraph = this.adSentenceStream.read();
  if (paragraph == null) {
    return null; // underlying stream has nothing more to offer
  }

  List<String> sentence = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  // Fill both lists by walking the sentence's tree from its root.
  process(paragraph.getRoot(), sentence, tags);

  return new POSSample(sentence, tags);
}
@Test public void testSimple() throws IOException { // add one sentence with expandME = includeFeats = false try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), false, false)) { POSSample sample = stream.read(); Assert.assertEquals(23, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art", sample.getTags()[2]); Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]); Assert.assertEquals("prop", sample.getTags()[9]); } }
/**
 * Creates the POS sample stream described by the given command-line
 * arguments.
 *
 * <p>Side effect: the {@code language} field is set from the parsed
 * parameters.
 *
 * @param args raw command-line arguments, parsed into {@code Parameters}
 * @return an {@link ADPOSSampleStream} reading the configured data file line
 *         by line
 */
public ObjectStream<POSSample> create(String[] args) {
  Parameters options = ArgumentParser.parse(args, Parameters.class);

  language = options.getLang();

  InputStreamFactory source =
      CmdLineUtil.createInputStreamFactory(options.getData());

  ObjectStream<String> textLines = null;
  try {
    textLines = new PlainTextByLineStream(source, options.getEncoding());
  } catch (IOException e) {
    // Opening the data failed; delegate reporting to the shared handler.
    // NOTE(review): 'textLines' remains null if the handler returns -- verify
    // that it always throws.
    CmdLineUtil.handleCreateObjectStreamError(e);
  }

  return new ADPOSSampleStream(
      textLines,
      options.getExpandME(),
      options.getIncludeFeatures());
}
/**
 * Produces the next {@link POSSample}, built from the next sentence of the
 * underlying sentence stream.
 *
 * @return the next sample, or {@code null} if the underlying stream returned
 *         {@code null}
 * @throws IOException if the underlying stream cannot be read
 */
public POSSample read() throws IOException {
  Sentence paragraph = this.adSentenceStream.read();

  if (paragraph != null) {
    Node root = paragraph.getRoot();

    List<String> tokens = new ArrayList<>();
    List<String> posTags = new ArrayList<>();
    process(root, tokens, posTags);

    return new POSSample(tokens, posTags);
  }

  return null;
}
/**
 * Recursively visits every element below {@code node}, delegating each leaf
 * to {@code processLeaf} and recursing into each non-leaf element. Elements
 * are visited in the order {@code getElements()} yields them.
 *
 * @param node     root of the subtree to visit; {@code null} is a no-op
 * @param sentence list forwarded unchanged to {@code processLeaf}
 * @param tags     list forwarded unchanged to {@code processLeaf}
 */
private void process(Node node, List<String> sentence, List<String> tags) {
  if (node == null) {
    return;
  }

  for (TreeElement element : node.getElements()) {
    if (element.isLeaf()) {
      processLeaf((Leaf) element, sentence, tags);
      continue;
    }
    // Not a leaf, so it is treated as a nested node and traversed in turn.
    process((Node) element, sentence, tags);
  }
}
@Test public void testExpandME() throws IOException { // add one sentence with expandME = true try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), true, false)) { POSSample sample = stream.read(); Assert.assertEquals(27, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art", sample.getTags()[2]); Assert.assertEquals("Porto", sample.getSentence()[9]); Assert.assertEquals("B-prop", sample.getTags()[9]); Assert.assertEquals("Poesia", sample.getSentence()[10]); Assert.assertEquals("I-prop", sample.getTags()[10]); } }
/**
 * Factory entry point: turns command-line arguments into a stream of
 * {@link POSSample}s.
 *
 * <p>Steps, in order: parse {@code args}; record the language in the
 * {@code language} field; open the data file as a line stream with the
 * requested encoding; wrap that stream in an {@link ADPOSSampleStream}
 * configured with the expandME and includeFeatures options.
 *
 * @param args the command-line arguments
 * @return the configured sample stream
 */
public ObjectStream<POSSample> create(String[] args) {
  Parameters cfg = ArgumentParser.parse(args, Parameters.class);
  language = cfg.getLang();

  InputStreamFactory inputFactory =
      CmdLineUtil.createInputStreamFactory(cfg.getData());

  ObjectStream<String> byLine = null;
  try {
    byLine = new PlainTextByLineStream(inputFactory, cfg.getEncoding());
  } catch (IOException failure) {
    // NOTE(review): assumes the handler does not return normally; otherwise
    // the sample stream below is built around a null line stream -- confirm.
    CmdLineUtil.handleCreateObjectStreamError(failure);
  }

  return new ADPOSSampleStream(byLine, cfg.getExpandME(), cfg.getIncludeFeatures());
}
/**
 * Fetches one sentence from the underlying sentence stream and flattens its
 * tree into parallel token and tag lists wrapped in a {@link POSSample}.
 *
 * @return the converted sample, or {@code null} once the underlying stream
 *         yields {@code null}
 * @throws IOException propagated from the underlying stream
 */
public POSSample read() throws IOException {
  final Sentence next = this.adSentenceStream.read();
  if (next == null) {
    return null;
  }

  final List<String> words = new ArrayList<>();
  final List<String> labels = new ArrayList<>();

  final Node tree = next.getRoot();
  process(tree, words, labels);

  return new POSSample(words, labels);
}
/**
 * Traverses {@code node} and all of its descendants. Each element reporting
 * {@code isLeaf()} is passed to {@code processLeaf}; every other element is
 * cast to {@link Node} and traversed recursively. Does nothing when
 * {@code node} is {@code null}.
 *
 * @param node     the node whose elements are traversed; may be {@code null}
 * @param sentence list handed on to {@code processLeaf}
 * @param tags     list handed on to {@code processLeaf}
 */
private void process(Node node, List<String> sentence, List<String> tags) {
  if (node == null) {
    return;
  }

  for (TreeElement current : node.getElements()) {
    if (current.isLeaf()) {
      Leaf leaf = (Leaf) current;
      processLeaf(leaf, sentence, tags);
    } else {
      Node subtree = (Node) current;
      process(subtree, sentence, tags);
    }
  }
}