/**
 * Runs {@code TestStructureContentExtractor} against a fresh JCas and checks the resulting
 * document text, the single {@code Paragraph} annotation and the content metadata.
 *
 * <p>The expected document text is {@code "Title\nExample"} (13 characters); the paragraph is
 * expected to span offsets 6 to 13, i.e. the text after the newline.
 *
 * @throws UIMAException declared for the JCas / extractor calls made by the test
 * @throws IOException declared for the extractor's stream processing
 */
@Test
public void test() throws UIMAException, IOException {
  JCas jCas = JCasSingleton.getJCasInstance();

  BaleenContentExtractor contentExtractor = new TestStructureContentExtractor();
  contentExtractor.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  // No input stream is supplied here (null); only the source name and the JCas are passed.
  contentExtractor.processStream(null, "source", jCas);

  assertEquals("Title\nExample", jCas.getDocumentText());

  // Expected value goes first so that a failure reports "expected X but was Y" correctly.
  Collection<Paragraph> select = JCasUtil.select(jCas, Paragraph.class);
  assertEquals(1, select.size());

  Paragraph p = select.iterator().next();
  assertEquals(6, p.getBegin());
  assertEquals(13, p.getEnd());

  // Only metadata whose key carries the "baleen:content-" prefix is counted.
  List<Metadata> contentMeta =
      JCasUtil.select(jCas, Metadata.class)
          .stream()
          .filter(m -> m.getKey().startsWith("baleen:content-"))
          .collect(Collectors.toList());
  assertEquals(3, contentMeta.size());
}
// The paragraph is expected to cover offsets 0 to 5 and to report a depth of 6.
// Expected value is passed first so that failure messages label the two values correctly.
assertEquals(0, paragraph.getBegin());
assertEquals(5, paragraph.getEnd());
assertEquals(6, paragraph.getDepth());
// Four template fields are checked, each read from its own position in the list:
// two named "cell1" (aligned with paragraph1 and paragraph2) followed by two named
// "cell2" (aligned with paragraph5 and paragraph6). Every field must match its
// paragraph's begin/end offsets and cover the expected text.
TemplateField cell111 = fields.get(0);
assertEquals("cell1", cell111.getName());
assertEquals(paragraph1.getBegin(), cell111.getBegin());
assertEquals(paragraph1.getEnd(), cell111.getEnd());
assertEquals(R1C1P1, cell111.getCoveredText());

TemplateField cell112 = fields.get(1);
assertEquals("cell1", cell112.getName());
assertEquals(paragraph2.getBegin(), cell112.getBegin());
assertEquals(paragraph2.getEnd(), cell112.getEnd());
assertEquals(R1C1P2, cell112.getCoveredText());

// Index 1 is already the second "cell1" field, so the "cell2" fields follow at 2 and 3.
TemplateField cell221 = fields.get(2);
assertEquals("cell2", cell221.getName());
assertEquals(paragraph5.getBegin(), cell221.getBegin());
assertEquals(paragraph5.getEnd(), cell221.getEnd());
assertEquals(R2C2P1, cell221.getCoveredText());

TemplateField cell222 = fields.get(3);
assertEquals("cell2", cell222.getName());
assertEquals(paragraph6.getBegin(), cell222.getBegin());
assertEquals(paragraph6.getEnd(), cell222.getEnd());
assertEquals(R2C2P2, cell222.getCoveredText());
// Three fields named "para" are checked against paragraph1..paragraph3: each must start
// where its paragraph starts, extend for the length of the matching PARA constant and
// cover exactly that text.
// NOTE(review): one list element cannot match three different paragraphs, so each check
// reads its own index. This assumes the "para" fields sit at consecutive positions
// starting at index 1 (index 0 is not inspected in this fragment) — confirm against the
// order in which the fields are produced.
TemplateField para = fields.get(1);
assertEquals("para", para.getName());
assertEquals(paragraph1.getBegin(), para.getBegin());
assertEquals(para.getBegin() + PARA1.length(), para.getEnd());
assertEquals(PARA1, para.getCoveredText());

para = fields.get(2);
assertEquals("para", para.getName());
assertEquals(paragraph2.getBegin(), para.getBegin());
assertEquals(para.getBegin() + PARA2.length(), para.getEnd());
assertEquals(PARA2, para.getCoveredText());

para = fields.get(3);
assertEquals("para", para.getName());
assertEquals(paragraph3.getBegin(), para.getBegin());
assertEquals(para.getBegin() + PARA3.length(), para.getEnd());
assertEquals(PARA3, para.getCoveredText());