/**
 * Runs {@code TestStructureContentExtractor} against a shared JCas and checks what it
 * wrote there: the document text, a single {@code Paragraph} annotation and the
 * {@code baleen:content-} metadata entries.
 *
 * @throws UIMAException declared by the test; propagated from the UIMA/Baleen calls
 * @throws IOException declared by the test; propagated from the extractor calls
 */
@Test
public void test() throws UIMAException, IOException {
  JCas jCas = JCasSingleton.getJCasInstance();

  BaleenContentExtractor contentExtractor = new TestStructureContentExtractor();
  contentExtractor.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  // The first argument is null: this test supplies no input of its own to the extractor.
  contentExtractor.processStream(null, "source", jCas);

  assertEquals("Title\nExample", jCas.getDocumentText());

  // Expected value goes first so a failure reports expected/actual the right way round.
  Collection<Paragraph> paragraphs = JCasUtil.select(jCas, Paragraph.class);
  assertEquals(1, paragraphs.size());

  // Offsets 6..13 cover "Example" in "Title\nExample" ("Title" is 0..5, '\n' is at 5).
  Paragraph p = paragraphs.iterator().next();
  assertEquals(6, p.getBegin());
  assertEquals(13, p.getEnd());

  // Only metadata whose key starts with "baleen:content-" is counted.
  List<Metadata> contentMeta =
      JCasUtil.select(jCas, Metadata.class)
          .stream()
          .filter(m -> m.getKey().startsWith("baleen:content-"))
          .collect(Collectors.toList());
  assertEquals(3, contentMeta.size());
}
// Expected literal first, actual value second, so that a failing assertion
// reports "expected"/"actual" the right way round.
assertEquals(5, entity.getEnd());

// The paragraph is expected to span offsets 0..5 and to report a depth of 6.
assertEquals(0, paragraph.getBegin());
assertEquals(5, paragraph.getEnd());
assertEquals(6, paragraph.getDepth());
// cell111: named "cell1", spans exactly paragraph1, covered text and value are R1C1P1.
assertEquals("cell1", cell111.getName());
assertEquals(paragraph1.getBegin(), cell111.getBegin());
assertEquals(paragraph1.getEnd(), cell111.getEnd());
assertEquals(R1C1P1, cell111.getCoveredText());
assertEquals(R1C1P1, cell111.getValue());

// cell112: also named "cell1", spans exactly paragraph2, covered text and value are R1C1P2.
assertEquals("cell1", cell112.getName());
assertEquals(paragraph2.getBegin(), cell112.getBegin());
assertEquals(paragraph2.getEnd(), cell112.getEnd());
assertEquals(R1C1P2, cell112.getCoveredText());
assertEquals(R1C1P2, cell112.getValue());

// cell221: named "cell2", spans exactly paragraph5, covered text and value are R2C2P1.
assertEquals("cell2", cell221.getName());
assertEquals(paragraph5.getBegin(), cell221.getBegin());
assertEquals(paragraph5.getEnd(), cell221.getEnd());
assertEquals(R2C2P1, cell221.getCoveredText());
assertEquals(R2C2P1, cell221.getValue());

// cell222: also named "cell2", spans exactly paragraph6, covered text and value are R2C2P2.
assertEquals("cell2", cell222.getName());
assertEquals(paragraph6.getBegin(), cell222.getBegin());
assertEquals(paragraph6.getEnd(), cell222.getEnd());
assertEquals(R2C2P2, cell222.getCoveredText());
assertEquals(R2C2P2, cell222.getValue());
// r8 is named "section"; it starts where tableRow2 ends and stops at the end of paragraph1.
assertEquals("section", r8.getName());
assertEquals(tableRow2.getEnd(), r8.getBegin());
assertEquals(paragraph1.getEnd(), r8.getEnd());

// r9 picks up at the end of paragraph1 and runs to the end of paragraph2.
assertEquals(paragraph1.getEnd(), r9.getBegin());
assertEquals(paragraph2.getEnd(), r9.getEnd());

// r10 picks up at the end of paragraph2 and runs to the end of paragraph3.
assertEquals(paragraph2.getEnd(), r10.getBegin());
assertEquals(paragraph3.getEnd(), r10.getEnd());