protected void addParagraphAnnotations() { Paragraph paragraph1 = new Paragraph(jCas); paragraph1.setBegin(0); paragraph1.setDepth(1); paragraph1.setEnd(52); paragraph1.addToIndexes(); Paragraph paragraph2 = new Paragraph(jCas); paragraph2.setBegin(53); paragraph2.setDepth(1); paragraph2.setEnd(105); paragraph2.addToIndexes(); Paragraph paragraph3 = new Paragraph(jCas); paragraph3.setBegin(106); paragraph3.setDepth(1); paragraph3.setEnd(158); paragraph3.addToIndexes(); Paragraph paragraph4 = new Paragraph(jCas); paragraph4.setBegin(159); paragraph4.setDepth(1); paragraph4.setEnd(211); paragraph4.addToIndexes(); Paragraph paragraph5 = new Paragraph(jCas); paragraph5.setBegin(212); paragraph5.setDepth(1); paragraph5.setEnd(212); paragraph5.addToIndexes();
/**
 * Creates a Paragraph in the given JCas and sets its begin and end offsets.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) {
  super(jcas);
  this.setBegin(begin);
  this.setEnd(end);
  this.readObject();
}
/**
 * Adds three structural annotations to the JCas indexes: a Header (begin 0, end 17), a
 * Paragraph (begin 18, end 38) and a Footer beginning at 40 and ending at the length of the
 * document text.
 */
private void addStructure() {
  new Header(jCas, 0, 17).addToIndexes();
  new Paragraph(jCas, 18, 38).addToIndexes();

  // The footer's end offset is the length of the document text.
  final int documentEnd = jCas.getDocumentText().length();
  new Footer(jCas, 40, documentEnd).addToIndexes();
}
/**
 * Adds a new {@link Paragraph} to the collector when the element's tag name is {@code p}
 * (compared case-insensitively); any other element is ignored.
 */
@Override
public void map(JCas jCas, Element element, AnnotationCollector collector) {
  final String tag = element.tagName();
  if (!tag.equalsIgnoreCase("p")) {
    return;
  }
  collector.add(new Paragraph(jCas));
}
}
/**
 * Runs a {@link TestStructureContentExtractor} against the shared JCas and checks the resulting
 * document text, the single Paragraph annotation and the number of "baleen:content-" metadata
 * entries.
 *
 * @throws UIMAException if thrown while initialising or running the content extractor
 * @throws IOException if thrown while initialising or running the content extractor
 */
@Test
public void test() throws UIMAException, IOException {
  JCas jCas = JCasSingleton.getJCasInstance();

  BaleenContentExtractor contentExtractor = new TestStructureContentExtractor();
  contentExtractor.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  contentExtractor.processStream(null, "source", jCas);

  assertEquals("Title\nExample", jCas.getDocumentText());

  // Expected value first, actual second, matching the other assertions in this test so that a
  // failure message reports the two values the right way round.
  Collection<Paragraph> paragraphs = JCasUtil.select(jCas, Paragraph.class);
  assertEquals(1, paragraphs.size());

  Paragraph p = paragraphs.iterator().next();
  assertEquals(6, p.getBegin());
  assertEquals(13, p.getEnd());

  // Only metadata whose key starts with the "baleen:content-" prefix is counted.
  List<Metadata> contentMeta =
      JCasUtil.select(jCas, Metadata.class)
          .stream()
          .filter(m -> m.getKey().startsWith("baleen:content-"))
          .collect(Collectors.toList());
  assertEquals(3, contentMeta.size());
}
/**
 * Creates a Paragraph in the given JCas and sets its begin and end offsets.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) {
  super(jcas);
  this.setBegin(begin);
  this.setEnd(end);
  this.readObject();
}
switch (element.tagName().toLowerCase()) { case "p": s = new Paragraph(jCas); break;
TemplateField cell111 = fields.get(0); assertEquals("cell1", cell111.getName()); assertEquals(paragraph1.getBegin(), cell111.getBegin()); assertEquals(paragraph1.getEnd(), cell111.getEnd()); assertEquals(R1C1P1, cell111.getCoveredText()); assertEquals(R1C1P1, cell111.getValue()); TemplateField cell112 = fields.get(1); assertEquals("cell1", cell112.getName()); assertEquals(paragraph2.getBegin(), cell112.getBegin()); assertEquals(paragraph2.getEnd(), cell112.getEnd()); assertEquals(R1C1P2, cell112.getCoveredText()); assertEquals(R1C1P2, cell112.getValue()); TemplateField cell221 = fields.get(1); assertEquals("cell2", cell221.getName()); assertEquals(paragraph5.getBegin(), cell221.getBegin()); assertEquals(paragraph5.getEnd(), cell221.getEnd()); assertEquals(R2C2P1, cell221.getCoveredText()); assertEquals(R2C2P1, cell221.getValue()); TemplateField cell222 = fields.get(2); assertEquals("cell2", cell222.getName()); assertEquals(paragraph6.getBegin(), cell222.getBegin()); assertEquals(paragraph6.getEnd(), cell222.getEnd()); assertEquals(R2C2P2, cell222.getCoveredText()); assertEquals(R2C2P2, cell222.getValue());
/**
 * Indexes a single depth-1 Paragraph and checks that its selector path renders as
 * "Paragraph:nth-of-type(1)".
 */
@Test
public void testGenerateSimple() {
  Paragraph onlyParagraph = new Paragraph(jCas);
  onlyParagraph.setBegin(0);
  onlyParagraph.setDepth(1);
  onlyParagraph.setEnd(20);
  onlyParagraph.addToIndexes();

  ItemHierarchy<Structure> hierarchy = StructureHierarchy.build(jCas, structuralClasses);
  SelectorPath selector = hierarchy.getSelectorPath(onlyParagraph);

  assertEquals("Paragraph:nth-of-type(1)", selector.toString());
}
switch (element.tagName().toLowerCase()) { case "p": s = new Paragraph(jCas); break;
assertEquals("section", r8.getName()); assertEquals(tableRow2.getEnd(), r8.getBegin()); assertEquals(paragraph1.getEnd(), r8.getEnd()); TemplateField para = fields.get(1); assertEquals("para", para.getName()); assertEquals(paragraph1.getBegin(), para.getBegin()); assertEquals(para.getBegin() + PARA1.length(), para.getEnd()); assertEquals(PARA1, para.getCoveredText()); assertEquals(paragraph1.getEnd(), r9.getBegin()); assertEquals(paragraph2.getEnd(), r9.getEnd()); para = fields.get(1); assertEquals("para", para.getName()); assertEquals(paragraph2.getBegin(), para.getBegin()); assertEquals(para.getBegin() + PARA2.length(), para.getEnd()); assertEquals(PARA2, para.getCoveredText()); assertEquals(paragraph2.getEnd(), r10.getBegin()); assertEquals(paragraph3.getEnd(), r10.getEnd()); para = fields.get(1); assertEquals("para", para.getName()); assertEquals(paragraph3.getBegin(), para.getBegin()); assertEquals(para.getBegin() + PARA3.length(), para.getEnd()); assertEquals(PARA3, para.getCoveredText());
/**
 * Indexes two consecutive depth-1 Paragraphs and checks that their selector paths render as
 * the first and second "Paragraph:nth-of-type(n)" respectively.
 */
@Test
public void testGenerateTwo() {
  Paragraph first = new Paragraph(jCas);
  first.setBegin(0);
  first.setDepth(1);
  first.setEnd(20);
  first.addToIndexes();

  // The second paragraph starts where the first ends and runs to the end of TEXT.
  Paragraph second = new Paragraph(jCas);
  second.setBegin(20);
  second.setDepth(1);
  second.setEnd(TEXT.length());
  second.addToIndexes();

  ItemHierarchy<Structure> hierarchy = StructureHierarchy.build(jCas, structuralClasses);
  SelectorPath firstPath = hierarchy.getSelectorPath(first);
  SelectorPath secondPath = hierarchy.getSelectorPath(second);

  assertEquals("Paragraph:nth-of-type(1)", firstPath.toString());
  assertEquals("Paragraph:nth-of-type(2)", secondPath.toString());
}
/**
 * Indexes one Paragraph spanning all of TEXT and checks that selecting "Paragraph" through a
 * {@link RecordStructureManager} yields that annotation.
 *
 * @throws UIMAException declared by the test; not raised explicitly in this method
 * @throws InvalidParameterException declared by the test; not raised explicitly in this method
 */
@Test
public void testSelectSimple() throws UIMAException, InvalidParameterException {
  Paragraph wholeText = new Paragraph(jCas);
  wholeText.setBegin(0);
  wholeText.setDepth(1);
  wholeText.setEnd(TEXT.length());
  wholeText.addToIndexes();

  RecordStructureManager structureManager =
      new RecordStructureManager(StructureHierarchy.build(jCas, structuralClasses));

  Optional<Structure> selected = structureManager.select("Paragraph");

  assertTrue(selected.isPresent());
  assertEquals(wholeText, selected.get());
}
/**
 * Prepares each test: sets the document text, creates a temporary directory named after this
 * test class, records the source URI on the document annotation, and indexes two Paragraphs
 * plus one TemplateField.
 *
 * @throws IOException if the temporary directory cannot be created
 */
@Before
public void setup() throws IOException {
  jCas.setDocumentText(TEXT);

  final String directoryPrefix = TemplateFieldJsonReportConsumerTest.class.getSimpleName();
  tempDirectory = Files.createTempDirectory(directoryPrefix);
  tempDirectory.toFile().deleteOnExit();

  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri(SOURCEURI);

  Paragraph firstParagraph = new Paragraph(jCas);
  firstParagraph.setBegin(0);
  firstParagraph.setDepth(1);
  firstParagraph.setEnd(52);
  firstParagraph.addToIndexes();

  Paragraph secondParagraph = new Paragraph(jCas);
  secondParagraph.setBegin(53);
  secondParagraph.setDepth(1);
  secondParagraph.setEnd(105);
  secondParagraph.addToIndexes();

  // The template field uses the same offsets as the second paragraph (53 to 105).
  TemplateField templateField = new TemplateField(jCas);
  templateField.setBegin(53);
  templateField.setEnd(105);
  templateField.addToIndexes();
}
paragraph1 = new Paragraph(jCas); paragraph1.setDepth(2); paragraph1.setBegin(++cursor); cursor += PARA1.length(); paragraph1.setEnd(cursor); paragraph1.addToIndexes(); paragraph2 = new Paragraph(jCas); paragraph2.setDepth(2); paragraph2.setBegin(++cursor); cursor += PARA2.length(); paragraph2.setEnd(cursor); paragraph2.addToIndexes(); paragraph3 = new Paragraph(jCas); paragraph3.setDepth(2); paragraph3.setBegin(++cursor); cursor += PARA3.length(); paragraph3.setEnd(cursor); paragraph3.addToIndexes(); paragraph4 = new Paragraph(jCas); paragraph4.setDepth(2); paragraph4.setBegin(++cursor); cursor += PARA4.length(); paragraph4.setEnd(cursor); paragraph4.addToIndexes(); paragraph5 = new Paragraph(jCas);
/**
 * Indexes a depth-1 Section spanning TEXT with two depth-2 Paragraphs inside it and checks
 * that each paragraph's selector path is rendered beneath the section.
 */
@Test
public void testGenerateNested2() {
  Section enclosing = new Section(jCas);
  enclosing.setBegin(0);
  enclosing.setDepth(1);
  enclosing.setEnd(TEXT.length());
  enclosing.addToIndexes();

  Paragraph first = new Paragraph(jCas);
  first.setBegin(0);
  first.setDepth(2);
  first.setEnd(20);
  first.addToIndexes();

  // The second paragraph starts where the first ends and runs to the end of TEXT.
  Paragraph second = new Paragraph(jCas);
  second.setBegin(20);
  second.setDepth(2);
  second.setEnd(TEXT.length());
  second.addToIndexes();

  ItemHierarchy<Structure> hierarchy = StructureHierarchy.build(jCas, structuralClasses);
  SelectorPath firstPath = hierarchy.getSelectorPath(first);
  SelectorPath secondPath = hierarchy.getSelectorPath(second);

  assertEquals("Section:nth-of-type(1) > Paragraph:nth-of-type(1)", firstPath.toString());
  assertEquals("Section:nth-of-type(1) > Paragraph:nth-of-type(2)", secondPath.toString());
}
/**
 * Indexes two depth-1 Paragraphs and checks that "Paragraph:nth-of-type(n)" selects the first
 * for n = 1, the second for n = 2, and nothing for n = 3.
 *
 * @throws InvalidParameterException declared by the test; not raised explicitly in this method
 */
@Test
public void testSelectNthTwo() throws InvalidParameterException {
  Paragraph first = new Paragraph(jCas);
  first.setBegin(0);
  first.setDepth(1);
  first.setEnd(20);
  first.addToIndexes();

  Paragraph second = new Paragraph(jCas);
  second.setBegin(20);
  second.setDepth(1);
  second.setEnd(TEXT.length());
  second.addToIndexes();

  RecordStructureManager structureManager =
      new RecordStructureManager(StructureHierarchy.build(jCas, structuralClasses));

  Optional<Structure> firstMatch = structureManager.select("Paragraph:nth-of-type(1)");
  assertTrue(firstMatch.isPresent());
  assertEquals(first, firstMatch.get());
  assertNotEquals(second, firstMatch.get());

  Optional<Structure> secondMatch = structureManager.select("Paragraph:nth-of-type(2)");
  assertTrue(secondMatch.isPresent());
  assertEquals(second, secondMatch.get());
  assertNotEquals(first, secondMatch.get());

  // Only two paragraphs were indexed, so a third must not be found.
  Optional<Structure> thirdMatch = structureManager.select("Paragraph:nth-of-type(3)");
  assertFalse(thirdMatch.isPresent());
}
documentAnnotation.setSourceUri(SOURCEURI); Paragraph paragraph1 = new Paragraph(jCas); paragraph1.setBegin(0); paragraph1.setDepth(1); paragraph1.setEnd(52); paragraph1.addToIndexes(); Paragraph paragraph2 = new Paragraph(jCas); paragraph2.setBegin(53); paragraph2.setDepth(1); paragraph2.setEnd(105); paragraph2.addToIndexes(); Paragraph paragraph3 = new Paragraph(jCas); paragraph3.setBegin(106); paragraph3.setDepth(1); paragraph3.setEnd(158); paragraph3.addToIndexes(); Paragraph paragraph4 = new Paragraph(jCas); paragraph4.setBegin(159); paragraph4.setDepth(1); paragraph4.setEnd(212); paragraph4.addToIndexes();
/**
 * Indexes a depth-1 Section spanning TEXT with two depth-2 Paragraphs inside it and checks
 * that the selector path of each paragraph, cut to depth 1, renders as the enclosing section
 * only.
 */
@Test
public void testGenerateNestedToDepth1() {
  Section section = new Section(jCas);
  section.setBegin(0);
  section.setDepth(1);
  section.setEnd(TEXT.length());
  section.addToIndexes();

  Paragraph paragraph1 = new Paragraph(jCas);
  paragraph1.setBegin(0);
  paragraph1.setDepth(2);
  paragraph1.setEnd(20);
  paragraph1.addToIndexes();

  Paragraph paragraph2 = new Paragraph(jCas);
  paragraph2.setBegin(20);
  paragraph2.setDepth(2);
  paragraph2.setEnd(TEXT.length());
  paragraph2.addToIndexes();

  ItemHierarchy<Structure> structureHierarchy = StructureHierarchy.build(jCas, structuralClasses);

  SelectorPath path1 = structureHierarchy.getSelectorPath(paragraph1);
  // Look up the second paragraph here; using paragraph1 again would leave paragraph2 unchecked
  // and make the second assertion a repeat of the first.
  SelectorPath path2 = structureHierarchy.getSelectorPath(paragraph2);

  assertEquals("Section:nth-of-type(1)", path1.toDepth(1).toString());
  assertEquals("Section:nth-of-type(1)", path2.toDepth(1).toString());
}