/**
 * Creates a {@link CoveringStructureHierarchy} for the given jCas, restricted to the supplied
 * structural classes.
 *
 * <p>The root node is taken from {@code StructureHierarchy.build(jCas, structuralClasses)}, which
 * orders the structure first by the offset of each Structure annotation and then by depth. The
 * covering map is produced by {@code buildCovering} from the same jCas and classes.
 *
 * @param jCas the jCas to read the structure from
 * @param structuralClasses the structural classes to include
 * @return the CoveringStructureHierarchy wrapping the root node and the covering map
 */
public static CoveringStructureHierarchy build(
    JCas jCas, Set<Class<? extends Structure>> structuralClasses) {
  // Arguments are evaluated left to right: the hierarchy root first, then the covering map.
  return new CoveringStructureHierarchy(
      StructureHierarchy.build(jCas, structuralClasses).getRoot(),
      buildCovering(jCas, structuralClasses));
}
/**
 * Builds the covering structure hierarchy for a jCas, considering only the given structural
 * classes.
 *
 * <p>The underlying structure is ordered first by the offset of the Structure annotation and then
 * by depth; its root is paired with the covering map computed by {@code buildCovering}.
 *
 * @param jCas the jCas
 * @param structuralClasses the structural classes to use
 * @return a new CoveringStructureHierarchy for the jCas
 */
public static CoveringStructureHierarchy build(
    JCas jCas, Set<Class<? extends Structure>> structuralClasses) {
  final Node<Structure> hierarchyRoot =
      StructureHierarchy.build(jCas, structuralClasses).getRoot();
  final Map<Annotation, Collection<Structure>> coveringByAnnotation =
      buildCovering(jCas, structuralClasses);

  return new CoveringStructureHierarchy(hierarchyRoot, coveringByAnnotation);
}
@Override protected void writeBody(final JCas jCas, final Element body) { final Node<Structure> root = StructureHierarchy.build(jCas, structuralClasses).getRoot(); walk(body, root); // We need to create the proper li tags under ol and ul body.select("ul > p").wrap("<li></li>"); body.select("ol > p").wrap("<li></li>"); // Correct table cells from td to th in header body.select("thead td").tagName("th"); // Add to any empty td or th's body.select("td:empty,th:empty").html(" "); if (!outputEmptyTags) { Elements e = emptyElements(body); while (!e.isEmpty()) { e.remove(); e = emptyElements(body); } } // TODO: In accordance with HTML spec // - Captions for Table should be moved inside the table // - Captions for Figure should be moved inside the figure }
/**
 * Parses the given HTML, passes it with a JCas obtained from {@code JCasSingleton} to the
 * converter, and returns the root of the structure hierarchy built from that JCas for the
 * configured {@code structuralClasses}.
 *
 * @param html the HTML markup to parse
 * @return the root node of the resulting structure hierarchy
 * @throws UIMAException declared by this method; presumably raised while obtaining the JCas -
 *     confirm against JCasSingleton
 */
public Node<Structure> createStructure(String html) throws UIMAException {
  final JCas cas = JCasSingleton.getJCasInstance();

  // Hand the parsed HTML and the CAS to the converter before building the hierarchy from it.
  converter.apply(Jsoup.parse(html), cas);

  final Node<Structure> hierarchyRoot =
      StructureHierarchy.build(cas, structuralClasses).getRoot();
  return hierarchyRoot;
}
}
// Call addToIndexes() on `document`, then build the structure hierarchy for jCas over the classes
// returned by StructureUtil.getStructureClasses() and keep its root node in `root`.
document.addToIndexes(); root = StructureHierarchy.build(jCas, StructureUtil.getStructureClasses()).getRoot();