/**
 * Returns the textual representation of this object, which is exactly
 * the value produced by {@link #getForm()}.
 *
 * @return the result of {@code getForm()}
 */
@Override
public String toString() {
    final String form = getForm();
    return form;
}
/**
 * Copy constructor: builds a new WF carrying the same id and field values
 * as {@code wf}, but bound to the given annotation container.
 *
 * @param wf                  the word form whose id, sent, para, page, offset,
 *                            length, xpath and form are copied
 * @param annotationContainer the container stored in the new instance
 */
WF(WF wf, AnnotationContainer annotationContainer) {
    super(wf.getId());
    this.annotationContainer = annotationContainer;

    // Textual content and its position in the raw text.
    form = wf.form;
    offset = wf.offset;
    length = wf.length;

    // Structural location attributes.
    sent = wf.sent;
    para = wf.para;
    page = wf.page;
    xpath = wf.xpath;
}
/**
 * Computes the length of the character range covered by the word forms
 * returned by {@code getWFs()}: the distance from the smallest word offset
 * to the largest {@code offset + length} end position.
 *
 * @return the covered length, or {@code 0} when there are no word forms
 */
public int getLength() {
    int startOffset = Integer.MAX_VALUE;
    int endOffset = Integer.MIN_VALUE;
    boolean empty = true;
    for (WF word : this.getWFs()) {
        empty = false;
        int wordOffset = word.getOffset();
        int wordEnd = wordOffset + word.getLength();
        startOffset = Math.min(startOffset, wordOffset);
        endOffset = Math.max(endOffset, wordEnd);
    }
    // With no word forms the sentinels are untouched, and
    // Integer.MIN_VALUE - Integer.MAX_VALUE would silently wrap around to 1.
    if (empty) {
        return 0;
    }
    return endOffset - startOffset;
}
/**
 * Creates a new word form, gives it the next id from the WF id manager and
 * registers it in the annotation container. The length of the new word form
 * is set to {@code form.length()}.
 *
 * @param form   text of the word form itself.
 * @param offset value stored as the offset of the new word form.
 * @param sent   sentence value passed to the WF constructor.
 * @return the newly created word form.
 */
public WF newWF(String form, int offset, int sent) {
    final String wfId = idManager.wfs.getNext();
    final WF created = new WF(this.annotationContainer, wfId, form, sent);
    created.setOffset(offset);
    created.setLength(form.length());
    this.annotationContainer.add(created);
    return created;
}
for (WF wf : text) { Element wfElem = new Element("wf"); wfElem.setAttribute("id", wf.getId()); wfElem.setAttribute("sent", Integer.toString(wf.getSent())); if (wf.hasPara()) { wfElem.setAttribute("para", Integer.toString(wf.getPara())); if (wf.hasPage()) { wfElem.setAttribute("page", Integer.toString(wf.getPage())); if (wf.hasOffset()) { wfElem.setAttribute("offset", Integer.toString(wf.getOffset())); if (wf.hasLength()) { wfElem.setAttribute("length", Integer.toString(wf.getLength())); if (wf.hasXpath()) { wfElem.setAttribute("xpath", wf.getXpath()); wfElem.setText(wf.getForm()); textElem.addContent(wfElem); for (WF target : span.getTargets()) { Element targetElem = new Element("target"); targetElem.setAttribute("id", target.getId()); if (target == span.getHead()) { targetElem.setAttribute("head", "yes"); for (WF target : e.getWFs().getTargets()) {
/**
 * Rebuilds the surface text of this annotation from the word forms of its span.
 * <p>
 * The targets are sorted with {@code WF.OFFSET_COMPARATOR} and each form is
 * written at its offset relative to the first word form: gaps are filled with
 * blanks, and text already written past that position is cut off before the
 * form is appended.
 *
 * @return the reconstructed text; an empty string when the span has no targets
 */
public String getForm() {
    final List<WF> ordered = new ArrayList<WF>(span.getTargets());
    Collections.sort(ordered, WF.OFFSET_COMPARATOR);

    final StringBuilder text = new StringBuilder();
    int base = -1;
    for (final WF token : ordered) {
        final int tokenOffset = token.getOffset();
        if (base < 0) {
            // Still no usable base offset: take the current token's offset.
            base = tokenOffset;
        }
        final int position = tokenOffset - base;
        if (position < text.length()) {
            // Overlap with text already written: drop the overlapping tail.
            text.setLength(position);
        } else {
            // Pad with blanks up to the position of this token.
            for (int column = text.length(); column < position; column++) {
                text.append(' ');
            }
        }
        text.append(token.getForm());
    }
    return text.toString();
}
/**
 * Returns the sentence value of the underlying word.
 *
 * @return the value of {@code word.getSent()}
 */
@Override
public Integer getSent() {
    return word.getSent();
}
terms.remove(term); for (WF wf : term.getWFs()) { unindexAnnotation(term, wf.getId(), termsIndexedByWF); if (timex3.getSpan().getTargets() != null) { for (WF wf : timex3.getSpan().getTargets()) { unindexAnnotation(timex3, wf.getId(), timeExsIndexedByWF); unindexBySent(timex3, timex3.getSpan().getTargets().get(0).getSent(), timeExsIndexedBySent); if (factuality.getWFs() != null) { for (WF wf : factuality.getWFs()) { unindexAnnotation(factuality, wf.getId(), factsIndexedByWF); if (entity.getWFs() != null) { for (WF wf : entity.getWFs().getTargets()) { unindexAnnotation(entity, wf.getId(), linkedEntitiesIndexedByWF);
/**
 * Returns the paragraph value of the underlying word.
 *
 * @return the value of {@code word.getPara()}
 */
@Override
public Integer getPara() {
    return word.getPara();
}
/**
 * Creates a new word form with the next available WF id and registers it in
 * the annotation container under {@code Layer.TEXT} / {@code AnnotationType.WF}.
 *
 * @param offset offset value passed to the WF constructor.
 * @param length length value passed to the WF constructor.
 * @param form   text of the word form itself.
 * @param sent   sentence value passed to the WF constructor.
 * @return the newly created word form.
 */
public WF newWF(int offset, Integer length, String form, int sent) {
    final String wfId = idManager.getNextId(AnnotationType.WF);
    final WF created = new WF(this.annotationContainer, wfId, offset, length, form, sent);
    this.annotationContainer.add(created, Layer.TEXT, AnnotationType.WF);
    return created;
}
for (WF wf : text) { Element wfElem = new Element("wf"); wfElem.setAttribute("id", wf.getId()); wfElem.setAttribute("offset", Integer.toString(wf.getOffset())); wfElem.setAttribute("length", Integer.toString(wf.getLength())); wfElem.setAttribute("sent", Integer.toString(wf.getSent())); if (wf.hasPara()) { wfElem.setAttribute("para", Integer.toString(wf.getPara())); if (wf.hasPage()) { wfElem.setAttribute("page", Integer.toString(wf.getPage())); if (wf.hasXpath()) { wfElem.setAttribute("xpath", wf.getXpath()); wfElem.setText(wf.getForm()); textElem.addContent(wfElem); for (WF target : span.getTargets()) { Element targetElem = new Element("target"); targetElem.setAttribute("id", target.getId()); if (target == span.getHead()) { targetElem.setAttribute("head", "yes"); for (WF target : span.getTargets()) { Element targetElem = new Element("target"); targetElem.setAttribute("id", target.getId()); if (target == span.getHead()) { targetElem.setAttribute("head", "yes");
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }