/**
 * Returns the smallest offset among the word forms returned by {@code getWFs()}.
 *
 * @return the minimum word-form offset, or {@code Integer.MAX_VALUE} when
 *         {@code getWFs()} yields no elements
 */
public int getOffset() {
    int minOffset = Integer.MAX_VALUE;
    for (WF wf : this.getWFs()) {
        minOffset = Math.min(minOffset, wf.getOffset());
    }
    return minOffset;
}
/**
 * Rebuilds the surface text covered by the span's target word forms.
 *
 * <p>The targets are copied and sorted with {@code WF.OFFSET_COMPARATOR}; each form is
 * then written at its offset relative to the first word form. Gaps between forms are
 * filled with spaces, and text already written past a form's position is cut back
 * before the form is appended.
 *
 * @return the reconstructed text, or an empty string when the span has no targets
 */
public String getForm() {
    List<WF> ordered = new ArrayList<WF>(span.getTargets());
    Collections.sort(ordered, WF.OFFSET_COMPARATOR);

    StringBuilder text = new StringBuilder();
    int base = -1;
    for (WF wf : ordered) {
        int wfOffset = wf.getOffset();
        if (base < 0) {
            // No non-negative base recorded yet: take it from this word form.
            base = wfOffset;
        }
        int position = wfOffset - base;
        if (position < text.length()) {
            // This form starts inside text already written: drop the overlap.
            text.setLength(position);
        } else {
            // Fill the gap up to the form's position with blanks.
            for (int pad = position - text.length(); pad > 0; pad--) {
                text.append(' ');
            }
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Returns the number of characters between the start of the earliest word form and the
 * end of the word form that extends furthest, over the word forms returned by
 * {@code getWFs()}.
 *
 * @return the largest {@code offset + length} minus the smallest {@code offset}, or
 *         {@code 0} when {@code getWFs()} yields no elements
 */
public int getLength() {
    int startOffset = Integer.MAX_VALUE;
    int endOffset = Integer.MIN_VALUE;
    boolean hasWords = false;
    for (WF word : this.getWFs()) {
        hasWords = true;
        int wordOffset = word.getOffset();
        int wordEnd = wordOffset + word.getLength();
        startOffset = Math.min(startOffset, wordOffset);
        endOffset = Math.max(endOffset, wordEnd);
    }
    if (!hasWords) {
        // Both sentinels are untouched here; MIN_VALUE - MAX_VALUE would wrap around
        // to 1 and report a bogus one-character length for an empty span.
        return 0;
    }
    return endOffset - startOffset;
}
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }
@SuppressWarnings("deprecation") Extraction(final QuadModel model, final KAFDocument document) { // Reconstruct the document text using term offsets to avoid alignment issues final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } // Initialize the object this.model = model; this.document = document; this.vf = model.getValueFactory(); this.documentText = builder.toString(); this.documentURI = this.vf.createURI(Util.cleanIRI(document.getPublic().uri)); this.mentions = Maps.newHashMap(); }
// Store the word form's offset, rendered as a decimal string, in the "offset" attribute of its element.
wfElem.setAttribute("offset", Integer.toString(wf.getOffset()));
// Create a "wf" element for the word form and copy its id, offset, length and sentence
// number into attributes; the three numeric values are written as decimal strings.
// NOTE(review): attributes are set in the order id, offset, length, sent — presumably this
// is also the order in which they are serialized; confirm before reordering.
Element wfElem = new Element("wf"); wfElem.setAttribute("id", wf.getId()); wfElem.setAttribute("offset", Integer.toString(wf.getOffset())); wfElem.setAttribute("length", Integer.toString(wf.getLength())); wfElem.setAttribute("sent", Integer.toString(wf.getSent()));