/**
 * Returns the paragraph value reported by the first word form of this object.
 *
 * <p>NOTE(review): the first word form is fetched with {@code get(0)}, which presumably fails
 * when {@code getWFs()} is empty - confirm that callers guarantee at least one word form.
 *
 * @return the result of {@code getPara()} on the first element of {@code getWFs()}
 */
public Integer getFirstParagraph() {
    return getWFs()
            .get(0)
            .getPara();
}
/**
 * Returns the sentence value reported by the first word form of this object.
 *
 * <p>NOTE(review): the first word form is fetched with {@code get(0)}, which presumably fails
 * when {@code getWFs()} is empty - confirm that callers guarantee at least one word form.
 *
 * @return the result of {@code getSent()} on the first element of {@code getWFs()}
 */
public Integer getFirstSentence() {
    return getWFs()
            .get(0)
            .getSent();
}
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }
@SuppressWarnings("deprecation") Extraction(final QuadModel model, final KAFDocument document) { // Reconstruct the document text using term offsets to avoid alignment issues final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } // Initialize the object this.model = model; this.document = document; this.vf = model.getValueFactory(); this.documentText = builder.toString(); this.documentURI = this.vf.createURI(Util.cleanIRI(document.getPublic().uri)); this.mentions = Maps.newHashMap(); }
// Fetch the document's word forms and append each one's form to `buffer`, in list order
// (the loop body continues past this line).
List<WF> wFs = document.getWFs(); for (WF wf : wFs) { buffer.append(wf.getForm());
// Number of word forms currently held by the document.
int nafWordCount = document.getWFs().size();
// For each word form, start from its id and swap 'w' for 't' (the loop body continues past this line).
// NOTE(review): replace(char, char) rewrites every 'w' in the id, not just a leading one - confirm
// that ids never contain another 'w'.
for (WF wf : document.getWFs()) { String id = wf.getId(); id = id.replace('w', 't');
// For each word form of `doc`, build a copy tied to this object's annotation container and insert
// that copy into this object (the loop body continues past this line).
for (WF wf : doc.getWFs()) { WF wfCopy = new WF(wf, this.annotationContainer); this.insertWF(wfCopy);