/**
 * Returns the textual form of this object.
 *
 * @return the value produced by {@link #getForm()}
 */
@Override
public String toString() {
    final String form = getForm();
    return form;
}
/**
 * Returns the textual form of this object.
 *
 * @return the value produced by {@link #getForm()}
 */
@Override
public String toString() {
    final String form = getForm();
    return form;
}
/**
 * Builds a single string from the forms of the word forms targeted by {@code mentions}.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @return the space-separated forms, or the empty string when there are no targets
 */
public String getSpanStr() {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : mentions.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Builds a single string from the forms of the word forms targeted by the given span.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @param span the span whose targets are concatenated
 * @return the space-separated forms, or the empty string when the span has no targets
 */
public String getSpanStr(Span<WF> span) {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : span.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Builds a single string from the forms of the word forms targeted by the given span.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @param span the span whose targets are concatenated
 * @return the space-separated forms, or the empty string when the span has no targets
 */
public String getSpanStr(Span<WF> span) {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : span.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Builds a single string from the forms of the word forms targeted by {@code mentions}.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @return the space-separated forms, or the empty string when there are no targets
 */
public String getSpanStr() {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : mentions.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Builds a single string from the forms of the word forms targeted by {@code span}.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @return the space-separated forms, or the empty string when the span has no targets
 */
public String getStr() {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : span.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Builds a single string from the forms of the word forms targeted by {@code span}.
 *
 * <p>Forms are joined with one space. The separator is only emitted once some text has
 * already been accumulated, so leading empty forms do not produce a leading space.
 *
 * @return the space-separated forms, or the empty string when the span has no targets
 */
public String getForm() {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    final StringBuilder text = new StringBuilder();
    for (WF wf : span.getTargets()) {
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
/**
 * Renders the supplied terms as a space-separated string.
 *
 * <p>Terms are visited in the order defined by {@code Term.OFFSET_COMPARATOR}. For each
 * term, every word form is appended; words of terms whose morphofeat starts with
 * {@code "NNP"} keep their original casing, all other words are lower-cased. A single
 * space is inserted before every word except the very first one.
 *
 * @param terms the terms to render
 * @return the resulting text, or the empty string when no word is emitted
 */
public static String getText(final Iterable<Term> terms) {
    final StringBuilder builder = new StringBuilder();
    boolean atBeginning = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final boolean properNoun = term.getMorphofeat().startsWith("NNP");
        for (final WF word : term.getWFs()) {
            if (!atBeginning) {
                builder.append(' ');
            }
            final String form = word.getForm();
            // Locale.ROOT keeps lower-casing independent of the JVM default locale
            // (e.g. a Turkish default would otherwise map 'I' to a dotless 'ı').
            builder.append(properNoun ? form : form.toLowerCase(java.util.Locale.ROOT));
            atBeginning = false;
        }
    }
    return builder.toString();
}
/**
 * Renders the targets of this object as a space-separated string.
 *
 * <p>Each target contributes one piece of text: {@code getStr()} for a {@code Term},
 * {@code getForm()} for a {@code WF}, and {@code toString()} for anything else. A space
 * is inserted before a piece only when some text has already been accumulated.
 *
 * @return the concatenated text, or the empty string when there are no targets
 */
public String getStr() {
    final StringBuilder text = new StringBuilder();
    for (Object target : targets) {
        final String piece;
        if (target instanceof Term) {
            piece = ((Term) target).getStr();
        } else if (target instanceof WF) {
            piece = ((WF) target).getForm();
        } else {
            piece = target.toString();
        }
        if (text.length() > 0) {
            text.append(' ');
        }
        text.append(piece);
    }
    return text.toString();
}
/**
 * Selects a subset of the supplied terms, in {@code Term.OFFSET_COMPARATOR} order.
 *
 * <p>A term is kept when at least one of its word forms is contained in {@code SYMBOLS}
 * or does not match {@code WF_EXCLUSION_PATTERN}. Until the first term has been kept,
 * terms whose POS tag starts with {@code D} or {@code P} (case-insensitively) are
 * skipped without inspecting their word forms.
 *
 * @param terms the terms to filter
 * @return a new list with the retained terms, each appearing at most once
 */
public static List<Term> filterTerms(final Iterable<Term> terms) {
    final List<Term> kept = Lists.newArrayList();
    boolean nothingKeptYet = true;
    for (final Term candidate : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final char posInitial = Character.toUpperCase(candidate.getPos().charAt(0));
        final boolean skippableLeader = nothingKeptYet && (posInitial == 'D' || posInitial == 'P');
        if (!skippableLeader) {
            for (final WF wf : candidate.getWFs()) {
                final String form = wf.getForm();
                final boolean relevant = SYMBOLS.contains(form)
                        || !WF_EXCLUSION_PATTERN.matcher(form).matches();
                if (relevant) {
                    // One qualifying word is enough; stop scanning this term.
                    kept.add(candidate);
                    nothingKeptYet = false;
                    break;
                }
            }
        }
    }
    return kept;
}
/**
 * Computes the end offset of a term from its last word form.
 *
 * <p>When the last word form is one of {@code "-LSB-"}, {@code "-RSB-"} or {@code "''"},
 * the end is the word offset plus one; otherwise it is the word offset plus the word
 * length.
 *
 * @param term the term to inspect; must have at least one word form
 * @return the computed end offset
 */
private static int endOf(final Term term) {
    final List<WF> words = term.getWFs();
    final WF last = words.get(words.size() - 1);
    final String form = last.getForm();
    final boolean countsAsOneChar = form.equals("-LSB-") || form.equals("-RSB-") || form.equals("''");
    return countsAsOneChar
            ? last.getOffset() + 1
            : last.getOffset() + last.getLength();
}
/**
 * Computes the end offset of a term from its last word form.
 *
 * <p>When the last word form is one of {@code "-LSB-"}, {@code "-RSB-"} or {@code "''"},
 * the end is the word offset plus one; otherwise it is the word offset plus the word
 * length.
 *
 * @param term the term to inspect; must have at least one word form
 * @return the computed end offset
 */
public static int getEnd(final Term term) {
    final List<WF> words = term.getWFs();
    final WF last = words.get(words.size() - 1);
    final String form = last.getForm();
    final boolean countsAsOneChar = form.equals("-LSB-") || form.equals("-RSB-") || form.equals("''");
    return countsAsOneChar
            ? last.getOffset() + 1
            : last.getOffset() + last.getLength();
}
/**
 * Reconstructs the text covered by {@code span} from the offsets of its word forms.
 *
 * <p>The targets are copied and sorted with {@code WF.OFFSET_COMPARATOR}. Each word is
 * then written at its offset relative to the first word: gaps are filled with spaces,
 * and when a word starts before the end of the text written so far, that text is
 * truncated at the word's position before the word is appended.
 *
 * @return the reconstructed text, or the empty string when the span has no targets
 */
public String getForm() {
    final List<WF> ordered = new ArrayList<WF>(span.getTargets());
    Collections.sort(ordered, WF.OFFSET_COMPARATOR);

    final StringBuilder text = new StringBuilder();
    int base = -1; // negative means "no base offset chosen yet"
    for (final WF wf : ordered) {
        if (base < 0) {
            base = wf.getOffset();
        }
        final int position = wf.getOffset() - base;
        if (position >= text.length()) {
            // Pad the gap between the previous word and this one.
            while (text.length() < position) {
                text.append(' ');
            }
        } else {
            // Overlapping word: drop what was written past its start.
            text.setLength(position);
        }
        text.append(wf.getForm());
    }
    return text.toString();
}
final String[] tokens = new String[wfs.size()]; for (int i = 0; i < wfs.size(); i++) { tokens[i] = wfs.get(i).getForm(); final List<WF> wfTarget = new ArrayList<WF>(); wfTarget.add(wfs.get(i));
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }
@SuppressWarnings("deprecation") Extraction(final QuadModel model, final KAFDocument document) { // Reconstruct the document text using term offsets to avoid alignment issues final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } // Initialize the object this.model = model; this.document = document; this.vf = model.getValueFactory(); this.documentText = builder.toString(); this.documentURI = this.vf.createURI(Util.cleanIRI(document.getPublic().uri)); this.mentions = Maps.newHashMap(); }
String[] tokenIds = new String[sentence.size()]; for (int i = 0; i < sentence.size(); i++) { tokens[i] = sentence.get(i).getForm(); tokenIds[i] = sentence.get(i).getId();
final String[] tokens = new String[wfs.size()]; for (int i = 0; i < wfs.size(); i++) { tokens[i] = wfs.get(i).getForm(); final List<WF> wfTarget = new ArrayList<WF>(); wfTarget.add(wfs.get(i));
String[] tokenIds = new String[sentence.size()]; for (int i = 0; i < sentence.size(); i++) { tokens[i] = sentence.get(i).getForm(); tokenIds[i] = sentence.get(i).getId();