/**
 * Returns the word forms exposed by the underlying {@code word}.
 *
 * @return the list obtained from {@code word.getWFs()}
 */
public List<WF> getWFs() {
    final List<WF> forms = word.getWFs();
    return forms;
}
/**
 * Returns the smallest offset among this object's word forms.
 *
 * @return the minimum of {@code getOffset()} over {@link #getWFs()}, or
 *         {@code Integer.MAX_VALUE} when there are no word forms
 */
public int getOffset() {
    int smallest = Integer.MAX_VALUE;
    for (WF wf : this.getWFs()) {
        smallest = Math.min(smallest, wf.getOffset());
    }
    return smallest;
}
/**
 * Returns the id of the first word form of the underlying {@code word}.
 *
 * @return the id of the element at index 0 of {@code word.getWFs()}
 */
public String getId() {
    final WF first = word.getWFs().get(0);
    return first.getId();
}
/**
 * Builds a single-space-separated string from the word forms of the given terms.
 *
 * <p>Terms are processed in the order defined by {@code Term.OFFSET_COMPARATOR}. Forms of
 * terms whose morphofeat starts with {@code "NNP"} are emitted unchanged; all other forms
 * are lower-cased. Lower-casing uses {@link java.util.Locale#ROOT} so the result does not
 * depend on the JVM default locale.
 *
 * @param terms the terms whose word forms should be concatenated
 * @return the concatenated text, or an empty string when no word form is found
 */
public static String getText(final Iterable<Term> terms) {
    final StringBuilder builder = new StringBuilder();
    boolean atBeginning = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final boolean properNoun = term.getMorphofeat().startsWith("NNP");
        for (final WF word : term.getWFs()) {
            // Separator goes before every form except the very first one emitted.
            if (!atBeginning) {
                builder.append(' ');
            }
            final String form = word.getForm();
            // Locale.ROOT avoids locale-specific case mappings (e.g. Turkish dotless i).
            builder.append(properNoun ? form : form.toLowerCase(java.util.Locale.ROOT));
            atBeginning = false;
        }
    }
    return builder.toString();
}
/**
 * Selects, in {@code Term.OFFSET_COMPARATOR} order, the terms that have at least one word
 * form that is either listed in {@code SYMBOLS} or not matched by
 * {@code WF_EXCLUSION_PATTERN}.
 *
 * <p>Until the first term has been accepted, terms whose POS starts with {@code D} or
 * {@code P} (case-insensitive) are skipped outright.
 *
 * @param terms the candidate terms
 * @return a new list holding the accepted terms in sorted order
 */
public static List<Term> filterTerms(final Iterable<Term> terms) {
    final List<Term> accepted = Lists.newArrayList();
    boolean nothingAcceptedYet = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final char posInitial = Character.toUpperCase(term.getPos().charAt(0));
        final boolean skippableLeadingPos = posInitial == 'D' || posInitial == 'P';
        if (nothingAcceptedYet && skippableLeadingPos) {
            continue;
        }

        // A single qualifying word form is enough to accept the whole term.
        boolean qualifies = false;
        for (final WF word : term.getWFs()) {
            final String form = word.getForm();
            qualifies = SYMBOLS.contains(form) || !WF_EXCLUSION_PATTERN.matcher(form).matches();
            if (qualifies) {
                break;
            }
        }

        if (qualifies) {
            accepted.add(term);
            nothingAcceptedYet = false;
        }
    }
    return accepted;
}
/**
 * Collects the ids of every WF referenced by the terms of a KAFDocument.
 *
 * @param kaf the KAFDocument whose terms are inspected
 * @return the WF ids, in term order and, within a term, in the order of its WFs
 */
public List<String> getAllWFIdsFromTerms(KAFDocument kaf) {
    List<String> ids = new ArrayList<>();
    for (Term term : kaf.getTerms()) {
        for (WF wf : term.getWFs()) {
            ids.add(wf.getId());
        }
    }
    return ids;
}
/**
 * Computes the end offset of a term from its last word form.
 *
 * <p>The end is the last form's offset plus its length, except when the form text is
 * {@code "-LSB-"}, {@code "-RSB-"} or {@code "''"}, in which case a width of 1 is used.
 * NOTE(review): presumably these are placeholder tokens standing for one-character
 * symbols in the original text; confirm against the tokenizer.
 *
 * @param term the term to measure; must have at least one word form
 * @return the end offset of the term
 */
private static int endOf(final Term term) {
    final List<WF> forms = term.getWFs();
    final WF last = forms.get(forms.size() - 1);
    final String text = last.getForm();
    final boolean fixedWidth = text.equals("-LSB-") || text.equals("-RSB-") || text.equals("''");
    final int width = fixedWidth ? 1 : last.getLength();
    return last.getOffset() + width;
}
/**
 * Returns the offset at which the given term ends, based on its last word form.
 *
 * <p>For the form texts {@code "-LSB-"}, {@code "-RSB-"} and {@code "''"} the form is
 * counted as one character wide; for every other text the form's own length is used.
 * NOTE(review): the three special texts look like escaped tokens for single characters;
 * verify against the tokenizer's conventions.
 *
 * @param term the term to measure; must have at least one word form
 * @return the last word form's offset plus its effective width
 */
public static int getEnd(final Term term) {
    final List<WF> wordForms = term.getWFs();
    final int lastIndex = wordForms.size() - 1;
    final WF tail = wordForms.get(lastIndex);
    final String text = tail.getForm();

    final int span;
    if (!text.equals("-LSB-") && !text.equals("-RSB-") && !text.equals("''")) {
        span = tail.getLength();
    } else {
        span = 1;
    }
    return tail.getOffset() + span;
}
/**
 * Finds the time expressions whose span shares at least one word form with a term.
 *
 * <p>Time expressions without a span are ignored. Each matching time expression is
 * reported once, in the order returned by {@code getTimeExs()}.
 *
 * @param term the term to look up
 * @return a new list of the matching time expressions, possibly empty
 */
public List<Timex3> getTimeExsByTerm(final Term term) {
    final List<Timex3> matches = new ArrayList<>();
    for (final Timex3 candidate : getTimeExs()) {
        if (candidate.getSpan() == null) {
            continue;
        }
        for (final WF target : candidate.getSpan().getTargets()) {
            if (term.getWFs().contains(target)) {
                matches.add(candidate);
                // One shared word form is enough; move on to the next time expression.
                break;
            }
        }
    }
    return matches;
}
/**
 * Returns the length of the text range covered by this object's word forms.
 *
 * <p>The range runs from the smallest word-form offset to the largest
 * {@code offset + length} among the word forms.
 *
 * @return the covered length, or 0 when there are no word forms
 */
public int getLength() {
    int startOffset = Integer.MAX_VALUE;
    int endOffset = Integer.MIN_VALUE;
    boolean hasWords = false;
    for (WF word : this.getWFs()) {
        hasWords = true;
        int wordOffset = word.getOffset();
        int wordEnd = wordOffset + word.getLength();
        if (wordOffset < startOffset) {
            startOffset = wordOffset;
        }
        if (wordEnd > endOffset) {
            endOffset = wordEnd;
        }
    }
    // Without this guard, MIN_VALUE - MAX_VALUE would overflow and yield 1.
    if (!hasWords) {
        return 0;
    }
    return endOffset - startOffset;
}
/**
 * Inserts a term at the given position and registers it in the lookup indexes.
 *
 * <p>The term is indexed under the id of each of its word forms in
 * {@code termsIndexedByWF}. Terms that are not components are additionally indexed by
 * their sentence in {@code termsIndexedBySent}.
 *
 * @param term the term to insert
 * @param index the position in {@code terms} at which to insert it
 */
void add(Term term, int index) {
    terms.add(index, term);

    for (WF wordForm : term.getWFs()) {
        indexAnnotation(term, wordForm.getId(), termsIndexedByWF);
    }

    // Components are not registered in the per-sentence index.
    if (term.isComponent()) {
        return;
    }
    this.indexBySent(term, term.getSent(), this.termsIndexedBySent);
}
markables.put(document.getTermsHead(entity.getTerms()), entity.getTerms()); for (final WF wf : term.getWFs()) { for (final Timex3 timex : document.getTimeExsByWF(wf)) { final List<Term> span = document.getTermsByWFs(timex.getSpan().getTargets());
private void applyEntityRemoveOverlaps(final KAFDocument document) { // Consider all the entities in the document outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) { for (final Term term : entity.getTerms()) { // Remove entities whose span is contained in the span of another entity for (final Entity entity2 : document.getEntitiesByTerm(term)) { if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) { document.removeAnnotation(entity); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Removed " + NAFUtils.toString(entity) + " overlapping with " + NAFUtils.toString(entity2)); } continue outer; } } // Remove entities whose span overlaps with the span of some timex for (final WF wf : term.getWFs()) { final List<Timex3> timex = document.getTimeExsByWF(wf); if (!timex.isEmpty()) { document.removeAnnotation(entity); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Removed " + NAFUtils.toString(entity) + " overlapping with TIMEX3 '" + NAFUtils.toString(timex)); } continue outer; } } } } }
&& document.getEntitiesByTerm(dep.getFrom()).isEmpty(); if (pos != 'R' && pos != 'N' && pos != 'Q' || namePart || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty() // || !document.getEntitiesByTerm(term).isEmpty()) { continue;
if (pos != 'V' && pos != 'N' && pos != 'G' && pos != 'A' || !document.getPredicatesByTerm(term).isEmpty() || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) { continue;
for (final Timex3 timex : document.getTimeExsByWF(term.getWFs().get(0))) { final Term timexHead = NAFUtils.extractHead(document, KAFDocument .newTermSpan(document.getTermsByWFs(timex.getSpan().getTargets())));
Term term = (Term) annotation; terms.remove(term); for (WF wf : term.getWFs()) { unindexAnnotation(term, wf.getId(), termsIndexedByWF);
Element spanElem = new Element("span"); Span<WF> span = term.getSpan(); for (WF target : term.getWFs()) { Element targetElem = new Element("target"); targetElem.setAttribute("id", target.getId());
for (WF target : term.getWFs()) { Element targetElem = new Element("target"); targetElem.setAttribute("id", target.getId());