/**
 * Retrieves the term stored at the given position in this document.
 *
 * @param index the global, zero-based index of the term in the document
 * @return the term located at {@code index}
 */
public Term termNth(Integer index) {
    return getTerms().get(index.intValue());
}
/**
 * Returns the terms reachable from {@code head} in the dependency tree, optionally
 * restricted to the run of terms that is consecutive (by document index) around the head.
 *
 * @param document the document whose dependency layer is queried
 * @param head the ancestor term whose descendants are collected
 * @param consecutive if true, descendants separated from the head by a gap in the
 *        document's term sequence are removed from the result
 * @return the (possibly filtered) set of descendant terms
 */
public static Set<Term> getTermsByDepAncestor(final KAFDocument document, final Term head,
        final boolean consecutive) {
    final Set<Term> descendants = document.getTermsByDepAncestors(ImmutableSet.of(head));
    if (consecutive) {
        // Order descendants by character offset, then map each to its index in the
        // document-wide term list so gaps can be detected.
        final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(
                descendants);
        final int[] indexes = new int[sortedTerms.size()];
        for (int i = 0; i < sortedTerms.size(); ++i) {
            indexes[i] = document.getTerms().indexOf(sortedTerms.get(i));
        }
        final int h = sortedTerms.indexOf(head);
        // Walk right from the head: once a gap (non-adjacent index) is seen, every
        // later term is dropped — `filtered` stays true for the rest of the scan.
        boolean filtered = false;
        for (int i = h + 1; i < indexes.length; ++i) {
            filtered |= indexes[i] > indexes[i - 1] + 1;
            if (filtered) {
                descendants.remove(sortedTerms.get(i));
            }
        }
        // Walk left from the head with the symmetric gap test.
        filtered = false;
        for (int i = h - 1; i >= 0; --i) {
            filtered |= indexes[i] < indexes[i + 1] - 1;
            if (filtered) {
                descendants.remove(sortedTerms.get(i));
            }
        }
    }
    return descendants;
}
private void applySRLPredicateAddition(final KAFDocument document) { for (final Term term : document.getTerms()) {
/**
 * Builds a term span from the entities referred to by {@code entity}. Each referred
 * entity's {@code localURI} is parsed as a 1-based index into the document's term list.
 *
 * @param entity the annotation entity whose references are resolved
 * @param document the document supplying the terms
 * @return a span containing the referred terms; empty when {@code entity.referred} is null
 */
private static Span<Term> getSpanFromEntity(LKAnnotationEntity entity, KAFDocument document) {
    final Span<Term> span = KAFDocument.newTermSpan();
    if (entity.referred == null) {
        return span;
    }
    for (final LKAnnotationEntity ref : entity.referred) {
        // localURI carries a 1-based term index; the term list is 0-based.
        final int position = Integer.parseInt(ref.localURI);
        span.addTarget(document.getTerms().get(position - 1));
    }
    return span;
}
for (final Term term : this.document.getTerms()) { if (isAttributeTerm(term)) { final Dep dep = this.document.getDepToTerm(term);
/**
 * Get all the WF ids for the terms contained in the KAFDocument.
 *
 * @param kaf the KAFDocument
 * @return the list of all WF ids in the terms layer
 */
public List<String> getAllWFIdsFromTerms(KAFDocument kaf) {
    final List<String> wfIds = new ArrayList<>();
    for (final Term term : kaf.getTerms()) {
        for (final WF wordForm : term.getWFs()) {
            wfIds.add(wordForm.getId());
        }
    }
    return wfIds;
}
/**
 * Computes the half-open index ranges [start, end) covering maximal runs of consecutive
 * terms (by position in the document's term list) among the supplied terms.
 *
 * @param document the document whose term list defines the indexes
 * @param terms the terms to group into ranges; iterated in offset order
 * @return the list of index ranges, one per maximal consecutive run
 */
public static List<Range> termRangesFor(final KAFDocument document, final Iterable<Term> terms) {
    final List<Range> ranges = Lists.newArrayList();
    // Hoisted out of the loop: the document's term list is loop-invariant and was
    // previously re-fetched on every iteration just to call indexOf on it.
    final List<Term> docTerms = document.getTerms();
    int startIndex = -1;
    int lastIndex = -2; // -2 so the first term always triggers a new range
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final int termIndex = docTerms.indexOf(term);
        if (termIndex - lastIndex > 1) {
            // Gap in term indexes: close the current run (if any) and start a new one.
            if (startIndex >= 0) {
                ranges.add(Range.create(startIndex, lastIndex + 1));
            }
            startIndex = termIndex;
        }
        lastIndex = termIndex;
    }
    // Emit the final open run, if any terms were seen.
    if (startIndex != -1 && lastIndex >= startIndex) {
        ranges.add(Range.create(startIndex, lastIndex + 1));
    }
    return ranges;
}
/**
 * Computes character-offset ranges covering maximal runs of consecutive terms (by
 * position in the document's term list) among the supplied terms.
 *
 * @param document the document whose term list defines term adjacency
 * @param terms the terms to group into ranges; iterated in offset order
 * @return the list of character-offset ranges, one per maximal consecutive run
 */
public static List<Range> rangesFor(final KAFDocument document, final Iterable<Term> terms) {
    final List<Range> ranges = Lists.newArrayList();
    // Hoisted out of the loop: the document's term list is loop-invariant and was
    // previously re-fetched on every iteration just to call indexOf on it.
    final List<Term> docTerms = document.getTerms();
    int startOffset = -1;
    int endOffset = -1;
    int termIndex = -2; // -2 so the first term always triggers a new range
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final int lastTermIndex = termIndex;
        termIndex = docTerms.indexOf(term);
        if (termIndex - lastTermIndex > 1) {
            // Gap in term indexes: close the current run (if any) and start a new one.
            if (startOffset != -1) {
                ranges.add(Range.create(startOffset, endOffset));
            }
            startOffset = term.getOffset();
        }
        endOffset = NAFUtils.getEnd(term);
    }
    // Emit the final open run, if any terms were seen.
    if (startOffset != -1 && endOffset > startOffset) {
        ranges.add(Range.create(startOffset, endOffset));
    }
    return ranges;
}
/**
 * Parses a textual span expression into a term span. The expression is split on
 * non-identifier characters; a single id selects one term, while two or more ids
 * select the inclusive range between the first and last id.
 *
 * @param document the document supplying the terms
 * @param span the textual span expression
 * @return the resulting term span (empty when the expression yields no ids)
 */
private static Span<Term> getTermsFromSpan(KAFDocument document, String span) {
    final String[] ids = span.split("[^a-z0-9A-Z_]+");
    final Span<Term> result = KAFDocument.newTermSpan();
    if (ids.length == 1) {
        result.addTarget(document.getTerms().get(getTermFromSpan(ids[0])));
    } else if (ids.length > 1) {
        final int first = getTermFromSpan(ids[0]);
        final int last = getTermFromSpan(ids[ids.length - 1]);
        for (int index = first; index <= last; index++) {
            result.addTarget(document.getTerms().get(index));
        }
    }
    return result;
}
final List<Term> docTerms = document.getTerms(); spanTerms = Lists.newArrayList(docTerms.subList( docTerms.indexOf(spanTerms.get(0)),
/***********************************************************/
/**
 * Converts a parenthesized constituency-parse string into constituent trees and adds
 * them to the given KAF document, one tree per top-level parenthesis group.
 *
 * @param parOut the parenthesized parser output
 * @param kaf the document receiving the constituents (its term layer is matched against the tokens)
 * @throws Exception propagated from the Tree helpers
 */
static void parenthesesToKaf(String parOut, KAFDocument kaf) throws Exception {
    String[] tokens = Tree.tokenize(parOut);
    Tree.check(tokens);
    // Map each opening parenthesis to its matching close, and each token to its Term.
    HashMap<Integer, Integer> parMatching = Tree.matchParentheses(tokens);
    HashMap<Integer, Term> termMatching = Tree.matchTerms(tokens, kaf.getTerms());
    // Temporary workaround to avoid an error: return empty when no terms matched.
    if (termMatching.size() == 0) {
        return;
    }
    List<Tree> trees = new ArrayList<Tree>();
    // Walk the token stream, building one tree per top-level "(...)" group.
    int current = 0;
    while (current < tokens.length) {
        int end = parMatching.get(current);
        NonTerminal root = Tree.createNonTerminal(tokens, current+1, end-1, parMatching, termMatching, kaf);
        kaf.newConstituent(root);
        current = end + 1;
    }
}
tokens.addAndGet(document.getTerms().size()); long numDoc = documents.incrementAndGet();
Multimap<Term, Lexeme> lexemeMultimap = lexicon.match(document, document.getTerms());
/**
 * Strips external sense references (WordNet synset/SST, BBN, SUMO, YAGO) from every
 * term whose morphological features mark it as a proper noun (POS starting with "NNP").
 *
 * @param document the document whose terms are filtered in place
 */
private void applyTermSenseFiltering(final KAFDocument document) {
    for (final Term term : document.getTerms()) {
        final String morphofeat = term.getMorphofeat();
        if (morphofeat == null || !morphofeat.startsWith("NNP")) {
            continue;
        }
        NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SYNSET, null);
        NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SST, null);
        NAFUtils.removeRefs(term, NAFUtils.RESOURCE_BBN, null);
        NAFUtils.removeRefs(term, NAFUtils.RESOURCE_SUMO, null);
        NAFUtils.removeRefs(term, NAFUtils.RESOURCE_YAGO, null);
    }
}
for (Term term : nafDocument.getTerms()) { termsHashMap.put(term.getOffset(), term);
private void applyEntityAddition(final KAFDocument document) { for (final Term term : document.getTerms()) {
for (Term term : nafDoc.getTerms()) { nafTerms.put(term.getId(), term);
private void applyTermSenseCompletion(final KAFDocument document) { for (final Term term : document.getTerms()) {
for (Term term : doc.getTerms()) { Term termCopy = new Term(term, copiedWFs); this.insertTerm(termCopy);
public static void normalize(final KAFDocument document) { for (final Term term : document.getTerms()) { boolean hasBBN = false; boolean hasSynset = false;