/**
 * Looks up a token by position without failing on an out-of-range index.
 *
 * @param ta           the text annotation the token is read from
 * @param wordPosition index of the requested token; may be negative or past the end
 * @return {@code ta.getToken(wordPosition)} when {@code 0 <= wordPosition < ta.size()},
 *         otherwise the placeholder {@code "*"}
 */
private static String getSafeToken(TextAnnotation ta, int wordPosition) {
    boolean inRange = wordPosition >= 0 && wordPosition < ta.size();
    return inRange ? ta.getToken(wordPosition) : "*";
}
/**
 * Returns the token at the given position, substituting {@code "*"} for any position
 * that is negative or not smaller than {@code ta.size()}.
 *
 * @param ta           the text annotation the token is read from
 * @param wordPosition index of the requested token; out-of-range values are tolerated
 * @return the token at {@code wordPosition}, or {@code "*"} when the index is out of range
 */
private static String getSafeToken(TextAnnotation ta, int wordPosition) {
    if (wordPosition < 0 || wordPosition >= ta.size()) {
        return "*";
    }
    return ta.getToken(wordPosition);
}
/**
 * Renders the tokens of {@code textAnnotation} in order. A token whose label is the empty
 * string is written as {@code token}, any other token as {@code (label token)}; every
 * entry is followed by a single space, so a non-empty result ends with a space.
 *
 * @return the space-separated rendering described above
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < this.textAnnotation.size(); i++) {
        // Fetch label and token once per position and reuse them in both branches.
        String label = this.getLabel(i);
        String token = this.textAnnotation.getToken(i);
        if (label.isEmpty()) {
            sb.append(token).append(' ');
        } else {
            sb.append('(').append(label).append(' ').append(token).append(") ");
        }
    }
    return sb.toString();
}
}
/**
 * Builds a one-line rendering of the tokens of {@code textAnnotation}. Tokens with an
 * empty label appear as the bare token; all other tokens appear as {@code (label token)}.
 * Each entry is followed by one space, including the last one.
 *
 * @return the rendered token sequence
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < this.textAnnotation.size(); i++) {
        // One label lookup and one token lookup per position, shared by both branches.
        String label = this.getLabel(i);
        String token = this.textAnnotation.getToken(i);
        if (label.isEmpty()) {
            sb.append(token).append(' ');
        } else {
            sb.append('(').append(label).append(' ').append(token).append(") ");
        }
    }
    return sb.toString();
}
}
/**
 * Checks whether the word at {@code tokenId} and the word after it, joined by a single
 * space, appear in {@code mwPrepositionsList}.
 *
 * @param ta       the text annotation the words are read from
 * @param tokenId  index of the first word of the candidate bigram
 * @param viewName view name handed to the {@code Constituent} that is created on a match
 * @return {@code new Constituent("", viewName, ta, tokenId, tokenId + 2)} on a match;
 *         {@code null} when there is no match or no following token
 */
public static Constituent isBigramPrep(TextAnnotation ta, int tokenId, String viewName) {
    // The first word is looked up before the bounds check, so the call happens
    // regardless of whether a second token exists.
    String first = WordHelpers.getWord(ta, tokenId);
    if (tokenId >= ta.size() - 1) {
        return null;
    }

    String second = WordHelpers.getWord(ta, tokenId + 1);
    String candidate = first + " " + second;
    return mwPrepositionsList.contains(candidate)
            ? new Constituent("", viewName, ta, tokenId, tokenId + 2)
            : null;
}
/**
 * Looks for a two-word entry of {@code mwPrepositionsList} that starts at {@code tokenId}.
 * The lookup key is the word at {@code tokenId}, one space, and the word at
 * {@code tokenId + 1}.
 *
 * @param ta       the text annotation the words are read from
 * @param tokenId  index of the first word of the candidate bigram
 * @param viewName view name passed to the resulting {@code Constituent}
 * @return a {@code Constituent} created with start {@code tokenId} and end
 *         {@code tokenId + 2} if the key is in the list, otherwise {@code null}
 *         (also when {@code tokenId} is not followed by another token)
 */
public static Constituent isBigramPrep(TextAnnotation ta, int tokenId, String viewName) {
    // Looked up ahead of the range check; the order of these two steps is intentional.
    String head = WordHelpers.getWord(ta, tokenId);

    boolean hasNext = tokenId < ta.size() - 1;
    if (!hasNext) {
        return null;
    }

    String key = head + " " + WordHelpers.getWord(ta, tokenId + 1);
    if (!mwPrepositionsList.contains(key)) {
        return null;
    }
    return new Constituent("", viewName, ta, tokenId, tokenId + 2);
}
public static boolean isPrep(TextAnnotation ta, int tokenId) { String pos = WordHelpers.getPOS(ta, tokenId); String word = WordHelpers.getWord(ta, tokenId); String lowerCase = word.toLowerCase().trim(); boolean validPreposition = prepositions.contains(lowerCase); boolean isPrepositionPOS = POSUtils.isPOSPreposition(pos); // we need to consider the case of "to + verb" boolean isToVP = false; if (tokenId < ta.size() - 1) { if (lowerCase.equals("to") && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 1))) isToVP = true; } return validPreposition && (isPrepositionPOS && !isToVP); }
/**
 * Turns a predicate-argument view into column data collected in {@code columns}.
 *
 * <p>The predicates of {@code pav} are ordered with
 * {@code TextAnnotationUtilities.constituentStartComparator}, handed to
 * {@code addPredicateInfo} together with {@code columns}, and then one array produced by
 * {@code addPredicateArgInfo} is added to {@code columns} per predicate, in that order.
 * A {@code null} view is treated as a view without predicates.
 *
 * @param ta       text annotation whose {@code size()} is passed to the column helpers
 * @param pav      the view to convert; may be {@code null}
 * @param columns  list that receives the generated columns
 * @param addSense forwarded unchanged to {@code addPredicateInfo}
 */
private void convertPredicateArgView(TextAnnotation ta, PredicateArgumentView pav,
        List<String[]> columns, boolean addSense) {
    // Sort a private copy so that the list returned by the view is never reordered in
    // place (and so that sorting cannot fail if that list is unmodifiable).
    List<Constituent> predicates = new ArrayList<>();
    if (pav != null) {
        predicates.addAll(pav.getPredicates());
    }
    Collections.sort(predicates, TextAnnotationUtilities.constituentStartComparator);

    int size = ta.size();
    addPredicateInfo(columns, predicates, size, addSense);

    // The loop body only runs when pav is non-null: predicates is empty otherwise.
    for (Constituent predicate : predicates) {
        List<Relation> args = pav.getArguments(predicate);
        columns.add(addPredicateArgInfo(predicate, args, size));
    }
}
/**
 * Turns a predicate-argument view into column data collected in {@code columns}.
 *
 * <p>The predicates of {@code pav} are ordered with
 * {@code TextAnnotationUtilities.constituentStartComparator}, handed to
 * {@code addPredicateInfo} together with {@code columns}, and then one array produced by
 * {@code addPredicateArgInfo} is added to {@code columns} per predicate, in that order.
 * A {@code null} view is treated as a view without predicates.
 *
 * @param ta       text annotation whose {@code size()} is passed to the column helpers
 * @param pav      the view to convert; may be {@code null}
 * @param columns  list that receives the generated columns
 * @param addSense forwarded unchanged to {@code addPredicateInfo}
 */
private static void convertPredicateArgView(TextAnnotation ta, PredicateArgumentView pav,
        List<String[]> columns, boolean addSense) {
    // Sort a private copy so that the list returned by the view is never reordered in
    // place (and so that sorting cannot fail if that list is unmodifiable).
    List<Constituent> predicates = new ArrayList<>();
    if (pav != null) {
        predicates.addAll(pav.getPredicates());
    }
    Collections.sort(predicates, TextAnnotationUtilities.constituentStartComparator);

    int size = ta.size();
    addPredicateInfo(columns, predicates, size, addSense);

    // The loop body only runs when pav is non-null: predicates is empty otherwise.
    for (Constituent predicate : predicates) {
        List<Relation> args = pav.getArguments(predicate);
        columns.add(addPredicateArgInfo(predicate, args, size));
    }
}
public static boolean isPrep(TextAnnotation ta, int tokenId) { String pos = WordHelpers.getPOS(ta, tokenId); String word = WordHelpers.getWord(ta, tokenId); String lowerCase = word.toLowerCase().trim(); boolean validPreposition = prepositions.contains(lowerCase); boolean isPrepositionPOS = POSUtils.isPOSPreposition(pos); // we need to consider the case of "to + verb" boolean isToVP = false; if (tokenId < ta.size() - 1) { if (lowerCase.equals("to") && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 1))) isToVP = true; } return validPreposition && (isPrepositionPOS && !isToVP); }
/**
 * Collects one single-token constituent for every position at which {@code getLemma}
 * yields a value.
 *
 * @param ta the text annotation to scan, position by position
 * @return the constituents in position order; each is created as
 *         {@code new Constituent("", "", ta, i, i + 1)} and carries the lemma under the
 *         attribute key {@code PredicateArgumentView.LemmaIdentifier}
 * @throws Exception declared by this method's signature; the visible body throws nothing itself
 */
public List<Constituent> getPredicates(TextAnnotation ta) throws Exception {
    List<Constituent> predicates = new ArrayList<>();

    int tokenId = 0;
    while (tokenId < ta.size()) {
        Option<String> lemma = getLemma(ta, tokenId);
        if (lemma.isPresent()) {
            Constituent predicate = new Constituent("", "", ta, tokenId, tokenId + 1);
            predicate.addAttribute(PredicateArgumentView.LemmaIdentifier, lemma.get());
            predicates.add(predicate);
        }
        tokenId++;
    }

    return predicates;
}
/**
 * Checks whether the word at {@code tokenId} and the two words after it, joined by single
 * spaces, appear in {@code mwPrepositionsList}.
 *
 * @param ta       the text annotation the words are read from
 * @param tokenId  index of the first word of the candidate trigram
 * @param viewName view name handed to the {@code Constituent} that is created on a match
 * @return {@code new Constituent("", viewName, ta, tokenId, tokenId + 3)} on a match;
 *         {@code null} when there is no match or fewer than two following tokens
 */
public static Constituent isTrigramPrep(TextAnnotation ta, int tokenId, String viewName) {
    // The first word is looked up before the bounds check, so the call happens
    // regardless of whether two more tokens exist.
    String first = WordHelpers.getWord(ta, tokenId);
    if (tokenId >= ta.size() - 2) {
        return null;
    }

    String second = WordHelpers.getWord(ta, tokenId + 1);
    String third = WordHelpers.getWord(ta, tokenId + 2);
    String candidate = first + " " + second + " " + third;
    return mwPrepositionsList.contains(candidate)
            ? new Constituent("", viewName, ta, tokenId, tokenId + 3)
            : null;
}
/**
 * Scans every position of {@code ta} and returns a constituent for each one where
 * {@code getLemma} has a value.
 *
 * @param ta the text annotation to scan
 * @return constituents in position order, each created as
 *         {@code new Constituent("", "", ta, i, i + 1)} with the lemma stored under
 *         {@code PredicateArgumentView.LemmaIdentifier}
 * @throws Exception declared by this method's signature; the visible body throws nothing itself
 */
public List<Constituent> getPredicates(TextAnnotation ta) throws Exception {
    List<Constituent> found = new ArrayList<>();
    for (int pos = 0; pos < ta.size(); pos++) {
        Option<String> lemma = getLemma(ta, pos);
        if (!lemma.isPresent()) {
            continue; // no lemma at this position, so no predicate
        }
        Constituent candidate = new Constituent("", "", ta, pos, pos + 1);
        candidate.addAttribute(PredicateArgumentView.LemmaIdentifier, lemma.get());
        found.add(candidate);
    }
    return found;
}
/**
 * Returns the predicate candidates of {@code ta}: one single-position constituent for
 * each index at which {@code getLemma} reports a value.
 *
 * @param ta the text annotation to scan
 * @return the constituents in index order; every one is built with
 *         {@code new Constituent("", "", ta, i, i + 1)} and has the lemma attached under
 *         {@code PredicateArgumentView.LemmaIdentifier}
 * @throws Exception declared by this method's signature; the visible body throws nothing itself
 */
public List<Constituent> getPredicates(TextAnnotation ta) throws Exception {
    List<Constituent> result = new ArrayList<>();

    int index = 0;
    while (index < ta.size()) {
        Option<String> maybeLemma = getLemma(ta, index);
        if (maybeLemma.isPresent()) {
            Constituent single = new Constituent("", "", ta, index, index + 1);
            single.addAttribute(PredicateArgumentView.LemmaIdentifier, maybeLemma.get());
            result.add(single);
        }
        index++;
    }

    return result;
}
/**
 * Looks for a three-word entry of {@code mwPrepositionsList} that starts at
 * {@code tokenId}. The lookup key is the three consecutive words joined by single spaces.
 *
 * @param ta       the text annotation the words are read from
 * @param tokenId  index of the first word of the candidate trigram
 * @param viewName view name passed to the resulting {@code Constituent}
 * @return a {@code Constituent} created with start {@code tokenId} and end
 *         {@code tokenId + 3} if the key is in the list, otherwise {@code null}
 *         (also when fewer than two tokens follow {@code tokenId})
 */
public static Constituent isTrigramPrep(TextAnnotation ta, int tokenId, String viewName) {
    // Looked up ahead of the range check; the order of these two steps is intentional.
    String head = WordHelpers.getWord(ta, tokenId);

    boolean hasTwoMore = tokenId < ta.size() - 2;
    if (!hasTwoMore) {
        return null;
    }

    String middle = WordHelpers.getWord(ta, tokenId + 1);
    String tail = WordHelpers.getWord(ta, tokenId + 2);
    String key = head + " " + middle + " " + tail;
    if (!mwPrepositionsList.contains(key)) {
        return null;
    }
    return new Constituent("", viewName, ta, tokenId, tokenId + 3);
}
/**
 * Writes a predicate-argument view to {@code out}.
 *
 * <p>The view is converted to columns with {@code convertPredicateArgView} (with
 * {@code addSense} set to {@code false}), the columns are passed through
 * {@code transpose} together with the size of the view's text annotation, and the result
 * is handed to {@code printFormatted}.
 *
 * @param pav the view to print; its text annotation is obtained via {@code getTextAnnotation()}
 * @param out the writer passed on to {@code printFormatted}
 */
public static void printPredicateArgumentView(PredicateArgumentView pav, PrintWriter out) {
    TextAnnotation ta = pav.getTextAnnotation();

    List<String[]> cols = new ArrayList<>();
    convertPredicateArgView(ta, pav, cols, false);

    String[][] rows = transpose(cols, ta.size());
    printFormatted(rows, out, ta);
}
public void printPredicateArgumentView(PredicateArgumentView pav, PrintWriter out) { // System.out.println("*" + pav + "*"); List<String[]> columns = new ArrayList<>(); convertPredicateArgView(pav.getTextAnnotation(), pav, columns, false); String[][] tr = transpose(columns, pav.getTextAnnotation().size()); printFormatted(tr, out, pav.getTextAnnotation()); }
/**
 * Reports whether a page should be treated as having no usable content.
 *
 * <p>The checks run in this order and the first one that applies yields {@code true}:
 * {@code ta} is {@code null}; {@code ta.size()} is 0; {@code page.isMain()} is
 * {@code false}; {@code page.getText()} is {@code null};
 * {@code meta.isDisambiguationPage()}; {@code meta.isRedirect()}.
 *
 * @param page the article being inspected
 * @param meta metadata of the same article
 * @param ta   the text annotation built for the article; may be {@code null}
 * @return {@code true} if any of the checks above applies, i.e. the page is a
 *         non-content page; {@code false} otherwise
 */
public static boolean isNoncontentPage(WikiArticle page, PageMeta meta, TextAnnotation ta) {
    if (ta == null || ta.size() == 0) {
        return true;
    }
    if (!page.isMain() || page.getText() == null) {
        return true;
    }
    return meta.isDisambiguationPage() || meta.isRedirect();
}
/**
 * Creates a {@code TokenLabelView} named {@code getViewName()} and registers it on
 * {@code input}.
 *
 * <p>For every position of {@code input}, the token is set on {@code stemmer},
 * {@code stemmer.stem()} is invoked, and the stemmer's current value is stored as that
 * position's label with score 1.0. The whole loop runs inside a block synchronized on
 * {@code instance}.
 *
 * @param input the text annotation that is read and that receives the new view
 */
@Override
public void addView(TextAnnotation input) {
    TokenLabelView stemView = new TokenLabelView(getViewName(), "PorterStemmer", input, 1.0);

    synchronized (instance) {
        int tokenId = 0;
        while (tokenId < input.size()) {
            String token = input.getToken(tokenId);
            stemmer.setCurrent(token);
            stemmer.stem();
            stemView.addTokenLabel(tokenId, stemmer.getCurrent(), 1.0);
            tokenId++;
        }
    }

    input.addView(getViewName(), stemView);
}
/**
 * Adds a view of per-token stems to {@code input}.
 *
 * <p>A {@code TokenLabelView} is created under the name returned by {@code getViewName()}.
 * While holding the monitor of {@code instance}, each token of {@code input} is fed to
 * {@code stemmer} via {@code setCurrent}, {@code stem()} is called, and the value of
 * {@code getCurrent()} is recorded as the label of that token with score 1.0. The view is
 * then attached to {@code input}.
 *
 * @param input the text annotation that is read and that receives the new view
 */
@Override
public void addView(TextAnnotation input) {
    TokenLabelView labels = new TokenLabelView(getViewName(), "PorterStemmer", input, 1.0);

    synchronized (instance) {
        int position = 0;
        while (position < input.size()) {
            String surface = input.getToken(position);
            stemmer.setCurrent(surface);
            stemmer.stem();
            labels.addTokenLabel(position, stemmer.getCurrent(), 1.0);
            position++;
        }
    }

    input.addView(getViewName(), labels);
}