/**
 * Collects the POS labels of the tokens in a window that extends {@code k}
 * tokens to the left of {@code startspan} and {@code k} tokens to the right
 * of {@code endspan}.
 *
 * The window is clipped to {@code [0, TOKENS.getEndSpan())}. The returned
 * array always has length {@code 2 * k + 1}; slot 0 holds the label of the
 * first token of the (clipped) window, and slots that are not reached stay
 * {@code null}. The window fits in the array only when
 * {@code endspan - startspan <= 1}, i.e. the span is a single token.
 *
 * @param TOKENS    view whose end span bounds the window on the right
 * @param POS       view queried for the label of each token in the window
 * @param startspan start token index of the span
 * @param endspan   end token index of the span
 * @param k         number of tokens to include on each side
 * @return array of length {@code 2 * k + 1} with the labels found in the window
 */
public String[] getwindowtagskfrom(View TOKENS, View POS, int startspan, int endspan, int k) {
    String tags[] = new String[2 * k + 1];
    int startwin = Math.max(startspan - k, 0);
    int endwin = Math.min(endspan + k, TOKENS.getEndSpan());
    for (int i = startwin; i < endwin; i++) {
        // Store relative to the window start: i is an absolute token index and
        // would run past the 2k+1 slots as soon as the window does not start at 0.
        tags[i - startwin] = POS.getLabelsCoveringSpan(i, i + 1).get(0);
    }
    return tags;
}
/**
 * Collects the POS labels of the tokens in a window that extends {@code k}
 * tokens to the left of {@code startspan} and {@code k} tokens to the right
 * of {@code endspan}.
 *
 * The window is clipped to {@code [0, TOKENS.getEndSpan())}. The returned
 * array always has length {@code 2 * k + 1}; slot 0 holds the label of the
 * first token of the (clipped) window, and slots that are not reached stay
 * {@code null}. The window fits in the array only when
 * {@code endspan - startspan <= 1}, i.e. the span is a single token.
 *
 * @param TOKENS    view whose end span bounds the window on the right
 * @param POS       view queried for the label of each token in the window
 * @param startspan start token index of the span
 * @param endspan   end token index of the span
 * @param k         number of tokens to include on each side
 * @return array of length {@code 2 * k + 1} with the labels found in the window
 */
public String[] getwindowtagskfrom(View TOKENS, View POS, int startspan, int endspan, int k) {
    String tags[] = new String[2 * k + 1];
    int startwin = Math.max(startspan - k, 0);
    int endwin = Math.min(endspan + k, TOKENS.getEndSpan());
    for (int i = startwin; i < endwin; i++) {
        // Store relative to the window start: i is an absolute token index and
        // would run past the 2k+1 slots as soon as the window does not start at 0.
        tags[i - startwin] = POS.getLabelsCoveringSpan(i, i + 1).get(0);
    }
    return tags;
}
/**
 * Returns the surface forms of the tokens in a window reaching {@code k}
 * tokens before {@code startspan} and {@code k} tokens after {@code endspan}.
 *
 * The window is clipped to {@code [0, TOKENS.getEndSpan())}. The result always
 * has length {@code 2 * k + 1}; it is filled from slot 0 onwards in token
 * order and any slots left over remain {@code null}.
 *
 * @param TOKENS    view supplying the token constituents
 * @param startspan start token index of the span
 * @param endspan   end token index of the span
 * @param k         number of tokens to include on each side
 * @return array of length {@code 2 * k + 1} holding the window's surface forms
 */
public String[] getwindowkfrom(View TOKENS, int startspan, int endspan, int k) {
    String[] surfaceForms = new String[2 * k + 1];
    int upper = Math.min(endspan + k, TOKENS.getEndSpan());
    int lower = Math.max(startspan - k, 0);
    int slot = 0;
    for (int tok = lower; tok < upper; tok++) {
        // Each token is looked up through its one-token span [tok, tok + 1).
        surfaceForms[slot++] = TOKENS.getConstituentsCoveringSpan(tok, tok + 1).get(0).getSurfaceForm();
    }
    return surfaceForms;
}
/**
 * Returns the surface forms of the tokens in a window reaching {@code k}
 * tokens before {@code startspan} and {@code k} tokens after {@code endspan}.
 *
 * The window is clipped to {@code [0, TOKENS.getEndSpan())}. The result always
 * has length {@code 2 * k + 1}; it is filled from slot 0 onwards in token
 * order and any slots left over remain {@code null}.
 *
 * @param TOKENS    view supplying the token constituents
 * @param startspan start token index of the span
 * @param endspan   end token index of the span
 * @param k         number of tokens to include on each side
 * @return array of length {@code 2 * k + 1} holding the window's surface forms
 */
public String[] getwindowkfrom(View TOKENS, int startspan, int endspan, int k) {
    String[] surfaceForms = new String[2 * k + 1];
    int upper = Math.min(endspan + k, TOKENS.getEndSpan());
    int lower = Math.max(startspan - k, 0);
    int slot = 0;
    for (int tok = lower; tok < upper; tok++) {
        // Each token is looked up through its one-token span [tok, tok + 1).
        surfaceForms[slot++] = TOKENS.getConstituentsCoveringSpan(tok, tok + 1).get(0).getSurfaceForm();
    }
    return surfaceForms;
}
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
/**
 * Builds part-of-speech features for the tokens around a constituent.
 * The constituent is expected to be one token long.
 *
 * For each offset from -1 to 2 relative to the constituent's start token,
 * the label of the first POS constituent covering that token is paired with
 * the offset. Offsets that fall before token 0 or at/after the end of the
 * POS view are skipped.
 *
 * @param c the constituent whose neighbourhood is inspected
 * @return (offset, POS label) pairs in increasing offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures (Constituent c) {
    View pos = c.getTextAnnotation().getView(ViewNames.POS);
    List<Pair<Integer, String>> features = new ArrayList<>();
    int anchor = c.getStartSpan();
    for (int offset = -1; offset <= 2; offset++) {
        int tokenId = anchor + offset;
        boolean inRange = tokenId >= 0 && tokenId < pos.getEndSpan();
        if (inRange) {
            String label = pos.getConstituentsCoveringToken(tokenId).get(0).getLabel();
            features.add(new Pair<>(offset, label));
        }
    }
    return features;
}
/**
 * Describes the character make-up of the tokens around a constituent.
 * The constituent is expected to be one token long.
 *
 * For each offset {@code j} from -1 to 2 relative to the constituent's start
 * token (offsets outside the TOKENS view are skipped), three flags are added:
 * {@code "c" + j} - every character is upper case;
 * {@code "d" + j} - every character is a digit, '.' or ',';
 * {@code "p" + j} - no character is a letter.
 * All three flags are {@code true} for an empty string.
 *
 * @param c the constituent whose neighbourhood is inspected
 * @return (feature name, flag) pairs, three per in-range offset
 */
public static List<Pair<String, Boolean>> getWordTypeInformation(Constituent c){
    View tokens = c.getTextAnnotation().getView(ViewNames.TOKENS);
    List<Pair<String, Boolean>> features = new ArrayList<>();
    for (int j = -1; j <= 2; j++) {
        int tokenId = c.getStartSpan() + j;
        if (tokenId >= 0 && tokenId < tokens.getEndSpan()) {
            String text = tokens.getConstituentsCoveringToken(tokenId).get(0).toString();
            boolean upperOnly = true;
            boolean numeric = true;
            boolean letterFree = true;
            for (char ch : text.toCharArray()) {
                upperOnly = upperOnly & Character.isUpperCase(ch);
                numeric = numeric & (Character.isDigit(ch) || ch == '.' || ch == ',');
                letterFree = letterFree & !Character.isLetter(ch);
            }
            features.add(new Pair<>("c" + j, upperOnly));
            features.add(new Pair<>("d" + j, numeric));
            features.add(new Pair<>("p" + j, letterFree));
        }
    }
    return features;
}
/**
 * Discrete feature value of the {@code afterHeadPOS} classifier.
 *
 * Returns the label of the first POS constituent covering the token at index
 * {@code target.getEndSpan() + 1}, where {@code target} is the relation's
 * target, or {@code "OUT_OF_BOUND"} when the target's end span is not smaller
 * than the TOKENS view's end span minus one.
 *
 * If the argument is not a {@code Relation}, a diagnostic and a stack trace
 * are printed to standard error and the JVM exits with status 1.
 *
 * @param __example the object to classify; must be a {@code Relation}
 * @return the POS label, or {@code "OUT_OF_BOUND"}
 */
public String discreteValue(Object __example) {
    boolean isRelation = __example instanceof Relation;
    if (!isRelation) {
        String type;
        if (__example == null) {
            type = "null";
        } else {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation relation = (Relation) __example;
    TextAnnotation annotation = relation.getTarget().getTextAnnotation();
    int targetEnd = relation.getTarget().getEndSpan();
    int lastTokenBound = annotation.getView(ViewNames.TOKENS).getEndSpan() - 1;

    // Guard: the lookup below reads token targetEnd + 1, which must exist.
    if (targetEnd >= lastTokenBound) {
        return "OUT_OF_BOUND";
    }
    return "" + (annotation.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel());
}
/**
 * Builds part-of-speech features for the tokens around a constituent.
 * The constituent is expected to be one token long.
 *
 * For each offset from -1 to 2 relative to the constituent's start token,
 * the label of the first POS constituent covering that token is paired with
 * the offset. Offsets that fall before token 0 or at/after the end of the
 * POS view are skipped.
 *
 * @param c the constituent whose neighbourhood is inspected
 * @return (offset, POS label) pairs in increasing offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures (Constituent c) {
    View pos = c.getTextAnnotation().getView(ViewNames.POS);
    List<Pair<Integer, String>> features = new ArrayList<>();
    int anchor = c.getStartSpan();
    for (int offset = -1; offset <= 2; offset++) {
        int tokenId = anchor + offset;
        boolean inRange = tokenId >= 0 && tokenId < pos.getEndSpan();
        if (inRange) {
            String label = pos.getConstituentsCoveringToken(tokenId).get(0).getLabel();
            features.add(new Pair<>(offset, label));
        }
    }
    return features;
}
/**
 * Builds part-of-speech features for the tokens around a constituent.
 * The constituent is expected to be one token long.
 *
 * For each offset from -1 to 2 relative to the constituent's start token,
 * the label of the first POS constituent covering that token is paired with
 * the offset. Offsets that fall before token 0 or at/after the end of the
 * POS view are skipped.
 *
 * @param c the constituent whose neighbourhood is inspected
 * @return (offset, POS label) pairs in increasing offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures (Constituent c) {
    View pos = c.getTextAnnotation().getView(ViewNames.POS);
    List<Pair<Integer, String>> features = new ArrayList<>();
    int anchor = c.getStartSpan();
    for (int offset = -1; offset <= 2; offset++) {
        int tokenId = anchor + offset;
        boolean inRange = tokenId >= 0 && tokenId < pos.getEndSpan();
        if (inRange) {
            String label = pos.getConstituentsCoveringToken(tokenId).get(0).getLabel();
            features.add(new Pair<>(offset, label));
        }
    }
    return features;
}
/**
 * Discrete feature value of the {@code afterHeadPOS} classifier.
 *
 * Returns the label of the first POS constituent covering the token at index
 * {@code target.getEndSpan() + 1}, where {@code target} is the relation's
 * target, or {@code "OUT_OF_BOUND"} when the target's end span is not smaller
 * than the TOKENS view's end span minus one.
 *
 * If the argument is not a {@code Relation}, a diagnostic and a stack trace
 * are printed to standard error and the JVM exits with status 1.
 *
 * @param __example the object to classify; must be a {@code Relation}
 * @return the POS label, or {@code "OUT_OF_BOUND"}
 */
public String discreteValue(Object __example) {
    boolean isRelation = __example instanceof Relation;
    if (!isRelation) {
        String type;
        if (__example == null) {
            type = "null";
        } else {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation relation = (Relation) __example;
    TextAnnotation annotation = relation.getTarget().getTextAnnotation();
    int targetEnd = relation.getTarget().getEndSpan();
    int lastTokenBound = annotation.getView(ViewNames.TOKENS).getEndSpan() - 1;

    // Guard: the lookup below reads token targetEnd + 1, which must exist.
    if (targetEnd >= lastTokenBound) {
        return "OUT_OF_BOUND";
    }
    return "" + (annotation.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel());
}
/**
 * Discrete feature value of the {@code afterHeadPOS} classifier.
 *
 * Returns the label of the first POS constituent covering the token at index
 * {@code target.getEndSpan() + 1}, where {@code target} is the relation's
 * target, or {@code "OUT_OF_BOUND"} when the target's end span is not smaller
 * than the TOKENS view's end span minus one.
 *
 * If the argument is not a {@code Relation}, a diagnostic and a stack trace
 * are printed to standard error and the JVM exits with status 1.
 *
 * @param __example the object to classify; must be a {@code Relation}
 * @return the POS label, or {@code "OUT_OF_BOUND"}
 */
public String discreteValue(Object __example) {
    boolean isRelation = __example instanceof Relation;
    if (!isRelation) {
        String type;
        if (__example == null) {
            type = "null";
        } else {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation relation = (Relation) __example;
    TextAnnotation annotation = relation.getTarget().getTextAnnotation();
    int targetEnd = relation.getTarget().getEndSpan();
    int lastTokenBound = annotation.getView(ViewNames.TOKENS).getEndSpan() - 1;

    // Guard: the lookup below reads token targetEnd + 1, which must exist.
    if (targetEnd >= lastTokenBound) {
        return "OUT_OF_BOUND";
    }
    return "" + (annotation.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel());
}
/**
 * Attaches the feature attributes used by the extent classifier to a constituent.
 *
 * The following attributes are set on {@code extent}:
 * {@code GAZ} - gazetteer annotation of the extent;
 * {@code BC} - combined Brown-cluster prefixes of the extent's text;
 * {@code BCm1} - combined Brown-cluster prefixes of the token just before the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code BCp1} - combined Brown-cluster prefixes of the token just after the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code WORDNETTAG} and {@code WORDNETHYM} - WordNet features computed by
 * {@code BIOFeatureExtractor}.
 *
 * @param extent        constituent to decorate; modified in place
 * @param gazetteers    gazetteers instance; must be a {@code FlatGazetteers}
 * @param brownClusters Brown-cluster resource used for the BC* attributes
 * @param wordnet       WordNet manager passed through to {@code BIOFeatureExtractor}
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet){
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    int start = extent.getStartSpan();

    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // Previous token: ">=" so that the first token of the view is a valid
    // left neighbour (a strict ">" would skip it).
    if (start - 1 >= tokenView.getStartSpan()) {
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start - 1).get(0).toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    // Next token: the placeholder must go under "BCp1"; writing it under
    // "BCm1" would clobber the previous-token feature and leave BCp1 unset.
    if (start + 1 < tokenView.getEndSpan()) {
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start + 1).get(0).toString()));
    } else {
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}
/**
 * Attaches the feature attributes used by the extent classifier to a constituent.
 *
 * The following attributes are set on {@code extent}:
 * {@code GAZ} - gazetteer annotation of the extent;
 * {@code BC} - combined Brown-cluster prefixes of the extent's text;
 * {@code BCm1} - combined Brown-cluster prefixes of the token just before the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code BCp1} - combined Brown-cluster prefixes of the token just after the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code WORDNETTAG} and {@code WORDNETHYM} - WordNet features computed by
 * {@code BIOFeatureExtractor}.
 *
 * @param extent        constituent to decorate; modified in place
 * @param gazetteers    gazetteers instance; must be a {@code FlatGazetteers}
 * @param brownClusters Brown-cluster resource used for the BC* attributes
 * @param wordnet       WordNet manager passed through to {@code BIOFeatureExtractor}
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet){
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    int start = extent.getStartSpan();

    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // Previous token: ">=" so that the first token of the view is a valid
    // left neighbour (a strict ">" would skip it).
    if (start - 1 >= tokenView.getStartSpan()) {
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start - 1).get(0).toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    // Next token: the placeholder must go under "BCp1"; writing it under
    // "BCm1" would clobber the previous-token feature and leave BCp1 unset.
    if (start + 1 < tokenView.getEndSpan()) {
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start + 1).get(0).toString()));
    } else {
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}
/**
 * Attaches the feature attributes used by the extent classifier to a constituent.
 *
 * The following attributes are set on {@code extent}:
 * {@code GAZ} - gazetteer annotation of the extent;
 * {@code BC} - combined Brown-cluster prefixes of the extent's text;
 * {@code BCm1} - combined Brown-cluster prefixes of the token just before the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code BCp1} - combined Brown-cluster prefixes of the token just after the
 * extent's start token, or {@code ","} when there is no such token;
 * {@code WORDNETTAG} and {@code WORDNETHYM} - WordNet features computed by
 * {@code BIOFeatureExtractor}.
 *
 * @param extent        constituent to decorate; modified in place
 * @param gazetteers    gazetteers instance; must be a {@code FlatGazetteers}
 * @param brownClusters Brown-cluster resource used for the BC* attributes
 * @param wordnet       WordNet manager passed through to {@code BIOFeatureExtractor}
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet){
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    int start = extent.getStartSpan();

    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // Previous token: ">=" so that the first token of the view is a valid
    // left neighbour (a strict ">" would skip it).
    if (start - 1 >= tokenView.getStartSpan()) {
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start - 1).get(0).toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    // Next token: the placeholder must go under "BCp1"; writing it under
    // "BCm1" would clobber the previous-token feature and leave BCp1 unset.
    if (start + 1 < tokenView.getEndSpan()) {
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(start + 1).get(0).toString()));
    } else {
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}