/**
 * Gathers the constituents that cover at least one of the supplied tokens.
 *
 * @param tokenIds token indices; each one is handed to
 *                 {@code getConstituentsCoveringToken}
 * @return the distinct covering constituents, sorted with
 *         {@code TextAnnotationUtilities.constituentStartComparator}
 */
public List<Constituent> getConstituentsCoveringTokens(Collection<Integer> tokenIds) {
    // A set drops constituents that are reported for more than one token.
    Set<Constituent> covering = new HashSet<>();
    for (int tokenId : tokenIds) {
        covering.addAll(getConstituentsCoveringToken(tokenId));
    }

    List<Constituent> ordered = new ArrayList<>(covering);
    Collections.sort(ordered, TextAnnotationUtilities.constituentStartComparator);
    return ordered;
}
/**
 * Gathers the constituents that cover at least one token in the half-open
 * range {@code [start, end)}.
 *
 * @param start first token index to inspect (inclusive)
 * @param end   token index at which to stop (exclusive)
 * @return the distinct covering constituents, sorted with
 *         {@code TextAnnotationUtilities.constituentStartComparator}
 */
public List<Constituent> getConstituentsCoveringSpan(int start, int end) {
    // A set drops constituents that are reported for more than one token.
    Set<Constituent> covering = new HashSet<>();
    int position = start;
    while (position < end) {
        covering.addAll(getConstituentsCoveringToken(position));
        position++;
    }

    List<Constituent> ordered = new ArrayList<>(covering);
    Collections.sort(ordered, TextAnnotationUtilities.constituentStartComparator);
    return ordered;
}
/**
 * Gathers the constituents that cover at least one of the supplied tokens.
 *
 * @param tokenIds token indices; each one is handed to
 *                 {@code getConstituentsCoveringToken}
 * @return the distinct covering constituents, sorted with
 *         {@code TextAnnotationUtilities.constituentStartComparator}
 */
public List<Constituent> getConstituentsCoveringTokens(Collection<Integer> tokenIds) {
    // A set drops constituents that are reported for more than one token.
    Set<Constituent> covering = new HashSet<>();
    for (int tokenId : tokenIds) {
        covering.addAll(getConstituentsCoveringToken(tokenId));
    }

    List<Constituent> ordered = new ArrayList<>(covering);
    Collections.sort(ordered, TextAnnotationUtilities.constituentStartComparator);
    return ordered;
}
/**
 * Looks up the tokenized surface form for one token position.
 *
 * @param ta    text annotation whose {@code ViewNames.TOKENS} view is queried
 * @param token token index to look up
 * @return the tokenized surface form of the first constituent the TOKENS view
 *         reports as covering {@code token}
 */
private String getWord(TextAnnotation ta, int token) {
    // Only the first covering constituent is consulted.
    return ta.getView(ViewNames.TOKENS)
            .getConstituentsCoveringToken(token)
            .get(0)
            .getTokenizedSurfaceForm();
}
/**
 * Looks up the tokenized surface form for one token position.
 *
 * @param ta    text annotation whose {@code ViewNames.TOKENS} view is queried
 * @param token token index to look up
 * @return the tokenized surface form of the first constituent the TOKENS view
 *         reports as covering {@code token}
 */
private String getWord(TextAnnotation ta, int token) {
    // Only the first covering constituent is consulted.
    return ta.getView(ViewNames.TOKENS)
            .getConstituentsCoveringToken(token)
            .get(0)
            .getTokenizedSurfaceForm();
}
/**
 * Attaches gazetteer and Brown-cluster attributes to a head constituent.
 *
 * <p>For each token index {@code i} in {@code [head.getStartSpan(), head.getEndSpan())}
 * the attributes {@code "GAZ" + i} and {@code "BC" + i} are set from the first
 * TOKENS-view constituent covering {@code i}. A phrase-level {@code "GAZ"} attribute
 * is then set for the head as a whole.
 *
 * @param head          constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       not used by this method
 */
public static void addHeadAttributes(Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = head.getTextAnnotation().getView(ViewNames.TOKENS);
    FlatGazetteers flatGazetteers = (FlatGazetteers) gazetteers;

    for (int i = head.getStartSpan(); i < head.getEndSpan(); i++) {
        // Both per-token attributes are derived from the same token constituent.
        Constituent token = tokenView.getConstituentsCoveringToken(i).get(0);
        head.addAttribute("GAZ" + i, flatGazetteers.annotateConstituent(token, false));
        head.addAttribute("BC" + i, brownClusters.getPrefixesCombined(token.toString()));
    }

    head.addAttribute("GAZ", flatGazetteers.annotatePhrase(head));
}
/**
 * Attaches gazetteer and Brown-cluster attributes to a head constituent.
 *
 * <p>For each token index {@code i} in {@code [head.getStartSpan(), head.getEndSpan())}
 * the attributes {@code "GAZ" + i} and {@code "BC" + i} are set from the first
 * TOKENS-view constituent covering {@code i}. A phrase-level {@code "GAZ"} attribute
 * is then set for the head as a whole.
 *
 * @param head          constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       not used by this method
 */
public static void addHeadAttributes(Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = head.getTextAnnotation().getView(ViewNames.TOKENS);
    FlatGazetteers flatGazetteers = (FlatGazetteers) gazetteers;

    for (int i = head.getStartSpan(); i < head.getEndSpan(); i++) {
        // Both per-token attributes are derived from the same token constituent.
        Constituent token = tokenView.getConstituentsCoveringToken(i).get(0);
        head.addAttribute("GAZ" + i, flatGazetteers.annotateConstituent(token, false));
        head.addAttribute("BC" + i, brownClusters.getPrefixesCombined(token.toString()));
    }

    head.addAttribute("GAZ", flatGazetteers.annotatePhrase(head));
}
/**
 * Attaches gazetteer and Brown-cluster attributes to a head constituent.
 *
 * <p>For each token index {@code i} in {@code [head.getStartSpan(), head.getEndSpan())}
 * the attributes {@code "GAZ" + i} and {@code "BC" + i} are set from the first
 * TOKENS-view constituent covering {@code i}. A phrase-level {@code "GAZ"} attribute
 * is then set for the head as a whole.
 *
 * @param head          constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       not used by this method
 */
public static void addHeadAttributes(Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = head.getTextAnnotation().getView(ViewNames.TOKENS);
    FlatGazetteers flatGazetteers = (FlatGazetteers) gazetteers;

    for (int i = head.getStartSpan(); i < head.getEndSpan(); i++) {
        // Both per-token attributes are derived from the same token constituent.
        Constituent token = tokenView.getConstituentsCoveringToken(i).get(0);
        head.addAttribute("GAZ" + i, flatGazetteers.annotateConstituent(token, false));
        head.addAttribute("BC" + i, brownClusters.getPrefixesCombined(token.toString()));
    }

    head.addAttribute("GAZ", flatGazetteers.annotatePhrase(head));
}
/**
 * Extracts part-of-speech features around a given Constituent.
 * The size of the Constituent should be 1.
 *
 * <p>The POS view is read at offsets -1, 0, 1 and 2 relative to the constituent's
 * start span. Offsets whose token index is negative or not below the POS view's
 * end span are skipped.
 *
 * @param c constituent whose start span anchors the window
 * @return one (offset, POS label) pair per in-range position, in offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures(Constituent c) {
    List<Pair<Integer, String>> posFeatures = new ArrayList<>();
    View posView = c.getTextAnnotation().getView(ViewNames.POS);

    for (int offset = -1; offset <= 2; offset++) {
        int tokenIndex = c.getStartSpan() + offset;
        boolean inRange = tokenIndex >= 0 && tokenIndex < posView.getEndSpan();
        if (inRange) {
            // Only the first constituent covering the token supplies the label.
            String label = posView.getConstituentsCoveringToken(tokenIndex).get(0).getLabel();
            posFeatures.add(new Pair<>(offset, label));
        }
    }
    return posFeatures;
}
/**
 * Returns the POS label of a token located after the relation's target.
 *
 * <p>If {@code __example} is not a {@code Relation}, a diagnostic and a stack trace
 * are printed and the JVM exits with status 1.
 *
 * <p>NOTE(review): the label is read at index {@code getEndSpan() + 1}. If end spans
 * are exclusive, that is the second token after the target, not the first. Confirm
 * this is intended.
 *
 * @param __example expected to be a {@code Relation}
 * @return the POS label at {@code target.getEndSpan() + 1}, or {@code "OUT_OF_BOUND"}
 *         when the target's end span is not below the TOKENS view's end span minus one
 */
public String discreteValue(Object __example) {
    if (!(__example instanceof Relation)) {
        String type = "null";
        if (__example != null) {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation r = (Relation) __example;
    TextAnnotation ta = r.getTarget().getTextAnnotation();
    int targetEnd = r.getTarget().getEndSpan();

    // Bail out when index targetEnd + 1 would fall outside the token view.
    if (targetEnd >= ta.getView(ViewNames.TOKENS).getEndSpan() - 1) {
        return "OUT_OF_BOUND";
    }
    return "" + ta.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel();
}
/**
 * Extracts part-of-speech features around a given Constituent.
 * The size of the Constituent should be 1.
 *
 * <p>The POS view is read at offsets -1, 0, 1 and 2 relative to the constituent's
 * start span. Offsets whose token index is negative or not below the POS view's
 * end span are skipped.
 *
 * @param c constituent whose start span anchors the window
 * @return one (offset, POS label) pair per in-range position, in offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures(Constituent c) {
    List<Pair<Integer, String>> posFeatures = new ArrayList<>();
    View posView = c.getTextAnnotation().getView(ViewNames.POS);

    for (int offset = -1; offset <= 2; offset++) {
        int tokenIndex = c.getStartSpan() + offset;
        boolean inRange = tokenIndex >= 0 && tokenIndex < posView.getEndSpan();
        if (inRange) {
            // Only the first constituent covering the token supplies the label.
            String label = posView.getConstituentsCoveringToken(tokenIndex).get(0).getLabel();
            posFeatures.add(new Pair<>(offset, label));
        }
    }
    return posFeatures;
}
/**
 * Returns the POS label of a token located after the relation's target.
 *
 * <p>If {@code __example} is not a {@code Relation}, a diagnostic and a stack trace
 * are printed and the JVM exits with status 1.
 *
 * <p>NOTE(review): the label is read at index {@code getEndSpan() + 1}. If end spans
 * are exclusive, that is the second token after the target, not the first. Confirm
 * this is intended.
 *
 * @param __example expected to be a {@code Relation}
 * @return the POS label at {@code target.getEndSpan() + 1}, or {@code "OUT_OF_BOUND"}
 *         when the target's end span is not below the TOKENS view's end span minus one
 */
public String discreteValue(Object __example) {
    if (!(__example instanceof Relation)) {
        String type = "null";
        if (__example != null) {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation r = (Relation) __example;
    TextAnnotation ta = r.getTarget().getTextAnnotation();
    int targetEnd = r.getTarget().getEndSpan();

    // Bail out when index targetEnd + 1 would fall outside the token view.
    if (targetEnd >= ta.getView(ViewNames.TOKENS).getEndSpan() - 1) {
        return "OUT_OF_BOUND";
    }
    return "" + ta.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel();
}
/**
 * Extracts part-of-speech features around a given Constituent.
 * The size of the Constituent should be 1.
 *
 * <p>The POS view is read at offsets -1, 0, 1 and 2 relative to the constituent's
 * start span. Offsets whose token index is negative or not below the POS view's
 * end span are skipped.
 *
 * @param c constituent whose start span anchors the window
 * @return one (offset, POS label) pair per in-range position, in offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures(Constituent c) {
    List<Pair<Integer, String>> posFeatures = new ArrayList<>();
    View posView = c.getTextAnnotation().getView(ViewNames.POS);

    for (int offset = -1; offset <= 2; offset++) {
        int tokenIndex = c.getStartSpan() + offset;
        boolean inRange = tokenIndex >= 0 && tokenIndex < posView.getEndSpan();
        if (inRange) {
            // Only the first constituent covering the token supplies the label.
            String label = posView.getConstituentsCoveringToken(tokenIndex).get(0).getLabel();
            posFeatures.add(new Pair<>(offset, label));
        }
    }
    return posFeatures;
}
/**
 * Returns the POS label of a token located after the relation's target.
 *
 * <p>If {@code __example} is not a {@code Relation}, a diagnostic and a stack trace
 * are printed and the JVM exits with status 1.
 *
 * <p>NOTE(review): the label is read at index {@code getEndSpan() + 1}. If end spans
 * are exclusive, that is the second token after the target, not the first. Confirm
 * this is intended.
 *
 * @param __example expected to be a {@code Relation}
 * @return the POS label at {@code target.getEndSpan() + 1}, or {@code "OUT_OF_BOUND"}
 *         when the target's end span is not below the TOKENS view's end span minus one
 */
public String discreteValue(Object __example) {
    if (!(__example instanceof Relation)) {
        String type = "null";
        if (__example != null) {
            type = __example.getClass().getName();
        }
        System.err.println("Classifier 'afterHeadPOS(Relation)' defined on line 45 of extent.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    Relation r = (Relation) __example;
    TextAnnotation ta = r.getTarget().getTextAnnotation();
    int targetEnd = r.getTarget().getEndSpan();

    // Bail out when index targetEnd + 1 would fall outside the token view.
    if (targetEnd >= ta.getView(ViewNames.TOKENS).getEndSpan() - 1) {
        return "OUT_OF_BOUND";
    }
    return "" + ta.getView(ViewNames.POS).getConstituentsCoveringToken(targetEnd + 1).get(0).getLabel();
}
/**
 * Extracts the gazetteer features for a given constituent.
 * The size of the Constituent should be 1.
 *
 * <p>The {@code "BIO"} view is read at offsets -1, 0, 1 and 2 relative to the
 * constituent's start span (one token before, the token itself, and two tokens
 * after). Offsets whose token index is negative or not below the view's end span
 * are skipped. For each position the {@code "GAZ"} attribute of the first covering
 * constituent is split on commas and each piece becomes one feature.
 *
 * <p>The {@code "GAZ"} attribute is only read here; it must already have been set
 * on the BIO constituents.
 *
 * @param c constituent whose start span anchors the window
 * @return (offset, gazetteer value) pairs in offset order
 */
public static List<Pair<Integer, String>> getGazetteerFeatures(Constituent c) {
    View bioView = c.getTextAnnotation().getView("BIO");
    List<Pair<Integer, String>> gazFeatures = new ArrayList<>();

    for (int offset = -1; offset <= 2; offset++) {
        int tokenIndex = c.getStartSpan() + offset;
        boolean inRange = tokenIndex >= 0 && tokenIndex < bioView.getEndSpan();
        if (!inRange) {
            continue;
        }
        Constituent covering = bioView.getConstituentsCoveringToken(tokenIndex).get(0);
        if (covering == null) {
            continue;
        }
        // String.split never produces null elements, so every piece is emitted.
        for (String value : covering.getAttribute("GAZ").split(",")) {
            gazFeatures.add(new Pair<>(offset, value));
        }
    }
    return gazFeatures;
}
@Override public Set<Feature> getFeatures(Constituent c) throws EdisonException { Set<Feature> features = new HashSet<>(); // get allowable window given position in text IntPair relativeWindow = FeatureCreatorUtil.getWindowSpan(c, windowStart, windowEnd, ignoreSentenceBoundaries); int absStart = c.getStartSpan() - relativeWindow.getFirst(); View tokens = c.getTextAnnotation().getView(ViewNames.TOKENS); for (int i = relativeWindow.getFirst(); i <= relativeWindow.getSecond(); ++i) { Constituent word = tokens.getConstituentsCoveringToken(absStart + i).get(0); double[] embedding = WordEmbeddings.getEmbedding(word); if (embedding != null) { for (int dim = 0; dim < embedding.length; dim++) { final String[] pieces = {getName(), ":", "place", Integer.toString(i), "dim", Integer.toString(dim), ":", Double.toString(embedding[dim])}; features.add(FeatureCreatorUtil.createFeatureFromArray(pieces)); } } i++; } return features; }
@Override public Set<Feature> getFeatures(Constituent c) throws EdisonException { Set<Feature> features = new HashSet<>(); // get allowable window given position in text IntPair relativeWindow = FeatureCreatorUtil.getWindowSpan(c, windowStart, windowEnd, ignoreSentenceBoundaries); int absStart = c.getStartSpan() - relativeWindow.getFirst(); View tokens = c.getTextAnnotation().getView(ViewNames.TOKENS); for (int i = relativeWindow.getFirst(); i <= relativeWindow.getSecond(); ++i) { Constituent word = tokens.getConstituentsCoveringToken(absStart + i).get(0); double[] embedding = WordEmbeddings.getEmbedding(word); if (embedding != null) { for (int dim = 0; dim < embedding.length; dim++) { final String[] pieces = {getName(), ":", "place", Integer.toString(i), "dim", Integer.toString(dim), ":", Double.toString(embedding[dim])}; features.add(FeatureCreatorUtil.createFeatureFromArray(pieces)); } } i++; } return features; }
/**
 * Attaches gazetteer, Brown-cluster and WordNet attributes to an extent constituent.
 *
 * <p>Attributes written: {@code "GAZ"}, {@code "BC"}, {@code "BCm1"} (token before the
 * extent's start), {@code "BCp1"} (token at start span + 1), {@code "WORDNETTAG"} and
 * {@code "WORDNETHYM"}. When a neighbouring token is rejected by the bounds check, the
 * corresponding attribute is set to {@code ","}.
 *
 * <p>Fix: the fallback for the following token used to write {@code "BCm1"} instead of
 * {@code "BCp1"}, which overwrote the previous-token attribute and left {@code "BCp1"}
 * unset.
 *
 * @param extent        constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       passed to the {@code BIOFeatureExtractor} WordNet helpers
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // NOTE(review): the strict '>' also rejects the case where the preceding token is
    // the first token of the view. Confirm whether '>=' was intended; left unchanged.
    if (extent.getStartSpan() - 1 > tokenView.getStartSpan()) {
        Constituent previous = tokenView.getConstituentsCoveringToken(extent.getStartSpan() - 1).get(0);
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(previous.toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    if (extent.getStartSpan() + 1 < tokenView.getEndSpan()) {
        Constituent next = tokenView.getConstituentsCoveringToken(extent.getStartSpan() + 1).get(0);
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(next.toString()));
    } else {
        // Placeholder for the missing following token (was mistakenly written to "BCm1").
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}
/**
 * Attaches gazetteer, Brown-cluster and WordNet attributes to an extent constituent.
 *
 * <p>Attributes written: {@code "GAZ"}, {@code "BC"}, {@code "BCm1"} (token before the
 * extent's start), {@code "BCp1"} (token at start span + 1), {@code "WORDNETTAG"} and
 * {@code "WORDNETHYM"}. When a neighbouring token is rejected by the bounds check, the
 * corresponding attribute is set to {@code ","}.
 *
 * <p>Fix: the fallback for the following token used to write {@code "BCm1"} instead of
 * {@code "BCp1"}, which overwrote the previous-token attribute and left {@code "BCp1"}
 * unset.
 *
 * @param extent        constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       passed to the {@code BIOFeatureExtractor} WordNet helpers
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // NOTE(review): the strict '>' also rejects the case where the preceding token is
    // the first token of the view. Confirm whether '>=' was intended; left unchanged.
    if (extent.getStartSpan() - 1 > tokenView.getStartSpan()) {
        Constituent previous = tokenView.getConstituentsCoveringToken(extent.getStartSpan() - 1).get(0);
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(previous.toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    if (extent.getStartSpan() + 1 < tokenView.getEndSpan()) {
        Constituent next = tokenView.getConstituentsCoveringToken(extent.getStartSpan() + 1).get(0);
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(next.toString()));
    } else {
        // Placeholder for the missing following token (was mistakenly written to "BCm1").
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}
/**
 * Attaches gazetteer, Brown-cluster and WordNet attributes to an extent constituent.
 *
 * <p>Attributes written: {@code "GAZ"}, {@code "BC"}, {@code "BCm1"} (token before the
 * extent's start), {@code "BCp1"} (token at start span + 1), {@code "WORDNETTAG"} and
 * {@code "WORDNETHYM"}. When a neighbouring token is rejected by the bounds check, the
 * corresponding attribute is set to {@code ","}.
 *
 * <p>Fix: the fallback for the following token used to write {@code "BCm1"} instead of
 * {@code "BCp1"}, which overwrote the previous-token attribute and left {@code "BCp1"}
 * unset.
 *
 * @param extent        constituent that receives the attributes
 * @param gazetteers    downcast unconditionally to {@code FlatGazetteers}
 * @param brownClusters supplies the combined Brown-cluster prefixes
 * @param wordnet       passed to the {@code BIOFeatureExtractor} WordNet helpers
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // NOTE(review): the strict '>' also rejects the case where the preceding token is
    // the first token of the view. Confirm whether '>=' was intended; left unchanged.
    if (extent.getStartSpan() - 1 > tokenView.getStartSpan()) {
        Constituent previous = tokenView.getConstituentsCoveringToken(extent.getStartSpan() - 1).get(0);
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(previous.toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    if (extent.getStartSpan() + 1 < tokenView.getEndSpan()) {
        Constituent next = tokenView.getConstituentsCoveringToken(extent.getStartSpan() + 1).get(0);
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(next.toString()));
    } else {
        // Placeholder for the missing following token (was mistakenly written to "BCm1").
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}