/**
 * Returns the tag of this object by delegating to {@code label}.
 *
 * @return whatever {@code label.tag()} returns
 */
@Override
public String tag() {
  final String delegateTag = label.tag();
  return delegateTag;
}
/**
 * The head of the relation of this relation triple. This is usually the main verb:
 * the last token of the relation whose tag starts with {@code "V"}, or the last
 * token of the relation if no token has such a tag.
 *
 * @return the head token of the relation
 */
public CoreLabel relationHead() {
  // Fallback is the final token; this lookup throws when the relation is empty.
  CoreLabel head = relation.get(relation.size() - 1);
  for (CoreLabel token : relation) {
    if (token.tag().startsWith("V")) {
      // Keep overwriting so that the last verb-tagged token wins.
      head = token;
    }
  }
  return head;
}
private boolean containsValidPos(List<CoreLabel> tokens, int start, int end) { if (validPosPattern == null) { return true; } // log.info("CHECKING " + start + " " + end); for (int i = start; i < end; i ++) { // log.info("TAG = " + tokens.get(i).tag()); if (tokens.get(i).tag() == null) { throw new IllegalArgumentException("RegexNER was asked to check for valid tags on an untagged sequence. Either tag the sequence, perhaps with the pos annotator, or create RegexNER with an empty validPosPattern, perhaps with the property regexner.validpospattern"); } Matcher m = validPosPattern.matcher(tokens.get(i).tag()); if (m.find()) return true; } return false; }
/**
 * Looks up the tag of the word at the given index.
 *
 * @param k Word index (zero = root node; actual word indexing
 *          begins at 1)
 * @return {@code Config.ROOT} when {@code k} is zero, {@code Config.NULL} when
 *         {@code k} does not address an existing word, and the word's tag otherwise
 */
public String getPOS(int k) {
  if (k == 0) {
    return Config.ROOT;
  }

  // Words are numbered from 1, while the label list is indexed from 0.
  final int listIndex = k - 1;
  final List<CoreLabel> labels = getCoreLabels();
  if (listIndex >= 0 && listIndex < labels.size()) {
    return labels.get(listIndex).tag();
  }
  return Config.NULL;
}
/**
 * Tells whether the given words contain a token whose word is "that" and whose
 * tag is "IN" (both compared case-insensitively).
 *
 * @param words the tokens to scan
 * @return {@code true} if such a token exists, {@code false} otherwise
 */
public static boolean hasThat(List<CoreLabel> words) {
  return words.stream()
      .anyMatch(token -> token.word().equalsIgnoreCase("that")
          && token.tag().equalsIgnoreCase("IN"));
}
/**
 * List of pos tags: the tag of every token returned by {@code tokens()}, in the same order.
 *
 * @return a list with one tag per token
 */
public List<String> posTags() {
  final List<String> tags = tokens().stream()
      .map(tok -> tok.tag())
      .collect(Collectors.toList());
  return tags;
}
/**
 * Source training data for a unigram tagger from the given tree.
 * Every (word, tag) pair in the tree's tagged yield is counted, except for
 * leaves tagged with {@code SpanishTreeNormalizer.MW_TAG}, which are skipped.
 *
 * @param tagger counter that receives one increment per counted (word, tag) pair
 * @param t      tree whose tagged yield supplies the pairs
 */
public static void updateTagger(TwoDimensionalCounter<String,String> tagger, Tree t) {
  for (CoreLabel leaf : t.taggedLabeledYield()) {
    final String tag = leaf.tag();
    if (!tag.equals(SpanishTreeNormalizer.MW_TAG)) {
      tagger.incrementCount(leaf.word(), tag);
    }
  }
}
/**
 * Adds the (word, tag) pairs of the given tree's tagged yield to a tagger counter.
 * When {@code RESOLVE_DUMMY_TAGS} is set, leaves tagged with
 * {@code FrenchXMLTreeReader.MISSING_POS} are left out.
 *
 * @param tagger counter that receives one increment per counted (word, tag) pair
 * @param t      tree whose tagged yield supplies the pairs
 */
public static void updateTagger(TwoDimensionalCounter<String,String> tagger, Tree t) {
  for (CoreLabel leaf : t.taggedLabeledYield()) {
    // The tag is only inspected when dummy-tag resolution is switched on.
    final boolean skipLeaf = RESOLVE_DUMMY_TAGS
        && leaf.tag().equals(FrenchXMLTreeReader.MISSING_POS);
    if (!skipLeaf) {
      tagger.incrementCount(leaf.word(), leaf.tag());
    }
  }
}
/**
 * Removes, in place, mentions that match one of two simple filters from every
 * mention list in {@code predictedMentions}:
 * <ul>
 *   <li>the mention's original span has exactly one token and its head word is tagged "CD";</li>
 *   <li>the mention's span string contains "quot".</li>
 * </ul>
 *
 * @param doc               not used by this method
 * @param predictedMentions mention lists to filter; each inner list is modified
 * @param dict              not used by this method
 */
private static void removeSpuriousMentionsZhSimple(Annotation doc, List<List<Mention>> predictedMentions, Dictionaries dict) {
  for (List<Mention> mentionList : predictedMentions) {
    // Collect first and remove afterwards, so the list is not modified while iterating.
    Set<Mention> spurious = Generics.newHashSet();

    for (Mention candidate : mentionList) {
      boolean singleTokenNumber =
          candidate.originalSpan.size() == 1 && candidate.headWord.tag().equals("CD");
      boolean mentionsQuot = candidate.spanToString().contains("quot");

      if (singleTokenNumber) {
        spurious.add(candidate);
      }
      if (mentionsQuot) {
        spurious.add(candidate);
      }
    }

    mentionList.removeAll(spurious);
  }
}
/**
 * The subject of this relation triple, as a space-separated String of the subject's lemmas.
 * Tokens whose tag is a single punctuation character (one of {@code . ? , : ; ' " !})
 * are left out, and a token without a lemma contributes its word instead.
 *
 * @return the lemma gloss of the canonical subject
 */
public String subjectLemmaGloss() {
  return StringUtils.join(
      canonicalSubject.stream()
          .filter(token -> !token.tag().matches("[.?,:;'\"!]"))
          .map(token -> {
            if (token.lemma() != null) {
              return token.lemma();
            }
            return token.word();
          }),
      " ");
}
/**
 * The object of this relation triple, as a space-separated String of the object's lemmas.
 * Tokens whose tag is a single punctuation character (one of {@code . ? , : ; ' " !})
 * are left out, and a token without a lemma contributes its word instead.
 *
 * @return the lemma gloss of the canonical object
 */
public String objectLemmaGloss() {
  return StringUtils.join(
      canonicalObject.stream()
          .filter(token -> !token.tag().matches("[.?,:;'\"!]"))
          .map(token -> {
            if (token.lemma() != null) {
              return token.lemma();
            }
            return token.word();
          }),
      " ");
}
/**
 * Measures the percentage of incorrect taggings that can be attributed to OOV words.
 * For every position where the guessed tag differs from the gold tag, the gold tag is
 * counted in {@code percentOOV2}; when the gold word is additionally unknown to
 * {@code lex}, the gold tag is also counted in {@code percentOOV}.
 *
 * @param guess the tree produced by the system under evaluation
 * @param gold  the reference tree; its yield is expected to be as long as the guess yield
 */
private void measureOOV(Tree guess, Tree gold) {
  List<CoreLabel> goldTagging = gold.taggedLabeledYield();
  List<CoreLabel> guessTagging = guess.taggedLabeledYield();

  assert goldTagging.size() == guessTagging.size();

  for (int i = 0; i < goldTagging.size(); i++) {
    CoreLabel goldLabel = goldTagging.get(i);
    String goldTag = goldLabel.tag();
    String guessTag = guessTagging.get(i).tag();

    // Compare the tag values. The labels come from two separate yields, so an
    // identity comparison (==) on the label objects says nothing about the tags.
    boolean mistagged = (goldTag == null) ? guessTag != null : !goldTag.equals(guessTag);
    if (mistagged) {
      percentOOV2.incrementCount(goldTag);
      if (!lex.isKnown(goldLabel.word())) {
        percentOOV.incrementCount(goldTag);
      }
    }
  }
}
public void process() throws IOException { SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory(); Tree t; for (File file : fileList) { Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ANCORA_ENCODING)); TreeReader tr = trf.newTreeReader(in); // Tree reading will implicitly perform tree normalization for us while ((t = tr.readTree()) != null) { // Update tagger with this tree List<CoreLabel> yield = t.taggedLabeledYield(); for (CoreLabel leafLabel : yield) { if (leafLabel.tag().equals(SpanishTreeNormalizer.MW_TAG)) continue; unigramTagger.incrementCount(leafLabel.word(), leafLabel.tag()); } } } }
/**
 * Tells whether {@code small}, when tagged "NNP", is covered by some token of {@code big}:
 * either the words are equal, or {@code small}'s word is longer than two characters
 * and is a prefix of the other token's word.
 *
 * @param small the token to look for
 * @param big   the tokens to search in
 * @return {@code true} if a covering token exists; always {@code false} when
 *         {@code small} is not tagged "NNP"
 */
private static boolean included(CoreLabel small, List<CoreLabel> big) {
  if (!small.tag().equals("NNP")) {
    return false;
  }

  for (CoreLabel candidate : big) {
    final String smallWord = small.word();
    final String candidateWord = candidate.word();

    if (smallWord.equals(candidateWord)) {
      return true;
    }
    // Very short words are not accepted as prefixes.
    if (smallWord.length() > 2 && candidateWord.startsWith(smallWord)) {
      return true;
    }
  }
  return false;
}
/**
 * Tells whether {@code small}, when tagged "NNP", is covered by some token of {@code big}:
 * either the words are equal, or {@code small}'s word is longer than two characters
 * and is a prefix of the other token's word.
 *
 * @param small the token to look for
 * @param big   the tokens to search in
 * @return {@code true} if a covering token exists; always {@code false} when
 *         {@code small} is not tagged "NNP"
 */
private static boolean included(CoreLabel small, List<CoreLabel> big) {
  return small.tag().equals("NNP")
      && big.stream().anyMatch(other ->
          small.word().equals(other.word())
              || (small.word().length() > 2 && other.word().startsWith(small.word())));
}
/**
 * Finishes a sentence with a tree built by {@code ParserUtils.xTree} from the
 * sentence's tokens. Every token that has no tag is given the tag "XX".
 *
 * @param annotation the enclosing annotation (not read by this method)
 * @param sentence   the sentence whose tokens are used and which is passed to {@code finishSentence}
 */
@Override
public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
  final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);

  // The tree is built before the missing tags are filled in.
  final Tree fallbackTree = ParserUtils.xTree(tokens);

  tokens.stream()
      .filter(token -> token.tag() == null)
      .forEach(token -> token.setTag("XX"));

  final List<Tree> trees = Generics.newArrayList(1);
  trees.add(fallbackTree);
  finishSentence(sentence, trees);
}
/**
 * Sets a lemma on every leaf label of the tree that does not have one yet.
 * The lemma is obtained from {@code MORPH} using the label's word and tag;
 * labels that already carry a lemma are left untouched.
 *
 * @param tree tree whose yield consists of {@code CoreLabel}s
 */
private static void addLemmata(Tree tree) {
  tree.yield().stream()
      .map(leaf -> (CoreLabel) leaf)
      .filter(token -> token.lemma() == null)
      .forEach(token -> token.setLemma(MORPH.lemma(token.word(), token.tag())));
}
/**
 * Adds stem under annotation {@code ann} to the given CoreLabel.
 * Assumes that it has a TextAnnotation and PartOfSpeechAnnotation.
 *
 * @param label the label to read the word and tag from, and to store the result on
 * @param ann   the annotation key under which the result is stored
 */
public void stem(CoreLabel label, Class<? extends CoreAnnotation<String>> ann) {
  final String word = label.word();
  final String tag = label.tag();
  final String stemmed = lemmatize(word, tag, lexer, lexer.option(1));
  label.set(ann, stemmed);
}