/**
 * {@inheritDoc}
 *
 * <p>Reads the value stored under {@code CoreAnnotations.OriginalTextAnnotation}
 * through {@code getString}.
 */
@Override
public String originalText() {
  final String original = getString(CoreAnnotations.OriginalTextAnnotation.class);
  return original;
}
/**
 * {@inheritDoc}
 *
 * <p>Reads the value stored under {@code CoreAnnotations.AfterAnnotation}
 * through {@code getString}.
 */
@Override
public String after() {
  final String trailing = getString(CoreAnnotations.AfterAnnotation.class);
  return trailing;
}
/**
 * {@inheritDoc}
 *
 * <p>Delegates to the two-argument overload, passing the empty string as the
 * default value.
 */
@Override
public <KEY extends Key<String>> String getString(Class<KEY> key) {
  final String emptyDefault = "";
  return getString(key, emptyDefault);
}
/**
 * {@inheritDoc}
 *
 * <p>Reads the value stored under {@code CoreAnnotations.BeforeAnnotation}
 * through {@code getString}.
 */
@Override
public String before() {
  final String leading = getString(CoreAnnotations.BeforeAnnotation.class);
  return leading;
}
/**
 * Looks up a String-valued annotation by forwarding the request to the
 * wrapped {@code label}.
 *
 * @param key The annotation key class to look up
 * @return Whatever {@code label.getString(key)} returns
 */
@Override
public <KEY extends TypesafeMap.Key<String>> String getString(Class<KEY> key) {
  final String value = label.getString(key);
  return value;
}
/**
 * Looks up a String-valued annotation by forwarding the request, together
 * with the caller's default, to the wrapped {@code label}.
 *
 * @param key The annotation key class to look up
 * @param def The default value handed through to {@code label.getString}
 * @return Whatever {@code label.getString(key, def)} returns
 */
@Override
public <KEY extends TypesafeMap.Key<String>> String getString(Class<KEY> key, String def) {
  final String value = label.getString(key, def);
  return value;
}
/**
 * Convenience method for subclasses which use CoreLabel. Fetches the
 * TextAnnotation of a label and, when {@code flags.wordFunction} is set,
 * returns the result of applying that function to it.
 *
 * @param label A CoreLabel
 * @return The TextAnnotation of the label, passed through
 *         {@code flags.wordFunction} if one is present, otherwise unchanged
 */
protected String getWord(CoreLabel label) {
  final String text = label.getString(CoreAnnotations.TextAnnotation.class);
  if (flags.wordFunction == null) {
    // No transformation configured: hand back the raw text.
    return text;
  }
  return flags.wordFunction.apply(text);
}
/**
 * Adds nine dictionary features to {@code features}: the begin / middle / end
 * dictionary field values of the current, previous and next labels, each
 * concatenated with {@code dictSuffix} and a position-specific tag.
 *
 * @param lbeginFieldName Annotation key for the "begin" dictionary field
 * @param lmiddleFieldName Annotation key for the "middle" dictionary field
 * @param lendFieldName Annotation key for the "end" dictionary field
 * @param dictSuffix Suffix inserted between the field value and the tag
 * @param features Collection that receives the generated feature strings
 * @param p The previous label (features tagged -plb / -plm / -ple)
 * @param c The current label (features tagged -lb / -lm / -le)
 * @param c2 The next label (features tagged -c2lb / -c2lm / -c2le)
 */
private static void dictionaryFeaturesC(Class<? extends CoreAnnotation<String>> lbeginFieldName,
                                        Class<? extends CoreAnnotation<String>> lmiddleFieldName,
                                        Class<? extends CoreAnnotation<String>> lendFieldName,
                                        String dictSuffix,
                                        Collection<String> features,
                                        CoreLabel p, CoreLabel c, CoreLabel c2) {
  // Current label.
  final String curBegin = c.getString(lbeginFieldName);
  final String curMiddle = c.getString(lmiddleFieldName);
  final String curEnd = c.getString(lendFieldName);
  features.add(curBegin + dictSuffix + "-lb");
  features.add(curMiddle + dictSuffix + "-lm");
  features.add(curEnd + dictSuffix + "-le");

  // Previous label.
  final String prevBegin = p.getString(lbeginFieldName);
  final String prevMiddle = p.getString(lmiddleFieldName);
  final String prevEnd = p.getString(lendFieldName);
  features.add(prevBegin + dictSuffix + "-plb");
  features.add(prevMiddle + dictSuffix + "-plm");
  features.add(prevEnd + dictSuffix + "-ple");

  // Next label.
  final String nextBegin = c2.getString(lbeginFieldName);
  final String nextMiddle = c2.getString(lmiddleFieldName);
  final String nextEnd = c2.getString(lendFieldName);
  features.add(nextBegin + dictSuffix + "-c2lb");
  features.add(nextMiddle + dictSuffix + "-c2lm");
  features.add(nextEnd + dictSuffix + "-c2le");
}
Class<? extends CoreAnnotation<String>> lendFieldName, String dictSuffix, Collection<String> features, CoreLabel p2, CoreLabel p, CoreLabel c, CoreLabel c2) { String lbegin = c.getString(lbeginFieldName); String lmiddle = c.getString(lmiddleFieldName); String lend = c.getString(lendFieldName); features.add(lbegin+dictSuffix+"-lb"); features.add(lmiddle+dictSuffix+"-lm"); features.add(lend+dictSuffix+"-le"); lbegin = p.getString(lbeginFieldName); lmiddle = p.getString(lmiddleFieldName); lend = p.get(lendFieldName); features.add(lbegin+dictSuffix+"-plb"); lbegin = c2.getString(lbeginFieldName); lmiddle = c2.getString(lmiddleFieldName); lend = c2.getString(lendFieldName); features.add(lbegin+dictSuffix+"-c2lb"); features.add(lmiddle+dictSuffix+"-c2lm"); String p2Lend = p2.getString(lendFieldName); String pLend = p.getString(lendFieldName); String pLbegin = p.getString(lbeginFieldName); String cLbegin = c.getString(lbeginFieldName); String cLmiddle = c.getString(lmiddleFieldName); if (flags.useDictionaryConjunctions3) { features.add(pLend + cLbegin + cLmiddle + dictSuffix + "-pcLconj1");
/**
 * Renders a labeled sentence compactly as
 * {@code [ word(tag answer ner:X) ... ]}. The answer is omitted when it
 * equals {@code SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL}, and the
 * {@code ner:} part appears only when {@code printNer} is true.
 *
 * @param labeledSentence The tokens to render
 * @param printNer Whether to append each token's {@code ner()} value
 * @return string for printing
 */
public static String labeledSentenceToString(List<CoreLabel> labeledSentence, boolean printNer) {
  StringBuilder rendered = new StringBuilder("[ ");
  for (CoreLabel token : labeledSentence) {
    String form = token.getString(annotationForWord);
    String posTag = token.getString(PartOfSpeechAnnotation.class);
    String answer = token.getString(AnswerAnnotation.class);
    boolean isBackground = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer);

    rendered.append(form);
    rendered.append("(");
    rendered.append(posTag);
    if (!isBackground) {
      rendered.append(" ");
      rendered.append(answer);
    }
    if (printNer) {
      rendered.append(" ner:");
      rendered.append(token.ner());
    }
    rendered.append(") ");
  }
  rendered.append("]");
  return rendered.toString();
}
/**
 * Look along CD words and see if next thing is a money word.
 * Starting at position {@code i}, skips every token whose part-of-speech tag
 * is "CD"; the first non-CD token counts as a money word when its tag is
 * "M", "NN" or "NNS" and its word matches {@code CURRENCY_WORD_PATTERN}.
 *
 * @param pl The list of CoreLabel
 * @param i The position to scan right from
 * @return Whether a money word is found; false if the scan runs off the end
 *         of the list
 */
private static boolean rightScanFindsMoneyWord(List<CoreLabel> pl, int i) {
  int sz = pl.size();
  int j = i;
  // Guard the debug lookup so that debug mode does not throw on a start
  // position at or past the end, where the non-debug path returns false.
  if (DEBUG && j < sz) {
    log.info("rightScan from: " + pl.get(j).word());
  }
  while (j < sz && pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")) {
    j++;
  }
  if (j >= sz) {
    return false;
  }
  String tag = pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class);
  String word = pl.get(j).word();
  // Compute the answer once so the debug message reports exactly what is
  // returned (the logged expression previously left out the "M" tag).
  boolean isMoneyWord = (tag.equals("M") || tag.equals("NN") || tag.equals("NNS"))
      && CURRENCY_WORD_PATTERN.matcher(word).matches();
  if (DEBUG) {
    log.info("rightScan testing: " + word + '/' + tag + "; answer is: " + isMoneyWord);
  }
  return isMoneyWord;
}
/**
 * Look along CD words and see if next thing is a money word
 * like cents or pounds.
 * Starting at position {@code i}, skips every token whose part-of-speech tag
 * is "CD"; the first non-CD token counts as a money word when its tag is
 * "NN" or "NNS" and its word matches {@code CURRENCY_WORD_PATTERN}.
 *
 * @param pl The list of CoreLabel
 * @param i The position to scan right from
 * @return Whether a money word is found; false if the scan runs off the end
 *         of the list
 */
private static boolean rightScanFindsMoneyWord(List<CoreLabel> pl, int i) {
  int sz = pl.size();
  int j = i;
  // Guard the debug lookup so that debug mode does not throw on a start
  // position at or past the end, where the non-debug path returns false.
  if (DEBUG && j < sz) {
    log.info("rightScan from: " + pl.get(j).word());
  }
  while (j < sz && pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")) {
    j++;
  }
  if (j >= sz) {
    return false;
  }
  String tag = pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class);
  String word = pl.get(j).word();
  // Single source of truth for both the debug message and the return value.
  boolean isMoneyWord = (tag.equals("NN") || tag.equals("NNS"))
      && CURRENCY_WORD_PATTERN.matcher(word).matches();
  if (DEBUG) {
    log.info("rightScan testing: " + word + '/' + tag + "; answer is: " + isMoneyWord);
  }
  return isMoneyWord;
}
/**
 * Outputs a partial CONLL-U file with token information (form, lemma, POS)
 * but without any dependency information.
 *
 * <p>Each token becomes one tab-separated, ten-column row: index, word,
 * lemma, coarse tag, POS tag, feature string, three "_" placeholders, and
 * the misc field. Lemma, coarse tag, POS tag and misc fall back to "_" via
 * the default passed to {@code getString}. A final "\n" is appended after
 * the last row.
 *
 * @param sentence The sentence whose TokensAnnotation is printed
 * @return The rows for all tokens followed by a terminating newline
 */
public String printPOSAnnotations(CoreMap sentence) {
  final String placeholder = "_";
  StringBuilder rows = new StringBuilder();

  for (CoreLabel tok : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    String lemmaCol = tok.getString(CoreAnnotations.LemmaAnnotation.class, "_");
    String uposCol = tok.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
    String xposCol = tok.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
    String featsCol = CoNLLUUtils.toFeatureString(tok.get(CoreAnnotations.CoNLLUFeats.class));
    String miscCol = tok.getString(CoreAnnotations.CoNLLUMisc.class, "_");

    String row = String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n",
        tok.index(), tok.word(), lemmaCol, uposCol, xposCol, featsCol,
        placeholder, placeholder, placeholder, miscCol);
    rows.append(row);
  }

  rows.append("\n");
  return rows.toString();
}
CoreLabel p = cInfo.get(loc - 1); CoreLabel p2 = cInfo.get(loc - 2); String charc = c.getString(CoreAnnotations.CharAnnotation.class); String charc2 = c2.getString(CoreAnnotations.CharAnnotation.class); String charp = p.getString(CoreAnnotations.CharAnnotation.class); String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);
/** Write a standard CoNLL format output file. * * @param doc The document: A List of CoreLabel * @param out Where to send the answers to */ @Override @SuppressWarnings({"StringEquality", "StringContatenationInLoop"}) public void printAnswers(List<CoreLabel> doc, PrintWriter out) { // boolean tagsMerged = flags.mergeTags; // boolean useHead = flags.splitOnHead; if ( ! "iob1".equalsIgnoreCase(flags.entitySubclassification)) { deEndify(doc); } for (CoreLabel fl : doc) { String word = fl.word(); if (word == BOUNDARY) { // Using == is okay, because it is set to constant out.println(); } else { String gold = fl.getString(CoreAnnotations.GoldAnswerAnnotation.class); String guess = fl.get(CoreAnnotations.AnswerAnnotation.class); // log.info(word + "\t" + gold + "\t" + guess)); String pos = fl.getString(CoreAnnotations.PartOfSpeechAnnotation.class); String chunk = fl.getString(CoreAnnotations.ChunkAnnotation.class); out.println(fl.word() + '\t' + pos + '\t' + chunk + '\t' + gold + '\t' + guess); } } }
private void setTrueCaseText(CoreLabel l) { String trueCase = l.getString(CoreAnnotations.TrueCaseAnnotation.class); String text = l.word(); String trueCaseText = text; switch (trueCase) { case "UPPER": trueCaseText = text.toUpperCase(); break; case "LOWER": trueCaseText = text.toLowerCase(); break; case "INIT_UPPER": trueCaseText = Character.toTitleCase(text.charAt(0)) + text.substring(1).toLowerCase(); break; case "O": // The model predicted mixed case, so lookup the map: String lower = text.toLowerCase(); if (mixedCaseMap.containsKey(lower)) { trueCaseText = mixedCaseMap.get(lower); } // else leave it as it was? break; } // System.err.println(text + " was classified as " + trueCase + " and so became " + trueCaseText); l.set(CoreAnnotations.TrueCaseTextAnnotation.class, trueCaseText); if (overwriteText) { l.set(CoreAnnotations.TextAnnotation.class, trueCaseText); l.set(CoreAnnotations.ValueAnnotation.class, trueCaseText); } }
if (flags.useTags) { if (flags.maxLeft >= 3 && !flags.dontExtendTaggy) { featuresCpCp2Cp3C.add(p3.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + "-TTTS"); if (flags.useTaggySequencesShapeInteraction) { featuresCpCp2Cp3C.add(p3.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TTTS-CS");
CoreLabel p2 = cInfo.get(loc - 2); CoreLabel p3 = cInfo.get(loc - 3); String charc = c.getString(CoreAnnotations.CharAnnotation.class); String charp = p.getString(CoreAnnotations.CharAnnotation.class); String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class); String charp3 = p3.getString(CoreAnnotations.CharAnnotation.class); Integer cI = c.get(CoreAnnotations.UTypeAnnotation.class); String uTypec = (cI != null ? cI.toString() : "");
if(me.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("OD")) { me.set(CoreAnnotations.AnswerAnnotation.class, ORDINAL_TAG); } else if(CURRENCY_WORD_PATTERN.matcher(me.word()).matches() && prev.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")) { } else if(me.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")) { } else if(me.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NT")) {
if (cInfo.get(loc - reverse(1)).getString(CoreAnnotations.PartOfSpeechAnnotation.class) != null && isNameCase(pWord) && cInfo.get(loc - reverse(1)).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP")) { for (int jump = 3; jump < 150; jump++) { if (getWord(cInfo.get(loc + reverse(jump))).equals(word)) { for (int jump = 3; jump < 150; jump++) { if (getWord(cInfo.get(loc + reverse(jump))).equals(word)) { if (isNameCase(getWord(cInfo.get(loc + reverse(jump - 1)))) && (cInfo.get(loc + reverse(jump - 1))).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP")) { l.add("X-NEXT-OCCURRENCE-YX"); } else if (isNameCase(getWord(cInfo.get(loc + reverse(jump + 1)))) && (cInfo.get(loc + reverse(jump + 1))).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP")) { if (isNameCase(getWord(cInfo.get(loc + reverse(jump + 1)))) && (cInfo.get(loc + reverse(jump + 1))).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP")) { l.add("X-PREV-OCCURRENCE-YX"); } else if (isNameCase(getWord(cInfo.get(loc + reverse(jump - 1)))) && cInfo.get(loc + reverse(jump - 1)).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP")) { l.add("X-PREV-OCCURRENCE-XY");