/** Returns this word's lemma, delegating to the wrapped label. */
@Override public String lemma() { return label.lemma(); }
/**
 * The subject of this relation triple, as a String of the subject's lemmas.
 * This method will additionally strip out punctuation as well.
 */
public String subjectLemmaGloss() {
  return StringUtils.join(
      canonicalSubject.stream()
          // Drop punctuation tokens, identified by their POS tag.
          .filter(tok -> !tok.tag().matches("[.?,:;'\"!]"))
          // Prefer the lemma; fall back to the surface word when absent.
          .map(tok -> tok.lemma() != null ? tok.lemma() : tok.word()),
      " ");
}
/**
 * The object of this relation triple, as a String of the object's lemmas.
 * This method will additionally strip out punctuation as well.
 */
public String objectLemmaGloss() {
  return StringUtils.join(
      canonicalObject.stream()
          // Drop punctuation tokens, identified by their POS tag.
          .filter(tok -> !tok.tag().matches("[.?,:;'\"!]"))
          // Prefer the lemma; fall back to the surface word when absent.
          .map(tok -> tok.lemma() != null ? tok.lemma() : tok.word()),
      " ");
}
/**
 * Maps a token to the string used for context matching: the lemma when
 * lemma-based context tokens are enabled, otherwise the surface word;
 * lower-cased when lower-case context matching is configured.
 */
@Override public String apply(CoreLabel l) {
  String s;
  if (PatternFactory.useLemmaContextTokens) {
    s = l.lemma();
    // Lemmas must have been annotated upstream when this flag is set.
    assert s != null : "Lemma is null and useLemmaContextTokens is true";
  } else
    s = l.word();
  if (ConstantsAndVariables.matchLowerCaseContext)
    s = s.toLowerCase();
  assert s != null;
  return s;
} };
/**
 * Lemmatizes the given text and writes the lemmas, space-separated and
 * newline-terminated, to the output stream as UTF-8.
 *
 * @param arg the text to lemmatize; a null argument is silently ignored
 * @param outStream the stream written to; flushed but intentionally not closed
 * @throws IOException if writing to the stream fails
 */
public void handleLemma(String arg, OutputStream outStream) throws IOException {
  if (arg == null) {
    return;
  }
  List<CoreLabel> tokens = parser.lemmatize(arg);
  // Charset constant instead of the string name "utf-8": no failure path
  // through UnsupportedEncodingException and no charset-name lookup.
  OutputStreamWriter osw =
      new OutputStreamWriter(outStream, java.nio.charset.StandardCharsets.UTF_8);
  for (int i = 0; i < tokens.size(); ++i) {
    CoreLabel word = tokens.get(i);
    if (i > 0) {
      osw.write(" ");
    }
    // Fall back to the surface form when no lemma is available, rather than
    // hitting a NullPointerException in Writer.write(String); this matches
    // the lemma-or-word fallback used elsewhere in the codebase.
    String lemma = word.lemma();
    osw.write(lemma == null ? word.word() : lemma);
  }
  osw.write("\n");
  osw.flush();
}
/** * @see #conjugateEnglish(String, boolean) */
public String conjugateEnglish(CoreLabel token, boolean negated) {
  // Use the lemma when present; otherwise fall back to the surface word.
  String lemma = token.lemma();
  String surface = token.word();
  return conjugateEnglish(lemma == null ? surface : lemma, negated);
}
/** * @see #conjugateEnglish(String, boolean) */
public String conjugateEnglish(CoreLabel token) {
  // Non-negated variant; lemma preferred, surface word as fallback.
  String lemma = token.lemma();
  String surface = token.word();
  return conjugateEnglish(lemma == null ? surface : lemma, false);
}
}
/**
 * The relation of this relation triple, as a String of the relation's lemmas.
 * This method will additionally strip out punctuation as well, and lower-cases the relation.
 */
public String relationLemmaGloss() {
  // Construct a human readable relation string
  String relationGloss = (
      (prefixBe ? "be " : "")
      + StringUtils.join(relation.stream()
          // Keep a token when its tag is null, or when neither its tag nor
          // its (non-null) lemma looks like punctuation.
          .filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
          // Prefer the lemma; fall back to the surface word when absent.
          .map(x -> x.lemma() == null ? x.word() : x.lemma()), " ")
          .toLowerCase()
      + (suffixBe ? " be" : "")
      + (suffixOf ? " of" : "")
      + (istmod ? " at_time" : "")
  ).trim();
  // Some cosmetic tweaks
  if ("'s".equals(relationGloss)) {
    return "have";
  } else {
    return relationGloss;
  }
}
private static void print(Annotation annotation, PrintWriter pw, Options options) throws IOException { List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); if(sentences != null) { for (CoreMap sentence : sentences) { StringBuilder sentenceToWrite = new StringBuilder(); for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { sentenceToWrite.append(" "); sentenceToWrite.append(token.lemma().toLowerCase()); if (token.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("V")) //verb sentenceToWrite.append("_V"); else if (token.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("N")) //noun sentenceToWrite.append("_N"); } pw.print(sentenceToWrite); //omit first space } } }
private static boolean containsStopWord(CoreLabel l, Set<String> commonEngWords, java.util.regex.Pattern ignoreWordRegex) { // if(useWordResultCache.containsKey(l.word())) // return useWordResultCache.get(l.word()); if ((commonEngWords != null && ((lemmaExists(l) && commonEngWords.contains(l.lemma())) || commonEngWords.contains(l.word()))) || (ignoreWordRegex != null && ((lemmaExists(l) && ignoreWordRegex.matcher(l.lemma()).matches()) || ignoreWordRegex.matcher(l.word()).matches()))){ //|| (ignoreWords !=null && (ignoreWords.contains(l.lemma()) || ignoreWords.contains(l.word())))) { // useWordResultCache.putIfAbsent(l.word(), false); return true; } // // if (l.word().length() >= minLen4Fuzzy) { // try { // String matchedFuzzy = NoisyLabelSentences.containsFuzzy(commonEngWords, // l.word(), minLen4Fuzzy); // if (matchedFuzzy != null) { // synchronized (commonEngWords) { // commonEngWords.add(l.word()); // System.out.println("word is " + l.word() + " and matched fuzzy with " + // matchedFuzzy); // } // useWordResultCache.putIfAbsent(l.word(), false); // return false; // } // } catch (Exception e) { // e.printStackTrace(); // System.out.println("Exception " + " while fuzzy matching " + l.word()); // } // } // useWordResultCache.putIfAbsent(l.word(), true); return false; }
/**
 * Builds the four split patterns for this mention:
 * [0] the head word's lemma,
 * [1] the pattern over the closest premodifier plus the head,
 * [2] the pattern over all premodifiers plus the head,
 * [3] the pattern from the no-argument {@code getPattern()}.
 * With no premodifiers, [1] and [2] are the head lemma as well.
 */
public String[] getSplitPattern() {
  ArrayList<ArrayList<IndexedWord>> premodifiers = getPremodifiers();
  String[] components = new String[4];
  components[0] = headWord.lemma();
  int count = premodifiers.size();
  if (count == 0) {
    components[1] = headWord.lemma();
    components[2] = headWord.lemma();
  } else if (count == 1) {
    ArrayList<AbstractCoreLabel> single = Generics.newArrayList();
    single.addAll(premodifiers.get(count - 1));
    single.add(headWord);
    components[1] = getPattern(single);
    components[2] = getPattern(single);
  } else {
    // Closest premodifier only.
    ArrayList<AbstractCoreLabel> closest = Generics.newArrayList();
    closest.addAll(premodifiers.get(count - 1));
    closest.add(headWord);
    components[1] = getPattern(closest);
    // Every premodifier, in order.
    ArrayList<AbstractCoreLabel> all = Generics.newArrayList();
    for (ArrayList<IndexedWord> premodifier : premodifiers) {
      all.addAll(premodifier);
    }
    all.add(headWord);
    components[2] = getPattern(all);
  }
  components[3] = getPattern();
  return components;
}
/**
 * Tests every cross-cluster mention pair against the coref dictionary,
 * skipping pronominal mentions and pairs whose head lemmas are equal.
 * Returns false as soon as any tested pair fails; returns true only when at
 * least one pair was tested and all tested pairs passed.
 */
public static boolean entityClusterAllCorefDictionary(CorefCluster menCluster, CorefCluster antCluster,
    Dictionaries dict, int dictColumn, int freq) {
  boolean anyPairMatched = false;
  for (Mention mention : menCluster.getCorefMentions()) {
    if (mention.isPronominal()) {
      continue;
    }
    for (Mention antecedent : antCluster.getCorefMentions()) {
      boolean skip = antecedent.isPronominal()
          || mention.headWord.lemma().equals(antecedent.headWord.lemma());
      if (skip) {
        continue;
      }
      if (!entityCorefDictionary(mention, antecedent, dict, dictColumn, freq)) {
        // A single failing pair vetoes the whole cluster pair.
        return false;
      }
      anyPairMatched = true;
    }
  }
  return anyPairMatched;
}
/**
 * Tests every cross-cluster mention pair against the coref dictionary,
 * skipping pronominal mentions and pairs whose head lemmas are equal.
 * Returns false as soon as any tested pair fails; returns true only when at
 * least one pair was tested and every tested pair passed.
 */
public static boolean entityClusterAllCorefDictionary(CorefCluster menCluster, CorefCluster antCluster, Dictionaries dict, int dictColumn, int freq) {
  boolean ret = false;
  for (Mention men : menCluster.getCorefMentions()) {
    if (men.isPronominal()) continue;
    for (Mention ant : antCluster.getCorefMentions()) {
      // Skip pronominal antecedents and pairs sharing a head lemma.
      if (ant.isPronominal() || men.headWord.lemma().equals(ant.headWord.lemma())) continue;
      if (entityCorefDictionary(men, ant, dict, dictColumn, freq)) {
        ret = true;
      } else {
        // One failing pair vetoes the whole cluster pair.
        return false;
      }
    }
  }
  return ret;
}
/**
 * Converts a tree to the Morfette training format: one token per line, in the
 * form {@code word lemma morph}, where the morphological string falls back to
 * the preterminal tag value and the lemma falls back to the token's value
 * when either is missing or empty.
 *
 * @param tree the parse tree to convert
 * @return the Morfette-formatted string, one line per yield token
 */
private static String treeToMorfette(Tree tree) {
  StringBuilder sb = new StringBuilder();
  List<Label> yield = tree.yield();
  List<Label> tagYield = tree.preTerminalYield();
  assert yield.size() == tagYield.size();
  int listLen = yield.size();
  for (int i = 0; i < listLen; ++i) {
    CoreLabel token = (CoreLabel) yield.get(i);
    CoreLabel tag = (CoreLabel) tagYield.get(i);
    String morphStr = token.originalText();
    // isEmpty() instead of equals("") — same test, idiomatic form.
    if (morphStr == null || morphStr.isEmpty()) {
      morphStr = tag.value();
    }
    String lemma = token.lemma();
    if (lemma == null || lemma.isEmpty()) {
      lemma = token.value();
    }
    sb.append(String.format("%s %s %s%n", token.value(), lemma, morphStr));
  }
  return sb.toString();
}
private void statsWithoutApplyingPatterns(Map<String, DataInstance> sents, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, TwoDimensionalCounter<CandidatePhrase, E> wordsandLemmaPatExtracted){ for (Entry<String, DataInstance> sentEn : sents.entrySet()) { Map<Integer, Set<E>> pat4Sent = patternsForEachToken.getPatternsForAllTokens(sentEn.getKey()); if (pat4Sent == null) { throw new RuntimeException("How come there are no patterns for " + sentEn.getKey()); } for (Entry<Integer, Set<E>> en : pat4Sent .entrySet()) { CoreLabel token = null; Set<E> p1 = en.getValue(); // Set<Integer> p1 = en.getValue().first(); // Set<Integer> p2 = en.getValue().second(); // Set<Integer> p3 = en.getValue().third(); for (E index : patternsLearnedThisIter.keySet()) { if (p1.contains(index)) { if (token == null) token = sentEn.getValue().getTokens().get(en.getKey()); wordsandLemmaPatExtracted.incrementCount(CandidatePhrase.createOrGet(token.word(), token.lemma()), index); } } } } }
/**
 * Featurize a given sentence: unigram counts over lower-cased lemmas (numbers
 * collapsed into a single "**num**" bucket) plus bigram features over
 * alphabetic lemmas, with "^" and "$" marking the sentence boundaries.
 *
 * @param sentence The sentence to featurize.
 * @return A counter encoding the featurized sentence.
 */
private static Counter<String> featurize(CoreMap sentence) {
  ClassicCounter<String> features = new ClassicCounter<>();
  String previousLemma = "^";
  for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    String lemma = token.lemma().toLowerCase();
    // Collapse all numeric tokens into a single feature bucket.
    if (!number.matcher(lemma).matches()) {
      features.incrementCount(lemma);
    } else {
      features.incrementCount("**num**");
    }
    // Bigram features are built over alphabetic lemmas only.
    if (alpha.matcher(lemma).matches()) {
      features.incrementCount(previousLemma + "__" + lemma);
      previousLemma = lemma;
    }
  }
  features.incrementCount(previousLemma + "__$");
  return features;
}
/** Fills in any missing lemma on the tree's yield tokens, in place. */
private static void addLemmata(Tree tree) {
  tree.yield().forEach(leaf -> {
    CoreLabel token = (CoreLabel) leaf;
    // Leave already-annotated lemmas untouched.
    if (token.lemma() != null) {
      return;
    }
    token.setLemma(MORPH.lemma(token.word(), token.tag()));
  });
}
/**
 * Scans tokens [startIndex, endIndex) of the given sentence for a reporting
 * verb and, through the sentence's dependency graph, tries to attribute the
 * utterance to that verb's subject.
 *
 * @return true if a subject was found and recorded via findSubject
 */
private static boolean findSpeaker(Document doc, int utterNum, int sentNum, List<CoreMap> sentences,
    int startIndex, int endIndex, Dictionaries dict) {
  List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
  for (int i = startIndex ; i < endIndex ; i++) {
    CoreLabel cl = sent.get(i);
    // Only consider narration tokens (utterance 0), not quoted speech.
    if (cl.get(CoreAnnotations.UtteranceAnnotation.class) != 0) continue;
    String lemma = cl.lemma();
    String word = cl.word();
    if (dict.reportVerb.contains(lemma) && cl.tag().startsWith("V")) {
      // find subject
      // Prefer enhanced dependencies; fall back to basic when unavailable.
      SemanticGraph dependency = sentences.get(sentNum).get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
      if (dependency == null) {
        dependency = sentences.get(sentNum).get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      }
      IndexedWord w = dependency.getNodeByWordPattern(word);

      if (w != null) {
        if (findSubject(doc, dependency, w, sentNum, utterNum)) return true;
        // Walk up the dependency path while it stays verbal/modal.
        for (IndexedWord p : dependency.getPathToRoot(w)) {
          if (!p.tag().startsWith("V") && !p.tag().startsWith("MD")) break;
          if (findSubject(doc, dependency, p, sentNum, utterNum)) return true; // handling something like "was talking", "can tell"
        }
      } else {
        Redwood.log("debug-preprocessor", "Cannot find node in dependency for word " + word);
      }
    }
  }
  return false;
}
/** * A funky little helper method to interpret each token of the sentence as an HTML string, and translate it back to text. * Note that this is <b>in place</b>. */ public void unescapeHTML() { // Change in the protobuf for (int i = 0; i < sentence.length(); ++i) { CoreNLPProtos.Token.Builder token = sentence.rawToken(i); token.setWord(StringUtils.unescapeHtml3(token.getWord())); token.setLemma(StringUtils.unescapeHtml3(token.getLemma())); } // Change in the annotation CoreMap cm = sentence.document.asAnnotation().get(CoreAnnotations.SentencesAnnotation.class).get(sentence.sentenceIndex()); for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) { token.setWord(StringUtils.unescapeHtml3(token.word())); token.setLemma(StringUtils.unescapeHtml3(token.lemma())); } } }