@Override public void visitTree(Tree t) { // A single Morphology is not threadsafe, so to make this class // threadsafe, we have to create a new Morphology for each visit processTree(t, null, new Morphology()); }
Morphology morph = new Morphology(new FileReader(arg), flags); for (Word next; (next = morph.next()) != null; ) { System.out.print(next);
public List<? extends HasWord> tagCoreLabelsOrHasWords(List<? extends HasWord> sentence, Morphology morpha, boolean outputLemmas) { if (sentence.size() > 0 && sentence.get(0) instanceof CoreLabel) { List<CoreLabel> coreLabels = castCoreLabels(sentence); tagCoreLabels(coreLabels); if (outputLemmas) { // We may want to lemmatize things without using an existing // Morphology object, as Morphology objects are not // thread-safe, so we would make a new one here if (morpha == null) { morpha = new Morphology(); } lemmatize(coreLabels, morpha); } return coreLabels; } else { List<TaggedWord> taggedSentence = tagSentence(sentence, false); return taggedSentence; } }
// Morphology is not thread-safe; only allocate one when lemma output is requested.
Morphology morpha = (outputLemmas) ? new Morphology() : null;
@Override public void annotate(Annotation annotation) { if (VERBOSE) { log.info("Finding lemmas ..."); } Morphology morphology = new Morphology(); if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); //log.info("Lemmatizing sentence: " + tokens); for (CoreLabel token : tokens) { String text = token.get(CoreAnnotations.TextAnnotation.class); String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); addLemma(morphology, CoreAnnotations.LemmaAnnotation.class, token, text, posTag); } } } else { throw new RuntimeException("Unable to find words/tokens in: " + annotation); } }
@Override
public Tree transformTree(Tree t) {
  // Sets a lemma on every leaf label of t, in place, and returns t.
  // Morphology is not thread-safe, hence the per-call local instance.
  Morphology morphology = new Morphology();
  // Tagged yield of the tree, computed lazily only if some leaf label
  // does not carry its own tag.
  List<TaggedWord> tagged = null;
  int index = 0;
  for (Tree leaf : t.getLeaves()) {
    Label label = leaf.label();
    if (label == null) {
      // NOTE(review): index is not advanced for null-labeled leaves; if
      // taggedYield() includes such leaves, the fallback tags below could
      // misalign — confirm against Tree.taggedYield() semantics.
      continue;
    }
    String tag;
    if (!(label instanceof HasTag) || ((HasTag) label).tag() == null) {
      // Label has no tag of its own: fall back to the tree's tagged yield,
      // indexed by leaf position.
      if (tagged == null) {
        tagged = t.taggedYield();
      }
      tag = tagged.get(index).tag();
    } else {
      tag = ((HasTag) label).tag();
    }
    if (!(label instanceof HasLemma)) {
      throw new IllegalArgumentException("Got a tree with labels which do not support lemma");
    }
    // lemma(word, tag, true): the boolean's exact meaning is not visible
    // here — presumably lowercasing; TODO confirm in Morphology javadoc.
    ((HasLemma) label).setLemma(morphology.lemma(label.value(), tag, true));
    ++index;
  }
  return t;
}
/**
 * Lemmatizes the given tokens. Only works on English, as it is hard
 * coded to use the Morphology class, which is English-only.
 *
 * @param tokens words to lemmatize
 * @return CoreLabels carrying word, tag, and lemma for each input token
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
  // Obtain POS tags either from the standalone tagger or by parsing.
  List<TaggedWord> tagged;
  if (getOp().testOptions.preTag) {
    tagged = loadTagger().apply(tokens);
  } else {
    tagged = parse(tokens).taggedYield();
  }
  // Morphology is not thread-safe; use a fresh local instance per call.
  Morphology stemmer = new Morphology();
  List<CoreLabel> lemmas = Generics.newArrayList();
  for (TaggedWord taggedWord : tagged) {
    CoreLabel labeled = new CoreLabel();
    labeled.setWord(taggedWord.word());
    labeled.setTag(taggedWord.tag());
    stemmer.stem(labeled);
    lemmas.add(labeled);
  }
  return lemmas;
}
Morphology morpha = (outputLemmas) ? new Morphology() : null; for (List<X> sentence : document) { numWords += sentence.size();
/**
 * Wraps a MaxentTagger together with the tokenization and output
 * configuration pulled from the tagger's TaggerConfig.
 *
 * @param tagger the tagger to wrap; its config drives all settings
 */
protected TaggerWrapper(MaxentTagger tagger) {
  this.tagger = tagger;
  this.config = tagger.config;
  try {
    tokenizerFactory =
        chooseTokenizerFactory(config.getTokenize(),
                               config.getTokenizerFactory(),
                               config.getTokenizerOptions(),
                               config.getTokenizerInvertible());
  } catch (Exception e) {
    // Report through the class logger instead of printStackTrace() so the
    // failure (and its stack trace) lands in the normal log stream, then
    // fall back to a plain PTB word tokenizer so the wrapper stays usable.
    log.info("Error in tokenizer factory instantiation for class: "
        + config.getTokenizerFactory(), e);
    tokenizerFactory =
        PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions());
  }
  outputStyle = OutputStyle.fromShortName(config.getOutputFormat());
  outputVerbosity = config.getOutputVerbosity();
  outputLemmas = config.getOutputLemmas();
  // Morphology is not thread-safe; only allocate one when lemma output is on.
  morpha = outputLemmas ? new Morphology() : null;
  tokenize = config.getTokenize();
}
// Private constructor: allocates the Morphology this lemmatizer delegates to.
// NOTE(review): Morphology is not thread-safe, so a shared instance of this
// class presumably must not be used from multiple threads — confirm.
private StanfordLemmatizer() {
  this.analyzer = new Morphology();
}
// Creates a stemmer backed by a single Morphology instance.
// NOTE(review): Morphology is not thread-safe, so this stemmer presumably
// must not be shared across threads — confirm.
public WordStemmer() {
  morpha = new Morphology();
}
// Visitor hook: delegates to processTree with a per-visit Morphology.
@Override
public void visitTree(Tree t) {
  // A single Morphology is not threadsafe, so to make this class
  // threadsafe, we have to create a new Morphology for each visit
  processTree(t, null, new Morphology());
}
public void visitTree(Tree t) { // A single Morphology is not threadsafe, so to make this class // threadsafe, we have to create a new Morphology for each visit processTree(t, null, new Morphology()); }
@Override public void visitTree(Tree t) { // A single Morphology is not threadsafe, so to make this class // threadsafe, we have to create a new Morphology for each visit processTree(t, null, new Morphology()); }
// Example: lemmatize a single word given its POS tag.
String tag = "VBG";
String word = "painting";
// Morphology is not thread-safe; fine for this single-threaded snippet.
Morphology morphology = new Morphology();
String lemma = morphology.lemma(word, tag);  // presumably "paint" — TODO confirm
// Example: run a tokenize+ssplit pipeline, then lemmatize each token with
// an externally supplied POS tag. NOTE: the "..." below is a placeholder —
// this snippet does not compile until a real tag source is substituted.
Properties props = new Properties();
props.put("annotators", "tokenize, ssplit");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);
String text = "painting";
// Morphology is not thread-safe; one local instance is reused serially here.
Morphology morphology = new Morphology();
Annotation document = pipeline.process(text);
List<edu.stanford.nlp.util.CoreMap> sentences = document.get(SentencesAnnotation.class);
for(edu.stanford.nlp.util.CoreMap sentence: sentences) {
  for(CoreLabel token: sentence.get(TokensAnnotation.class)) {
    String word = token.get(TextAnnotation.class);
    String tag = ... //get the tag for the current word from somewhere, e.g. an array
    String lemma = morphology.lemma(word, tag);
    System.out.println("lemmatized version :" + lemma);
  }
}
/**
 * Tags the words in the given sentence. If the sentence is made of
 * CoreLabels, they are tagged (and optionally lemmatized) in place and
 * the same list is returned; otherwise a new list of TaggedWords is built.
 *
 * @param sentence     words to tag; may be CoreLabels or any HasWord
 * @param morpha       Morphology used for lemmas; may be null, in which
 *                     case a fresh one is created when outputLemmas is true
 * @param outputLemmas whether to also set lemmas (CoreLabel path only)
 * @return the tagged sentence
 */
public List<? extends HasWord> tagCoreLabelsOrHasWords(List<? extends HasWord> sentence, Morphology morpha, boolean outputLemmas) {
  if (sentence.size() > 0 && sentence.get(0) instanceof CoreLabel) {
    List<CoreLabel> coreLabels = castCoreLabels(sentence);
    tagCoreLabels(coreLabels);
    if (outputLemmas) {
      // We may want to lemmatize things without using an existing
      // Morphology object, as Morphology objects are not
      // thread-safe, so we would make a new one here
      if (morpha == null) {
        morpha = new Morphology();
      }
      lemmatize(coreLabels, morpha);
    }
    return coreLabels;
  } else {
    List<TaggedWord> taggedSentence = tagSentence(sentence, false);
    return taggedSentence;
  }
}
/**
 * Tags the words in the given sentence. CoreLabel input is tagged in
 * place (and lemmatized when requested) and returned as-is; any other
 * HasWord input is tagged into a new TaggedWord list.
 *
 * @param sentence     words to tag
 * @param morpha       lemmatizer to use; null means create one on demand
 * @param outputLemmas whether to set lemmas on CoreLabel input
 * @return the tagged sentence
 */
public List<? extends HasWord> tagCoreLabelsOrHasWords(List<? extends HasWord> sentence, Morphology morpha, boolean outputLemmas) {
  if (sentence.size() > 0 && sentence.get(0) instanceof CoreLabel) {
    List<CoreLabel> coreLabels = castCoreLabels(sentence);
    tagCoreLabels(coreLabels);
    if (outputLemmas) {
      // We may want to lemmatize things without using an existing
      // Morphology object, as Morphology objects are not
      // thread-safe, so we would make a new one here
      if (morpha == null) {
        morpha = new Morphology();
      }
      lemmatize(coreLabels, morpha);
    }
    return coreLabels;
  } else {
    List<TaggedWord> taggedSentence = tagSentence(sentence, false);
    return taggedSentence;
  }
}
public static void main(String[] args) throws FileNotFoundException { String treeString = "(ROOT (S (NP (NNP John)) (VP (VBZ eats) (NP (NN pizza))) (. .)))"; Tree tree = Tree.valueOf(treeString); SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree); //add lemmata Morphology morphology = new Morphology(); for (IndexedWord node : graph.vertexSet()) { String lemma = morphology.lemma(node.word(), node.tag()); node.setLemma(lemma); } System.err.println(graph); SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<dobj=reln {lemma:/eat/}=B"); SemgrexMatcher matcher = semgrex.matcher(graph); while (matcher.find()) { System.err.println(matcher.getNode("A") + " <<dobj " + matcher.getNode("B")); } }
/**
 * Builds a wrapper around the given tagger, pulling tokenizer, output
 * style, and lemma settings from the tagger's config.
 *
 * @param tagger the tagger to wrap
 */
protected TaggerWrapper(MaxentTagger tagger) {
  this.tagger = tagger;
  this.config = tagger.config;
  try {
    tokenizerFactory =
        chooseTokenizerFactory(config.getTokenize(),
                               config.getTokenizerFactory(),
                               config.getTokenizerOptions(),
                               config.getTokenizerInvertible());
  } catch (Exception e) {
    // Fall back to a plain PTB word tokenizer if the configured factory
    // cannot be instantiated.
    log.info("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory());
    e.printStackTrace();
    tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions());
  }
  outputStyle = OutputStyle.fromShortName(config.getOutputFormat());
  outputVerbosity = config.getOutputVerbosity();
  outputLemmas = config.getOutputLemmas();
  // Morphology is not thread-safe; only created when lemma output is on.
  morpha = (outputLemmas) ? new Morphology() : null;
  tokenize = config.getTokenize();
  // tagSeparator = config.getTagSeparator();
}