edu.stanford.nlp.ling.Word java code examples

Refine search

Tree

/**
 * Appends the leaves under this node, in child order, to <code>y</code> as
 * <code>HasWord</code> objects and returns <code>y</code>.
 * <p>
 * For a leaf, the label is added as-is when it already implements
 * <code>HasWord</code> (a <code>CoreLabel</code> with a null word first has its
 * word set from its value); any other label is wrapped in a new <code>Word</code>.
 * For an internal node, the same procedure is applied to every child.
 *
 * @param y the list to append to; it is modified in place
 * @return the same list <code>y</code> that was passed in
 */
@SuppressWarnings("unchecked")
public <X extends HasWord> ArrayList<X> yieldHasWord(ArrayList<X> y) {
 if (isLeaf()) {
  Label lab = label();
  // cdm: this is new hacked in stuff in Mar 2007 so we can now have a
  // well-typed version of a Sentence, whose objects MUST implement HasWord
  //
  // wsg (Feb. 2010) - More hacks for trees with CoreLabels in which the type implements
  // HasWord but only the value field is populated. This can happen if legacy code uses
  // LabeledScoredTreeFactory but passes in a StringLabel to e.g. newLeaf().
  if (lab instanceof HasWord) {
   if (lab instanceof CoreLabel) {
    CoreLabel cl = (CoreLabel) lab;
    if (cl.word() == null) {
     cl.setWord(cl.value());
    }
    y.add((X) cl);
   } else {
    y.add((X) lab);
   }
  } else {
   y.add((X) new Word(lab));
  }
 } else {
  for (Tree kid : children()) {
   // Recurse through yieldHasWord rather than yield: the previous call to
   // kid.yield(y) meant the leaf handling above only ran when the receiver
   // itself was a leaf, so leaves below an internal node skipped it.
   kid.yieldHasWord(y);
  }
 }
 return y;
}

/**
 * Stems the text returned by <code>w.word()</code> and wraps the outcome
 * in a newly constructed <code>Word</code>.
 *
 * @param w the word whose text is stemmed
 * @return a new <code>Word</code> built from the stemmed text
 */
public Word stem(Word w) {
 final Word stemmedWord = new Word(stem(w.word()));
 return stemmedWord;
}

/**
 * Shifts the begin and end position of every token by <code>offset</code>.
 * The tokens are modified in place.
 *
 * @param tokens the tokens whose positions are shifted
 * @param offset the amount added to each begin and end position
 */
public static void updateOffsets(List<Word> tokens, int offset) {
 for (Word token : tokens) {
  int shiftedBegin = token.beginPosition() + offset;
  token.setBeginPosition(shiftedBegin);

  int shiftedEnd = token.endPosition() + offset;
  token.setEndPosition(shiftedEnd);
 }
}

/**
 * Renders the tokens on one line, separated by single spaces, each in the
 * form <code>word{begin, end}</code>.
 *
 * @param tokens the tokens to render
 * @return the rendered line; empty when <code>tokens</code> has no elements
 */
public static String tokensToString(Word [] tokens) {
 StringBuilder out = new StringBuilder(512);
 int position = 0;
 for (Word token : tokens) {
  // A separator goes before every token except the first.
  if (position++ > 0) {
   out.append(" ");
  }
  out.append(token.word())
    .append("{")
    .append(token.beginPosition())
    .append(", ")
    .append(token.endPosition())
    .append("}");
 }
 return out.toString();
}

private Tree transformTreeHelper(Tree t) {
 if (t != null) {
  String cat = t.label().value();
  if (t.isLeaf()) {
   Label label = new Word(cat); //new CategoryWordTag(cat,cat,"");
   t.setLabel(label);
  } else {
   Tree[] kids = t.children();

String parentStr;
String grandParentStr;
if (root == null || t.equals(root)) {
 parent = null;
 parentStr = "";
} else {
 parent = t.parent(root);
 parentStr = parent.label().value();
if (parent == null || parent.equals(root)) {
String baseGrandParentStr = tlpParams.treebankLanguagePack().basicCategory(grandParentStr);
if (t.isLeaf()) {
 return tf.newLeaf(new Word(t.label().value()));
String word = t.headTerminal(hf).value();

/**
 * Appends a new <code>Word</code> for every leaf under this node, in child
 * order, to <code>y</code>.
 *
 * @param y the list to append to; it is modified in place
 * @return the same list <code>y</code> that was passed in
 */
public ArrayList<Word> yieldWords(ArrayList<Word> y) {
 if (!isLeaf()) {
  // Internal node: let each child contribute its own leaves.
  Tree[] daughters = children();
  for (int i = 0; i < daughters.length; i++) {
   daughters[i].yieldWords(y);
  }
  return y;
 }
 // Leaf: wrap this node's label in a Word.
 y.add(new Word(label()));
 return y;
}

/**
 * Relabels the root with the start symbol and appends a Lexicon.BOUNDARY
 * preterminal (with a Lexicon.BOUNDARY leaf under it) as its last child.
 * This is needed for the dependency parser.
 * <i>Note:</i> This is a destructive operation on the tree passed in!!
 *
 * @param t The current tree into which a boundary is inserted
 */
public void addRoot(Tree t) {
 if (t.isLeaf()) {
  log.info("Warning: tree is leaf: " + t);
  // NOTE(review): this rebinds the local parameter only, so the wrapping
  // node built here is not visible to the caller -- confirm this is intended.
  t = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t));
 }

 // Relabel the root.
 CategoryWordTag rootLabel =
   new CategoryWordTag(tlp.startSymbol(), Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG);
 t.setLabel(rootLabel);

 // Build the boundary preterminal with a single boundary leaf beneath it.
 Tree boundaryLeaf = tf.newLeaf(new Word(Lexicon.BOUNDARY));
 List<Tree> boundaryKids = new ArrayList<>();
 boundaryKids.add(boundaryLeaf);
 CategoryWordTag boundaryLabel =
   new CategoryWordTag(Lexicon.BOUNDARY_TAG, Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG);
 Tree boundaryPreTerm = tf.newTreeNode(boundaryLabel, boundaryKids);

 // Attach it after the existing children.
 List<Tree> daughters = t.getChildrenAsList();
 daughters.add(boundaryPreTerm);
 t.setChildren(daughters);
}

String cat = t.label().value();
if (t.isLeaf()) {
 Label label = new Word(cat);//new CategoryWordTag(cat,cat,"");
 return tf.newLeaf(label);
if (t.isPreTerminal()) {
 Tree childResult = transformTree(t.getChild(0));
 String word = childResult.value();  // would be nicer if Word/CWT ??

for (Tree node : subTrees()) {
 if (node.isLeaf() || node.children().length < 2) {
  continue;
  Tree hwt = node.headTerminal(hf);
  if (hwt != null) {
   w = new Word(hwt.label());
   w = new Word(((HasWord) l).word());
   Tree dwt = child.headTerminal(hf);
   if (dwt != null) {
    dw = new Word(dwt.label());
    dw = new Word(((HasWord) dl).word());
  if (w != null && w.word() != null && dw != null && w.word().equals(dw.word()) && !seenHead) {
   seenHead = true;
  } else {

/**
 * Builds a new <code>Word</code> from the given <code>String</code> and
 * returns it as a <code>Label</code>.
 *
 * @param word The text handed to the <code>Word</code> constructor
 * @return The newly created label
 */
public Label newLabelFromString(String word) {
 final Label created = new Word(word);
 return created;
}

Node node = new Node();
node.setLabel(parse.value());
for (edu.stanford.nlp.trees.Tree pt : parse.getChildrenAsList()) {
  if (!node.isSetChildren()) {
    node.setChildren(new TreeMap<Integer, String>());
  if (pt.isLeaf()) {
    continue;
  } else {
List<Word> words = parse.yieldWords();
span.setStart(words.get(0).beginPosition() + offset);
span.setEnding(words.get(words.size() - 1).endPosition() + offset);
Span rawSpan = getRawSpan(span);
node.setSpan(rawSpan);

List<HasWord> sentence = new ArrayList<>();
for (String word : sent) {
 sentence.add(new Word(word));
parse.pennPrint();
parse.pennPrint();
System.out.println();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
for (Label lab : parse.yield()) {
 if (lab instanceof CoreLabel) {
  System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));

/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. This implementation iterates over the <code>Word</code> elements
 * of this Document and joins the result of each element's
 * <code>word()</code> call with single spaces.
 *
 * Subclasses that maintain additional information may wish to
 * override this method.
 *
 * @return the space-separated words; empty when there are no elements
 */
public String presentableText() {
 final StringBuilder text = new StringBuilder();
 for (Word token : this) {
  // Separate from whatever has already been written; nothing to separate from at the start.
  String separator = (text.length() > 0) ? " " : "";
  text.append(separator).append(token.word());
 }
 return text.toString();
}

/**
 * Stems the text returned by <code>w.value()</code> and wraps the outcome
 * in a newly constructed <code>Word</code>.
 *
 * @param w the word whose value is stemmed
 * @return a new <code>Word</code> built from the stemmed value
 */
public Word stem(Word w) {
 final Word stemmed = new Word(stem(w.value()));
 return stemmed;
}

  /**
   * Tokenizes <code>sentence</code> and returns one <code>StringInText</code>
   * per token, with the token's positions shifted by the running
   * <code>startpos</code>. Afterwards <code>startpos</code> is advanced by the
   * sentence length plus one.
   *
   * @param sentence the text to tokenize; must not be null
   * @return the tokens in the order produced by <code>tokenize</code>
   * @throws NullPointerException if <code>sentence</code> is null
   */
  @Override
  public StringInText[] tokenizeplus(String sentence) {
    // The unused StringReader that used to be built here rejected null up front;
    // keep that fail-fast behaviour now that the reader itself is gone.
    if (sentence == null) {
      throw new NullPointerException("sentence");
    }

    List<StringInText> l = new ArrayList<>();
    for (String s : tokenize(sentence)) {
      // NOTE(review): the positions are read from a freshly constructed Word, so
      // they are whatever that constructor defaults to -- confirm this is intended.
      Word w = new Word(s);
      l.add(new StringInText(w.word(), w.beginPosition() + startpos, w.endPosition() + startpos));
    }

    // Advance the running offset past this sentence (length + 1).
    startpos += (1 + sentence.length());

    return l.toArray(new StringInText[0]);
  }
}

Tree gold = goldTop.firstChild();
List<HasWord> goldSentence = gold.yieldHasWord();
if (goldSentence.size() > maxLength) {
 log.info("Skipping sentence; too long: " + goldSentence.size());
  tree.pennPrint(pw);
 } else {
  Iterator sentIter = s.iterator();
  for (; ;) {
   Word word = (Word) sentIter.next();
   pw.print(word.word());
   if (sentIter.hasNext()) {
    pw.print(" ");

/**
 * Merges adjacent tokens whose touching characters are both reported as
 * non-Chinese by <code>isChinese</code>: the last character of the previously
 * kept token and the first character of the current token are tested, and
 * when neither is Chinese the two tokens are replaced by one <code>Word</code>
 * holding their concatenated text.
 * <p>
 * Tokens whose string form is empty have no boundary character to test, so
 * they are never merged and are passed through unchanged (previously they
 * caused a <code>StringIndexOutOfBoundsException</code>).
 *
 * @param sent the tokens to process; not modified
 * @return a new list holding the merged tokens
 */
private static ArrayList<Word> postProcessSentence(ArrayList<Word> sent) {
 ArrayList<Word> newSent = new ArrayList<>();
 for (Word word : sent) {
  if (!newSent.isEmpty()) {
   int lastIndex = newSent.size() - 1;
   String prevWord = newSent.get(lastIndex).toString();
   String curWord = word.toString();
   // Guard the substring calls: an empty string has no first/last character.
   if (!prevWord.isEmpty() && !curWord.isEmpty()) {
    String prevChar = prevWord.substring(prevWord.length() - 1);
    String curChar = curWord.substring(0, 1);
    if (!isChinese(prevChar) && !isChinese(curChar)) {
     // Replace the previously kept token with the merged one and move on.
     newSent.set(lastIndex, new Word(prevWord + curWord));
     continue;
    }
   }
  }
  newSent.add(word);
 }
 // newSent is already a fresh list, so no extra defensive copy is needed.
 return newSent;
}

String s = (String) child;
for (String w : s.split(" ")) {
  Word word = new Word();
  word.setValue(w.trim());
  list.add(word);

Matcher hasArabic = utf8ArabicChart.matcher(token.word());
if(hasArabic.find()) {
 token.setWord(escaper.apply(token.word()));
 token.setWord(lexMapper.map(null, token.word()));

Javadoc

A Word object acts as a Label by containing a String. This class is in essence identical to a StringLabel, but it also uses the value to implement the HasWord interface.

Most used methods

<init>
Construct a new word, with the given value.
word
beginPosition
endPosition
setValue
toString
value
setBeginPosition
setEndPosition
setWord
addSynonym
counterIncrease

Popular in Java

Reading from database using SQL prepared statement
getApplicationContext (Context)
addToBackStack (FragmentTransaction)
scheduleAtFixedRate (ScheduledExecutorService)
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
ServerSocket (java.net)
This class represents a server-side socket that waits for incoming client connections. A ServerSocke
Permission (java.security)
Legacy security code; do not use.
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
FileUtils (org.apache.commons.io)
General file manipulation utilities. Facilities are provided in the following areas: * writing to a
Graphics2D (java.awt)
This Graphics2D class extends the Graphics class to provide more sophisticated control over graphics
Best IntelliJ plugins

How to use Word in edu.stanford.nlp.ling

Best Java code snippets using edu.stanford.nlp.ling.Word (Showing top 20 results out of 315)

Refine search

How to use
Word
in
edu.stanford.nlp.ling