/**
 * Joins the {@code word()} values of the given labels into one String,
 * separated by the given glue string.
 *
 * @param l    The tokens whose words are joined (e.g., a sentence)
 * @param glue Separator inserted between consecutive words
 * @return The words joined by glue
 */
public static String joinWords(Iterable<? extends HasWord> l, String glue) {
  // Pre-size the builder when the input size is cheaply known; 64 is a guess otherwise.
  // Wildcard on the Collection cast avoids the raw type (size() needs no element type).
  StringBuilder sb = new StringBuilder(l instanceof Collection ? ((Collection<?>) l).size() : 64);
  boolean first = true;
  for (HasWord o : l) {
    if ( ! first) {
      sb.append(glue);
    } else {
      first = false;
    }
    sb.append(o.word());
  }
  return sb.toString();
}
/**
 * Returns the display text for a label: its {@code word()} when the label
 * is a {@code HasWord} with a non-null word, otherwise its {@code value()}.
 *
 * @param label The label to extract text from
 * @return The word if available, else the value
 */
protected String getText(Label label) {
  if (label instanceof HasWord) {
    String w = ((HasWord) label).word();
    if (w != null) {
      return w;
    }
  }
  // Not a HasWord, or its word was null: fall back to the label value.
  return label.value();
}
/**
 * Extracts the textual content of a token, which may be a {@code HasWord},
 * a {@code String}, or a {@code CoreMap} carrying a TextAnnotation.
 *
 * @param o The token object
 * @return Its textual content
 * @throws RuntimeException If the token is of an unsupported type
 */
@SuppressWarnings("OverlyStrongTypeCast")
private static String getString(Object o) {
  if (o instanceof HasWord) {
    HasWord h = (HasWord) o;
    return h.word();
  } else if (o instanceof String) {
    return (String) o;
  } else if (o instanceof CoreMap) {
    return ((CoreMap) o).get(CoreAnnotations.TextAnnotation.class);
  } else {
    // Message updated to mention CoreMap, which the branch above accepts.
    throw new RuntimeException("Expected token to be either Word, String, or CoreMap.");
  }
}
/**
 * Returns the substring of the sentence from start (inclusive)
 * to end (exclusive).
 *
 * @param start Leftmost index of the substring
 * @param end Rightmost index of the ngram
 * @return The ngram as a String. Currently returns null if one of the indices is out of bounds.
 *     But maybe it should exception instead.
 */
public static <T> String extractNgram(List<T> list, int start, int end) {
  if (start < 0 || end > list.size() || start >= end) {
    return null;
  }
  final StringBuilder ngram = new StringBuilder();
  for (int i = start; i < end; i++) {
    // Only separate with a space once something has actually been appended.
    if (ngram.length() != 0) {
      ngram.append(' ');
    }
    T item = list.get(i);
    ngram.append(item instanceof HasWord ? ((HasWord) item).word() : item.toString());
  }
  return ngram.toString();
}
/** * Splits the Word w on the character splitChar. */ private HasWord splitTag(HasWord w) { if (splitChar == 0) { return w; } String s = w.word(); int split = s.lastIndexOf(splitChar); if (split <= 0) { // == 0 isn't allowed - no empty words! return w; } String word = s.substring(0, split); String tag = s.substring(split + 1, s.length()); return new TaggedWord(word, tag); }
/**
 * Returns a presentable version of the given PTB-tokenized words.
 * Pass in a List of Words or a Document and this method will
 * take the word() values (to prevent additional text from creeping in, e.g., POS tags),
 * and call {@link #ptb2Text(String)} on the output.
 *
 * @param ptbWords A list of HasWord objects
 * @return A presentable version of the given PTB-tokenized words
 */
public static String labelList2Text(List<? extends HasWord> ptbWords) {
  List<String> strings = new ArrayList<>(ptbWords.size());
  for (HasWord w : ptbWords) {
    strings.add(w.word());
  }
  return ptb2Text(strings);
}
/**
 * Creates a new CategoryWordTag label from an existing label.
 * The oldLabel value() -- i.e., category -- is used for the new label.
 * The tag and word are initialized iff the current label implements
 * HasTag and HasWord respectively.
 *
 * @param oldLabel The label to use as a basis of this Label
 */
public CategoryWordTag(Label oldLabel) {
  super(oldLabel);
  // Copy over tag and word only when the source label actually carries them.
  if (oldLabel instanceof HasTag) {
    tag = ((HasTag) oldLabel).tag();
  }
  if (oldLabel instanceof HasWord) {
    word = ((HasWord) oldLabel).word();
  }
}
/**
 * Prints each document token (padded to width 10) followed by the state
 * assigned to that position in each sample sequence.
 * NOTE(review): {@code samples} is a raw List (of int[]); the raw type is
 * kept to preserve the existing method signature.
 */
public void printSamples(List samples, PrintStream out) {
  for (int pos = 0; pos < document.size(); pos++) {
    HasWord token = (HasWord) document.get(pos);
    String text = (token == null) ? "null" : token.word();
    out.print(StringUtils.padOrTrim(text, 10));
    for (Object sample : samples) {
      int[] sequence = (int[]) sample;
      out.print(" " + StringUtils.padLeft(sequence[pos], 2));
    }
    out.println();
  }
}
/**
 * Escapes each word in the sentence via {@code ATBTreeUtils.escape}.
 * NOTE(review): the copy below is shallow — the returned list is a new
 * object, but its HasWord elements are shared with the input, so the
 * {@code setWord} calls mutate the caller's tokens as well.
 */
public List<HasWord> apply(List<HasWord> in) { List<HasWord> escaped = new ArrayList<>(in); for (HasWord word : escaped) { /* mutates the shared token in place */ word.setWord(ATBTreeUtils.escape(word.word())); } return escaped; }
/**
 * Parses one annotation line of the form "LABEL tok1 tok2 ..." and records,
 * for every position in {@code tokens} where the sequence tok1..tokN matches
 * consecutively, a span (start, start + N - 1) mapped to LABEL.
 *
 * @param spanToLabels Output map from (inclusive) token-index spans to labels
 * @param tokens       The tokenized sentence to search in
 * @param line         Whitespace-separated: a label followed by its token sequence
 * @throws RuntimeException If the line has a label but no tokens after it
 */
public static void extractLabels(Map<Pair<Integer, Integer>, String> spanToLabels, List<HasWord> tokens, String line) { String[] pieces = line.trim().split("\\s+"); if (pieces.length == 0) { return; } if (pieces.length == 1) { String error = "Found line with label " + line + " but no tokens to associate with that line"; throw new RuntimeException(error); } //TODO: BUG: The pieces are tokenized differently than the splitting, e.g., on possessive markers as in "actors' expenses" /* pieces[0] is the label, so the candidate window is pieces.length - 1 tokens wide; the bound below is tokens.size() - (pieces.length - 1) + 1, i.e. the last valid start position plus one. */ for (int i = 0; i < tokens.size() - pieces.length + 2; ++i) { boolean found = true; for (int j = 1; j < pieces.length; ++j) { if (!tokens.get(i + j - 1).word().equals(pieces[j])) { found = false; break; } } if (found) { /* span is inclusive on both ends */ spanToLabels.put(new Pair<>(i, i + pieces.length - 1), pieces[0]); } } }
/** <i>Note:</i> At present this clobbers the input list items. * This should be fixed. */ public List<HasWord> apply(List<HasWord> arg) { List<HasWord> ans = new ArrayList<>(arg); for (HasWord wd : ans) { String w = wd.word(); Matcher m2 = p2.matcher(w); // log.info("Escaper: w is " + w); if (m2.find()) { // log.info(" Found pattern."); w = m2.replaceAll("$1"); // log.info(" Changed it to: " + w); } String newW = UTF8EquivalenceFunction.replaceAscii(w); wd.setWord(newW); } return ans; }
/**
 * Americanize the HasWord or String coming in.
 *
 * @param w A HasWord or String to convert to American if needed.
 * @return Either the input or an Americanized version of it.
 */
@Override
public HasWord apply(HasWord w) {
  String original = w.word();
  String american = americanize(original, capitalizeTimex);
  // Only mutate the token when the spelling actually changed.
  if (!american.equals(original)) {
    w.setWord(american);
  }
  return w;
}
/** Converts an input list of {@link HasWord} in IBM Arabic to
 * LDC ATBv3 representation.
 * NOTE(review): only the list itself is copied before escaping — the copy
 * is shallow, so the shared HasWord elements ARE mutated in place and the
 * caller's tokens change too. The previous claim that the input is safely
 * copied was inaccurate.
 *
 * @param sentence A collection of type {@link edu.stanford.nlp.ling.Word}
 * @return A new list containing the (shared, now escaped) words.
 * @throws RuntimeException If a word is mapped to null
 */
@Override public List<HasWord> apply(List<HasWord> sentence) { List<HasWord> newSentence = new ArrayList<>(sentence); for (HasWord wd : newSentence) wd.setWord(apply(wd.word())); return newSentence; }
@Override @SuppressWarnings("unchecked") protected T getNext() { try { T nextToken = null; // Depending on the orthographic normalization options, // some tokens can be obliterated. In this case, keep iterating // until we see a non-zero length token. do { nextToken = (T) lexer.next(); } while (nextToken != null && nextToken.word().length() == 0); return nextToken; } catch (IOException e) { throw new RuntimeIOException(e); } }
/**
 * Escapes each token's word in place and, if configured, fixes quotes
 * across the whole sentence.
 *
 * @param input must be a List of objects of type HasWord
 */
@Override
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> result = new ArrayList<>();
  for (IN token : input) {
    // Mutates the token in place, then collects it.
    token.setWord(escapeString(token.word()));
    result.add(token);
  }
  return fixQuotes ? fixQuotes(result) : result;
}
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) { // CharacterLevelTagExtender ext = new CharacterLevelTagExtender(); ClassicCounter<Integer> c = new ClassicCounter<>(); for (Tree gold : tb) { StringBuilder goldChars = new StringBuilder(); ArrayList goldYield = gold.yield(); for (Object aGoldYield : goldYield) { Word word = (Word) aGoldYield; goldChars.append(word); } List<HasWord> ourWords = segment(goldChars.toString()); for (HasWord ourWord : ourWords) { c.incrementCount(Integer.valueOf(ourWord.word().length())); } } return Distribution.getDistribution(c); }
@Override public boolean test(Dependency<G, D, N> d) { /* log.info("DRF: Checking " + d + ": hasWord?: " + (d.dependent() instanceof HasWord) + "; value: " + ((d.dependent() instanceof HasWord)? ((HasWord) d.dependent()).word(): d.dependent().value())); */ if (d == null) { return false; } String word = null; if (d.dependent() instanceof HasWord) { word = ((HasWord) d.dependent()).word(); } if (word == null) { word = d.dependent().value(); } // log.info("Dep: kid is " + ((MapLabel) d.dependent()).toString("value{map}")); return wordRejectFilter.test(word); }
/**
 * Construct a fall through tree in case we can't parse this sentence.
 *
 * @param words Words of the sentence that didn't parse
 * @return A tree with X for all the internal nodes.
 *     Preterminals have the right tag if the words are tagged.
 */
public static Tree xTree(List<? extends HasWord> words) {
  TreeFactory tf = new LabeledScoredTreeFactory();
  List<Tree> preterminals = new ArrayList<>();
  for (HasWord hw : words) {
    Tree leaf = tf.newLeaf(hw.word());
    // Use the word's tag when available, otherwise the dummy tag "XX".
    String tag = "XX";
    if (hw instanceof HasTag && ((HasTag) hw).tag() != null) {
      tag = ((HasTag) hw).tag();
    }
    preterminals.add(tf.newTreeNode(tag, Collections.singletonList(leaf)));
  }
  return tf.newTreeNode("X", preterminals);
}
/** Turns a sentence into a flat phrasal tree.
 *  The structure is S -> tag*. And then each tag goes to a word.
 *  The tag is either found from the label or made "WD".
 *  The tag and phrasal node have a StringLabel.
 *
 *  @param s The Sentence to make the Tree from
 *  @param lf The LabelFactory with which to create the new Tree labels
 *  @return The one phrasal level Tree
 */
public static Tree toFlatTree(List<? extends HasWord> s, LabelFactory lf) {
  List<Tree> preterminals = new ArrayList<>(s.size());
  for (HasWord hw : s) {
    Tree leaf = new LabeledScoredTreeNode(lf.newLabel(hw.word()));
    // TaggedWords keep their own tag; everything else gets the dummy tag "WD".
    Label tagLabel = (hw instanceof TaggedWord)
        ? new StringLabel(((TaggedWord) hw).tag())
        : lf.newLabel("WD");
    preterminals.add(new LabeledScoredTreeNode(tagLabel, Collections.singletonList(leaf)));
  }
  return new LabeledScoredTreeNode(new StringLabel("S"), preterminals);
}
/**
 * Do nothing other than decorate the label with @.
 * NOTE(review): assumes the tree's label implements both HasWord and
 * HasTag; a ClassCastException results otherwise — confirm callers
 * guarantee this.
 */
private static Label makeSimpleSyntheticLabel(Tree t) {
  Label lab = t.label();
  String category = '@' + lab.value();
  return new CategoryWordTag(category, ((HasWord) lab).word(), ((HasTag) lab).tag());
}