edu.stanford.nlp.ling.HasWord java code examples

/**
 * Concatenates the {@code word()} of every element of {@code l}, in iteration
 * order, placing {@code glue} between consecutive words.
 *
 * @param l    The items whose words are joined
 * @param glue The text inserted between two consecutive words
 * @return The joined words; the empty string if {@code l} has no elements
 */
public static String joinWords(Iterable<? extends HasWord> l, String glue) {
 // Initial capacity only: the element count when it is known, otherwise 64.
 final int capacity = (l instanceof Collection) ? ((Collection<?>) l).size() : 64;
 final StringBuilder joined = new StringBuilder(capacity);
 // Empty before the first word, glue before every later one.
 String separator = "";
 for (HasWord item : l) {
  joined.append(separator).append(item.word());
  separator = glue;
 }
 return joined.toString();
}

/**
 * Escapes the word of every element of {@code in} with
 * {@code ATBTreeUtils.escape} and returns the elements in a new list.
 * The list is new but its elements are the same objects as in {@code in},
 * so the caller's items are modified in place.
 *
 * @param in The words to escape
 * @return A new list holding the (now escaped) input elements
 */
public List<HasWord> apply(List<HasWord> in) {
 final List<HasWord> result = new ArrayList<>(in.size());
 for (HasWord token : in) {
  final String escapedText = ATBTreeUtils.escape(token.word());
  token.setWord(escapedText);
  result.add(token);
 }
 return result;
}

/**
 * Builds the parse tree node for an elliptic node of the parse XML: a node
 * labelled with the XML node's name whose only child is a leaf carrying
 * {@code SpanishTreeNormalizer.EMPTY_LEAF_VALUE}.
 *
 * @param root The XML node; it is cast to {@code Element}
 * @return The new tree node with its single empty leaf
 */
private Tree buildEllipticNode(Node root) {
 final String constituentStr = ((Element) root).getNodeName();

 final Tree emptyLeaf = treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
 if (emptyLeaf.label() instanceof HasWord) {
  // Keep the label's word in step with the leaf value.
  ((HasWord) emptyLeaf.label()).setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
 }

 final List<Tree> children = new ArrayList<>();
 children.add(emptyLeaf);
 return treeFactory.newTreeNode(constituentStr, children);
}

 Tree leafNode = treeFactory.newLeaf(s);
 if(leafNode.label() instanceof HasWord)
  ((HasWord) leafNode.label()).setWord(s);
 if (leafNode.label() instanceof CoreLabel && lemmas != null) {
  ((CoreLabel) leafNode.label()).setLemma(lemmas.get(i));
Tree leafNode = treeFactory.newLeaf(leafStr);
if (leafNode.label() instanceof HasWord)
 ((HasWord) leafNode.label()).setWord(leafStr);
if (leafNode.label() instanceof CoreLabel && lemmas != null) {
 ((CoreLabel) leafNode.label()).setLemma(lemmas.get(0));

/**
 * Returns the text of a label: its {@code word()} when the label is a
 * {@code HasWord} with a non-null word, otherwise its {@code value()}.
 *
 * @param label The label to read
 * @return The label's word if available, else its value
 */
protected String getText(Label label) {
 final String word = (label instanceof HasWord) ? ((HasWord) label).word() : null;
 return (word != null) ? word : label.value();
}

/**
 * Americanizes the word of the given HasWord.
 * The argument itself is updated and returned; its word is only reassigned
 * when {@code americanize} produced a different string.
 *
 * @param w A HasWord to convert to American spelling if needed.
 * @return The same object that was passed in, possibly with its word replaced.
 */
@Override
public HasWord apply(HasWord w) {
 final String original = w.word();
 final String americanized = americanize(original, capitalizeTimex);
 if (americanized.equals(original)) {
  return w;
 }
 w.setWord(americanized);
 return w;
}

hw.setWord(leaf.label().value());

/**
 * Extracts the text of a token. A {@code HasWord} yields its {@code word()},
 * a {@code String} yields itself, and a {@code CoreMap} yields its
 * {@code CoreAnnotations.TextAnnotation} value.
 *
 * @param o The token
 * @return The token's text
 * @throws RuntimeException If {@code o} is none of the supported types
 */
@SuppressWarnings("OverlyStrongTypeCast")
private static String getString(Object o) {
 if (o instanceof HasWord) {
  return ((HasWord) o).word();
 }
 if (o instanceof String) {
  return (String) o;
 }
 if (o instanceof CoreMap) {
  final CoreMap map = (CoreMap) o;
  return map.get(CoreAnnotations.TextAnnotation.class);
 }
 throw new RuntimeException("Expected token to be either Word or String.");
}

/** Converts an input list of {@link HasWord} in IBM Arabic to
 *  LDC ATBv3 representation by passing each word through
 *  {@code apply(String)}. The returned list is a new list, but it holds
 *  the same element objects as the input, so each input item's word is
 *  replaced in place.
 *
 *  @param sentence A collection of type {@link edu.stanford.nlp.ling.Word}
 *  @return A new list containing the input elements, each with its word escaped.
 *  @throws RuntimeException Whatever {@code apply(String)} throws; presumably
 *          raised when a word is mapped to null (not verifiable from this method)
 */
@Override
public List<HasWord> apply(List<HasWord> sentence) {
 final List<HasWord> escapedSentence = new ArrayList<>(sentence.size());
 for (HasWord token : sentence) {
  final String escapedText = apply(token.word());
  token.setWord(escapedText);
  escapedSentence.add(token);
 }
 return escapedSentence;
}

/**
 * Build a parse tree node corresponding to the word in the given XML node.
 * The result is a preterminal labelled with the normalized part-of-speech
 * whose single child is a leaf holding the normalized word. The leaf's lemma
 * is set only when the XML element actually carries the lemma attribute.
 *
 * NOTE(review): assumes {@code Element} is {@code org.w3c.dom.Element} — confirm.
 *
 * @param root The XML node for the word; it is cast to {@code Element}
 * @return The preterminal tree node with the word leaf as its only child
 */
private Tree buildWordNode(Node root) {
 Element eRoot = (Element) root;
 String posStr = getPOS(eRoot);
 posStr = treeNormalizer.normalizeNonterminal(posStr);

 // DOM getAttribute returns "" (not null) for an absent attribute, so the
 // null check below would never fire without an explicit presence test and
 // every lemma-less word would receive an empty lemma.
 String lemma = eRoot.hasAttribute(ATTR_LEMMA) ? eRoot.getAttribute(ATTR_LEMMA) : null;

 String word = getWord(eRoot);
 String leafStr = treeNormalizer.normalizeTerminal(word);
 Tree leafNode = treeFactory.newLeaf(leafStr);
 if (leafNode.label() instanceof HasWord) {
  ((HasWord) leafNode.label()).setWord(leafStr);
 }
 if (leafNode.label() instanceof HasLemma && lemma != null) {
  ((HasLemma) leafNode.label()).setLemma(lemma);
 }

 List<Tree> kids = new ArrayList<>();
 kids.add(leafNode);
 Tree t = treeFactory.newTreeNode(posStr, kids);
 if (t.label() instanceof HasTag) {
  ((HasTag) t.label()).setTag(posStr);
 }
 return t;
}

/**
 * Returns the items of the list from start (inclusive) to end (exclusive)
 * joined by single spaces. An item that is a {@code HasWord} contributes its
 * {@code word()}; any other item contributes its {@code toString()}.
 *
 * @param list The items to read from
 * @param start Leftmost index of the ngram (inclusive)
 * @param end Index one past the rightmost item of the ngram (exclusive)
 * @return The ngram as a String. Currently returns null if one of the indices
 *         is out of bounds or the range is empty ({@code start >= end}).
 *         But maybe it should exception instead.
 */
public static <T> String extractNgram(List<T> list, int start, int end) {
 if (start < 0 || end > list.size() || start >= end) {
  return null;
 }
 final StringBuilder sb = new StringBuilder();
 for (int i = start; i < end; i++) {
  // Decide on the separator by position rather than by buffer length:
  // a leading item with empty text leaves the buffer empty, which used
  // to swallow the space before the next item.
  if (i > start) {
   sb.append(' ');
  }
  final T o = list.get(i);
  sb.append((o instanceof HasWord) ? ((HasWord) o).word() : o.toString());
 }
 return sb.toString();
}

/** Rewrites the word of every list item: each match of {@code p2} is
 *  replaced by its first capture group, and the result is passed through
 *  {@code UTF8EquivalenceFunction.replaceAscii}.
 *
 *  <i>Note:</i> At present this clobbers the input list items, because the
 *  returned list is a new list over the same element objects.
 *  This should be fixed.
 *
 *  @param arg The words to rewrite
 *  @return A new list holding the (modified) input items
 */
public List<HasWord> apply(List<HasWord> arg) {
 final List<HasWord> result = new ArrayList<>(arg);
 for (HasWord token : result) {
  // replaceAll hands back the text unchanged when nothing matches,
  // so no separate find() check is needed.
  final String stripped = p2.matcher(token.word()).replaceAll("$1");
  token.setWord(UTF8EquivalenceFunction.replaceAscii(stripped));
 }
 return result;
}

/**
 * Collects the yield of the tree: the {@code Label} of every leaf node,
 * appended to the supplied list in left-to-right order of the leaves.
 * Null values, if any, are added like any other value. The same list is
 * handed down through the recursion, so only one List is used.
 * For a leaf whose label is a {@code HasWord}, the label's word is set to
 * the label's value before the label is added.
 *
 * @param y The list in which the yield of the tree will be placed.
 *          Normally, this will be empty when the routine is called, but
 *          if not, the new yield is added to the end of the list.
 * @return The list {@code y}, now holding the labels of the tree's leaves.
 */
@SuppressWarnings("unchecked")
public <T> List<T> yield(List<T> y) {
 if (!isLeaf()) {
  for (Tree child : children()) {
   child.yield(y);
  }
  return y;
 }
 if (label() instanceof HasWord) {
  ((HasWord) label()).setWord(label().value());
 }
 y.add((T) label());
 return y;
}

/**
 * Splits the word of w at the last occurrence of splitChar into a word part
 * and a tag part. The input is returned untouched when splitChar is 0, when
 * the character does not occur, or when it is the first character (an empty
 * word is not allowed).
 *
 * @param w The item whose word may carry a tag suffix
 * @return A new TaggedWord built from the two parts, or w itself if no split was made
 */
private HasWord splitTag(HasWord w) {
 if (splitChar != 0) {
  final String text = w.word();
  final int sepIndex = text.lastIndexOf(splitChar);
  if (sepIndex > 0) {
   return new TaggedWord(text.substring(0, sepIndex), text.substring(sepIndex + 1));
  }
 }
 return w;
}

/**
 * Replaces the word of every input item with its {@code escapeString}
 * form (the items themselves are modified) and gathers the items in a new
 * list. When the {@code fixQuotes} flag is set, that list is passed through
 * {@code fixQuotes(...)} and its result is returned instead.
 *
 * @param input must be a List of objects of type HasWord
 * @return The processed items
 */
@Override
public List<HasWord> process(List<? extends IN> input) {
 final List<HasWord> escaped = new ArrayList<>();
 for (IN item : input) {
  item.setWord(escapeString(item.word()));
  escaped.add(item);
 }
 return fixQuotes ? fixQuotes(escaped) : escaped;
}

((HasWord) newLeaf.label()).setWord(word);

/**
 * Returns a presentable version of the given PTB-tokenized words.
 * Only the word() value of each item is used (to prevent additional text
 * from creeping in, e.g., POS tags); the collected words are then handed
 * to {@code ptb2Text}.
 *
 * @param ptbWords A list of HasWord objects
 * @return A presentable version of the given PTB-tokenized words
 */
public static String labelList2Text(List<? extends HasWord> ptbWords) {
 final List<String> tokens = new ArrayList<>(ptbWords.size());
 for (HasWord item : ptbWords) {
  final String text = item.word();
  tokens.add(text);
 }
 return ptb2Text(tokens);
}

if (input.get(inputSize - 1).word().equals("\"")) {
  String tok = hw.word();
  if (tok.equals("\"")) {
   if (begin) {
    hw.setWord("``");
    begin = false;
   } else {
    hw.setWord("\'\'");
    begin = true;
  String tok = hw.word();
  if (tok.equals("\"")) {
   if (begin) {
    hw.setWord("``");
    begin = false;
   } else {
    hw.setWord("\'\'");
    begin = true;

/**
 * Builds the parse tree node for an elliptic node of the parse XML. The node
 * takes the XML node's name as its label and has exactly one child, a leaf
 * holding {@code SpanishTreeNormalizer.EMPTY_LEAF_VALUE}.
 *
 * @param root The XML node; it is cast to {@code Element}
 * @return The new tree node wrapping the empty leaf
 */
private Tree buildEllipticNode(Node root) {
 final Element element = (Element) root;
 final String nodeName = element.getNodeName();

 final Tree placeholder = treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
 if (placeholder.label() instanceof HasWord) {
  // Mirror the leaf value into the label's word field.
  ((HasWord) placeholder.label()).setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
 }

 final List<Tree> onlyChild = new ArrayList<>();
 onlyChild.add(placeholder);
 return treeFactory.newTreeNode(nodeName, onlyChild);
}

/**
 * Creates a new CategoryWordTag label from an existing label.
 * The oldLabel value() -- i.e., category -- is used for the new label.
 * The tag and word
 * are initialized iff the current label implements HasTag and HasWord
 * respectively.
 *
 * @param oldLabel The label to use as a basis of this Label
 */
public CategoryWordTag(Label oldLabel) {
 super(oldLabel);
 if (oldLabel instanceof HasTag) {
  this.tag = ((HasTag) oldLabel).tag();
 }
 if (oldLabel instanceof HasWord) {
  this.word = ((HasWord) oldLabel).word();
 }
}

Javadoc

Something that implements the HasWord interface knows about words.

Most used methods

word
Return the word value of the label (or null if none).
setWord
Set the word value for the label (if one is stored).

Popular in Java

Making http post requests using okhttp
addToBackStack (FragmentTransaction)
setContentView (Activity)
getContentResolver (Context)
HashSet (java.util)
HashSet is an implementation of a Set. All optional operations (adding and removing) are supported.
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
Handler (java.util.logging)
A Handler object accepts a logging request and exports the desired messages to a target, for example
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
Best plugins for Eclipse

How to use HasWord in edu.stanford.nlp.ling

Best Java code snippets using edu.stanford.nlp.ling.HasWord (Showing top 20 results out of 315)

How to use
HasWord
in
edu.stanford.nlp.ling