/**
 * Concatenates the {@code word()} of every item, inserting {@code glue}
 * between consecutive words.
 *
 * @param l    The items whose words are joined, in iteration order
 * @param glue The separator placed between two adjacent words
 * @return The joined String; the empty String if {@code l} yields no items
 */
public static String joinWords(Iterable<? extends HasWord> l, String glue) {
  // The element count is only a rough initial-capacity hint for the builder.
  // An unbounded wildcard is used instead of the raw Collection type.
  int initialCapacity = (l instanceof Collection) ? ((Collection<?>) l).size() : 64;
  StringBuilder sb = new StringBuilder(initialCapacity);
  boolean first = true;
  for (HasWord o : l) {
    if (first) {
      first = false;
    } else {
      sb.append(glue);
    }
    sb.append(o.word());
  }
  return sb.toString();
}
/**
 * Creates the tree node for an elliptic element of the parse XML: a node
 * labeled with the element's name whose only child is an empty leaf.
 */
private Tree buildEllipticNode(Node root) {
  final String nodeName = ((Element) root).getNodeName();

  Tree emptyLeaf = treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
  if (emptyLeaf.label() instanceof HasWord) {
    // Keep the label's word in sync with the leaf value
    HasWord leafWord = (HasWord) emptyLeaf.label();
    leafWord.setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
  }

  List<Tree> children = new ArrayList<>();
  children.add(emptyLeaf);
  return treeFactory.newTreeNode(nodeName, children);
}
// Fragment made of two excerpts. Each one creates a leaf from a string via treeFactory,
// copies that string into the label's word when the label is a HasWord, and, when the label
// is a CoreLabel and lemmas is non-null, sets the lemma from lemmas (index i in the first
// excerpt, index 0 in the second). NOTE(review): the enclosing method(s) and the closing
// braces are not visible here.
Tree leafNode = treeFactory.newLeaf(s); if(leafNode.label() instanceof HasWord) ((HasWord) leafNode.label()).setWord(s); if (leafNode.label() instanceof CoreLabel && lemmas != null) { ((CoreLabel) leafNode.label()).setLemma(lemmas.get(i)); Tree leafNode = treeFactory.newLeaf(leafStr); if (leafNode.label() instanceof HasWord) ((HasWord) leafNode.label()).setWord(leafStr); if (leafNode.label() instanceof CoreLabel && lemmas != null) { ((CoreLabel) leafNode.label()).setLemma(lemmas.get(0));
/**
 * Returns the text for a label: its {@code word()} when the label is a
 * {@link HasWord} with a non-null word, otherwise its {@code value()}.
 *
 * @param label The label to read
 * @return The word if available, else the label's value
 */
protected String getText(Label label) {
  String word = (label instanceof HasWord) ? ((HasWord) label).word() : null;
  return (word != null) ? word : label.value();
}
/**
 * Americanizes the word of the given HasWord, updating it in place.
 *
 * @param w A HasWord whose word is converted to American spelling if needed.
 * @return The same object that was passed in; its word is replaced only
 *         when the Americanized form differs from the original.
 */
@Override
public HasWord apply(HasWord w) {
  final String original = w.word();
  final String americanized = americanize(original, capitalizeTimex);
  if (americanized.equals(original)) {
    // Nothing changed, so leave the word untouched
    return w;
  }
  w.setWord(americanized);
  return w;
}
// Sets hw's word to the value() of the leaf's label.
hw.setWord(leaf.label().value());
/**
 * Extracts the token text from a supported token object.
 *
 * @param o A HasWord (its {@code word()} is used), a String (returned as is),
 *          or a CoreMap (its TextAnnotation is used)
 * @return The token text
 * @throws RuntimeException If {@code o} is none of the supported types
 */
@SuppressWarnings("OverlyStrongTypeCast")
private static String getString(Object o) {
  if (o instanceof HasWord) {
    return ((HasWord) o).word();
  }
  if (o instanceof String) {
    return (String) o;
  }
  if (o instanceof CoreMap) {
    CoreMap map = (CoreMap) o;
    return map.get(CoreAnnotations.TextAnnotation.class);
  }
  throw new RuntimeException("Expected token to be either Word or String.");
}
/** Converts an input list of {@link HasWord} in IBM Arabic to
 *  LDC ATBv3 representation by passing each word through the
 *  single-word {@code apply(String)}.
 *  <p>
 *  Only the list itself is copied: the returned list holds the same
 *  {@link HasWord} objects as the input, and each of them has its word
 *  replaced in place.
 *
 *  @param sentence The words to convert
 *  @return A new list containing the (now converted) input items.
 *  @throws RuntimeException If a word is mapped to null (per the original
 *          documentation; raised by the per-word mapping, which is not
 *          visible here)
 */
@Override
public List<HasWord> apply(List<HasWord> sentence) {
  List<HasWord> converted = new ArrayList<>(sentence);
  for (HasWord token : converted) {
    String mapped = apply(token.word());
    token.setWord(mapped);
  }
  return converted;
}
/**
 * Creates the preterminal tree node for the word described by the given
 * XML node: a node labeled with the normalized POS tag whose only child is
 * a leaf holding the normalized word.
 */
private Tree buildWordNode(Node root) {
  Element eRoot = (Element) root;

  String posStr = treeNormalizer.normalizeNonterminal(getPOS(eRoot));
  String lemma = eRoot.getAttribute(ATTR_LEMMA);
  String leafStr = treeNormalizer.normalizeTerminal(getWord(eRoot));

  // Leaf: carries the word and, when the label supports it, the lemma
  Tree leafNode = treeFactory.newLeaf(leafStr);
  if (leafNode.label() instanceof HasWord) {
    HasWord wordLabel = (HasWord) leafNode.label();
    wordLabel.setWord(leafStr);
  }
  if (leafNode.label() instanceof HasLemma && lemma != null) {
    HasLemma lemmaLabel = (HasLemma) leafNode.label();
    lemmaLabel.setLemma(lemma);
  }

  // Preterminal: carries the POS tag
  List<Tree> children = new ArrayList<>();
  children.add(leafNode);
  Tree preterminal = treeFactory.newTreeNode(posStr, children);
  if (preterminal.label() instanceof HasTag) {
    HasTag tagLabel = (HasTag) preterminal.label();
    tagLabel.setTag(posStr);
  }
  return preterminal;
}
/**
 * Builds the space-separated n-gram made of the list items from start
 * (inclusive) to end (exclusive). Items that are {@link HasWord}
 * contribute their {@code word()}; all others contribute {@code toString()}.
 *
 * @param list The items to take the n-gram from
 * @param start Leftmost index of the n-gram
 * @param end Index one past the rightmost item of the n-gram
 * @return The n-gram as a String, or null if start is negative, end exceeds
 *         the list size, or start is not smaller than end.
 */
public static <T> String extractNgram(List<T> list, int start, int end) {
  boolean validRange = start >= 0 && end <= list.size() && start < end;
  if ( ! validRange) {
    return null;
  }
  final StringBuilder ngram = new StringBuilder();
  for (int pos = start; pos < end; pos++) {
    T item = list.get(pos);
    if (ngram.length() > 0) {
      ngram.append(' ');
    }
    String text;
    if (item instanceof HasWord) {
      text = ((HasWord) item).word();
    } else {
      text = item.toString();
    }
    ngram.append(text);
  }
  return ngram.toString();
}
/** <i>Note:</i> At present this clobbers the input list items. * This should be fixed. */ public List<HasWord> apply(List<HasWord> arg) { List<HasWord> ans = new ArrayList<>(arg); for (HasWord wd : ans) { String w = wd.word(); Matcher m2 = p2.matcher(w); // log.info("Escaper: w is " + w); if (m2.find()) { // log.info(" Found pattern."); w = m2.replaceAll("$1"); // log.info(" Changed it to: " + w); } String newW = UTF8EquivalenceFunction.replaceAscii(w); wd.setWord(newW); } return ans; }
/**
 * Collects the yield of the tree: the {@code Label} of every leaf, in
 * left-to-right order, appended to the supplied list. Null labels are
 * added like any other value. A single list is threaded through the
 * recursion rather than building one list per subtree.
 * <p>
 * As a side effect, a leaf label that is a {@code HasWord} has its word
 * set to the label's value.
 *
 * @param y The list that receives the yield. It is normally empty on
 *          entry; if not, the leaves are appended after the existing items.
 * @return The list {@code y}, now also holding the labels of the leaves.
 */
@SuppressWarnings("unchecked")
public <T> List<T> yield(List<T> y) {
  if ( ! isLeaf()) {
    for (Tree child : children()) {
      child.yield(y);
    }
    return y;
  }
  if (label() instanceof HasWord) {
    ((HasWord) label()).setWord(label().value());
  }
  y.add((T) label());
  return y;
}
/** * Splits the Word w on the character splitChar. */ private HasWord splitTag(HasWord w) { if (splitChar == 0) { return w; } String s = w.word(); int split = s.lastIndexOf(splitChar); if (split <= 0) { // == 0 isn't allowed - no empty words! return w; } String word = s.substring(0, split); String tag = s.substring(split + 1, s.length()); return new TaggedWord(word, tag); }
/**
 * Escapes the word of every input item in place and returns the items in
 * a new list, optionally post-processed by {@code fixQuotes}.
 *
 * @param input must be a List of objects of type HasWord
 * @return The escaped items; passed through {@code fixQuotes(List)} when
 *         the {@code fixQuotes} flag is set
 */
@Override
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> escaped = new ArrayList<>();
  for (IN token : input) {
    token.setWord(escapeString(token.word()));
    escaped.add(token);
  }
  if ( ! fixQuotes) {
    return escaped;
  }
  return fixQuotes(escaped);
}
// Stores word on newLeaf's label. The cast is unguarded here, so this assumes the label
// implements HasWord — TODO confirm against the surrounding code.
((HasWord) newLeaf.label()).setWord(word);
/**
 * Returns a presentable version of the given PTB-tokenized words.
 * Only the word() value of each item is used (so that additional text,
 * e.g., POS tags, does not creep in); the collected words are then handed
 * to the list-based {@code ptb2Text}.
 *
 * @param ptbWords A list of HasWord objects
 * @return A presentable version of the given PTB-tokenized words
 */
public static String labelList2Text(List<? extends HasWord> ptbWords) {
  final List<String> tokenTexts = new ArrayList<>();
  for (HasWord token : ptbWords) {
    String text = token.word();
    tokenTexts.add(text);
  }
  return ptb2Text(tokenTexts);
}
// Fragment made of several excerpts. The first test checks whether the last input token is a
// straight double quote. The repeated body rewrites a straight double-quote token as ``
// when begin is true and as '' otherwise, flipping begin after each replacement.
// NOTE(review): the enclosing loops/method and the closing braces are not visible here.
if (input.get(inputSize - 1).word().equals("\"")) { String tok = hw.word(); if (tok.equals("\"")) { if (begin) { hw.setWord("``"); begin = false; } else { hw.setWord("\'\'"); begin = true; String tok = hw.word(); if (tok.equals("\"")) { if (begin) { hw.setWord("``"); begin = false; } else { hw.setWord("\'\'"); begin = true;
/**
 * Builds the parse tree node for an elliptic node of the parse XML. The
 * result is labeled with the XML node's name and dominates one leaf that
 * holds the empty-leaf marker.
 */
private Tree buildEllipticNode(Node root) {
  Element element = (Element) root;
  String label = element.getNodeName();

  List<Tree> daughters = new ArrayList<>();
  Tree placeholder = treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
  if (placeholder.label() instanceof HasWord) {
    ((HasWord) placeholder.label()).setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
  }
  daughters.add(placeholder);

  return treeFactory.newTreeNode(label, daughters);
}
/**
 * Creates a new CategoryWordTag label from an existing label.
 * The existing label is passed to the superclass constructor, so its
 * value() -- i.e., category -- is used for the new label. The tag is
 * copied iff the existing label implements HasTag, and the word is copied
 * iff it implements HasWord.
 *
 * @param oldLabel The label to use as a basis of this Label
 */
public CategoryWordTag(Label oldLabel) {
  super(oldLabel);
  if (oldLabel instanceof HasTag) {
    HasTag tagged = (HasTag) oldLabel;
    this.tag = tagged.tag();
  }
  if (oldLabel instanceof HasWord) {
    HasWord worded = (HasWord) oldLabel;
    this.word = worded.word();
  }
}