Refine search
@SuppressWarnings("unchecked") public <X extends HasWord> ArrayList<X> yieldHasWord(ArrayList<X> y) { if (isLeaf()) { Label lab = label(); // cdm: this is new hacked in stuff in Mar 2007 so we can now have a // well-typed version of a Sentence, whose objects MUST implement HasWord // // wsg (Feb. 2010) - More hacks for trees with CoreLabels in which the type implements // HasWord but only the value field is populated. This can happen if legacy code uses // LabeledScoredTreeFactory but passes in a StringLabel to e.g. newLeaf(). if (lab instanceof HasWord) { if(lab instanceof CoreLabel) { CoreLabel cl = (CoreLabel) lab; if(cl.word() == null) cl.setWord(cl.value()); y.add((X) cl); } else { y.add((X) lab); } } else { y.add((X) new Word(lab)); } } else { Tree[] kids = children(); for (Tree kid : kids) { kid.yield(y); } } return y; }
/**
 * Shifts the begin and end positions of every token in {@code tokens} by
 * {@code offset}. The tokens are updated in place.
 *
 * @param tokens the tokens whose positions are adjusted
 * @param offset the amount added to each begin and end position
 */
public static void updateOffsets(List<Word> tokens, int offset) {
  for (Word token : tokens) {
    // Begin position is rewritten first, then the end position.
    token.setBeginPosition(
        token.beginPosition() + offset);
    token.setEndPosition(
        token.endPosition() + offset);
  }
}
/**
 * Renders an array of tokens as a single space-separated string, where each
 * token is written as {@code word{begin, end}}.
 *
 * @param tokens the tokens to render
 * @return the rendered string; empty when {@code tokens} has no elements
 */
public static String tokensToString(Word [] tokens) {
  StringBuilder out = new StringBuilder(512);
  // Empty before the first token, a single space before every later one.
  String separator = "";
  for (Word token : tokens) {
    out.append(separator)
        .append(token.word())
        .append("{")
        .append(token.beginPosition())
        .append(", ")
        .append(token.endPosition())
        .append("}");
    separator = " ";
  }
  return out.toString();
}
String parentStr; String grandParentStr; if (root == null || t.equals(root)) { parent = null; parentStr = ""; } else { parent = t.parent(root); parentStr = parent.label().value(); if (parent == null || parent.equals(root)) { String baseGrandParentStr = tlpParams.treebankLanguagePack().basicCategory(grandParentStr); if (t.isLeaf()) { return tf.newLeaf(new Word(t.label().value())); String word = t.headTerminal(hf).value();
/**
 * Appends a {@code Word} built from the label of every leaf under this node,
 * in the order the children are visited, to {@code y}.
 *
 * @param y the list that receives the words; it is modified in place
 * @return the same list {@code y} that was passed in
 */
public ArrayList<Word> yieldWords(ArrayList<Word> y) {
  if (!isLeaf()) {
    // Interior node: let each child contribute its own leaves.
    for (Tree child : children()) {
      child.yieldWords(y);
    }
    return y;
  }
  // Leaf: wrap this node's label in a new Word.
  Word leafWord = new Word(label());
  y.add(leafWord);
  return y;
}
/** * Changes the ROOT label, and adds a Lexicon.BOUNDARY daughter to it. * This is needed for the dependency parser. * <i>Note:</i> This is a destructive operation on the tree passed in!! * * @param t The current tree into which a boundary is inserted */ public void addRoot(Tree t) { if (t.isLeaf()) { log.info("Warning: tree is leaf: " + t); t = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t)); } t.setLabel(new CategoryWordTag(tlp.startSymbol(), Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG)); List<Tree> preTermChildList = new ArrayList<>(); Tree boundaryTerm = tf.newLeaf(new Word(Lexicon.BOUNDARY));//CategoryWordTag(Lexicon.BOUNDARY,Lexicon.BOUNDARY,"")); preTermChildList.add(boundaryTerm); Tree boundaryPreTerm = tf.newTreeNode(new CategoryWordTag(Lexicon.BOUNDARY_TAG, Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG), preTermChildList); List<Tree> childList = t.getChildrenAsList(); childList.add(boundaryPreTerm); t.setChildren(childList); }
for (Tree node : subTrees()) { if (node.isLeaf() || node.children().length < 2) { continue; Tree hwt = node.headTerminal(hf); if (hwt != null) { w = new Word(hwt.label()); w = new Word(((HasWord) l).word()); Tree dwt = child.headTerminal(hf); if (dwt != null) { dw = new Word(dwt.label()); dw = new Word(((HasWord) dl).word()); if (w != null && w.word() != null && dw != null && w.word().equals(dw.word()) && !seenHead) { seenHead = true; } else {
/**
 * Builds a new {@code Word} from the given string and returns it as a
 * {@code Label}.
 *
 * @param word The string passed to the {@code Word} constructor
 * @return The newly created label
 */
public Label newLabelFromString(String word) {
  Label created = new Word(word);
  return created;
}
Node node = new Node(); node.setLabel(parse.value()); for (edu.stanford.nlp.trees.Tree pt : parse.getChildrenAsList()) { if (!node.isSetChildren()) { node.setChildren(new TreeMap<Integer, String>()); if (pt.isLeaf()) { continue; } else { List<Word> words = parse.yieldWords(); span.setStart(words.get(0).beginPosition() + offset); span.setEnding(words.get(words.size() - 1).endPosition() + offset); Span rawSpan = getRawSpan(span); node.setSpan(rawSpan);
List<HasWord> sentence = new ArrayList<>(); for (String word : sent) { sentence.add(new Word(word)); parse.pennPrint(); parse.pennPrint(); System.out.println(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); for (Label lab : parse.yield()) { if (lab instanceof CoreLabel) { System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. This implementation iterates over the {@code Word}s of this
 * Document and joins the result of each {@code word()} call with single
 * spaces. A space is emitted before a word only when some text has already
 * been accumulated.
 *
 * Subclasses that maintain additional information may wish to
 * override this method.
 *
 * @return the words of this Document separated by spaces
 */
public String presentableText() {
  StringBuilder text = new StringBuilder();
  for (Word token : this) {
    boolean hasTextAlready = text.length() != 0;
    if (hasTextAlready) {
      text.append(' ');
    }
    text.append(token.word());
  }
  return text.toString();
}
@Override public StringInText[] tokenizeplus(String sentence) { Reader r = new StringReader(sentence); List<StringInText> l = new ArrayList<>(); for (String s : tokenize(sentence)) { Word w = new Word(s); l.add(new StringInText(w.word(), w.beginPosition() + startpos, w .endPosition() + startpos)); } StringInText[] tok = new StringInText[l.size()]; // tok[0]=new StringInText(is2.io.CONLLReader09.ROOT,0,0); int i = 0; for (StringInText s : l) tok[i++] = s; startpos += (1 + sentence.length()); return tok; } }
Tree gold = goldTop.firstChild(); List<HasWord> goldSentence = gold.yieldHasWord(); if (goldSentence.size() > maxLength) { log.info("Skipping sentence; too long: " + goldSentence.size()); tree.pennPrint(pw); } else { Iterator sentIter = s.iterator(); for (; ;) { Word word = (Word) sentIter.next(); pw.print(word.word()); if (sentIter.hasNext()) { pw.print(" ");
private static ArrayList<Word> postProcessSentence(ArrayList<Word> sent) { ArrayList<Word> newSent = new ArrayList<>(); for(Word word : sent) { if(newSent.size() > 0) { String prevWord = newSent.get(newSent.size()-1).toString(); String curWord = word.toString(); String prevChar = prevWord.substring(prevWord.length()-1); String curChar = curWord.substring(0,1); if(!isChinese(prevChar) && !isChinese(curChar)) { Word mergedWord = new Word(prevWord+curWord); newSent.set(newSent.size()-1, mergedWord); //printlnErr("merged: "+mergedWord); //printlnErr("merged: "+mergedWord+" from: "+prevWord+" and: "+curWord); continue; } } newSent.add(word); } return new ArrayList<>(newSent); }