/**
 * Builds a label whose value (the word) is the given <code>String</code>.
 * Only the word is supplied to the <code>TaggedWord</code> constructor,
 * so any other fields of the label would normally be null.
 *
 * @param labelStr The String to use as the value (word) of the label
 * @return The new TaggedWord (tag will be <code>null</code>)
 */
public Label newLabel(String labelStr) {
  final TaggedWord wordOnly = new TaggedWord(labelStr);
  return wordOnly;
}
/**
 * Builds a new <code>TaggedWord</code> label from an existing
 * <code>Label</code>. Which fields end up populated depends on what the
 * source label carries; anything it lacks will be <code>null</code>.
 *
 * @param oldLabel The Label that the new label is derived from
 * @return a new label built from <code>oldLabel</code>
 */
public Label newLabel(Label oldLabel) {
  final TaggedWord derived = new TaggedWord(oldLabel);
  return derived;
}
/**
 * Builds a label that uses the given <code>String</code> as one of its
 * components. When <code>options</code> equals <code>TAG_LABEL</code> the
 * string becomes the tag (and the word is <code>null</code>); for every
 * other value of <code>options</code> the string becomes the word.
 *
 * @param labelStr The String to use as the word or the tag
 * @param options Selects what to make (use labelStr as word or tag)
 * @return The new TaggedWord (tag or word will be <code>null</code>)
 */
public Label newLabel(String labelStr, int options) {
  final boolean useAsTag = (options == TAG_LABEL);
  return useAsTag ? new TaggedWord(null, labelStr) : new TaggedWord(labelStr);
}
/**
 * Builds a label from a single <code>String</code> that holds a word and
 * a tag joined by the divider. The string is cut at the rightmost
 * occurrence of the divider; no attempt is made to interpret escape
 * sequences. Everything before the cut is the word and everything after
 * it is the tag. When the divider does not occur at all, the whole
 * string is the word and the tag is <code>null</code>.
 *
 * @param word The String to split into word and tag
 * @return The new TaggedWord
 */
public Label newLabelFromString(String word) {
  final int cut = word.lastIndexOf(divider);
  if (cut < 0) {
    // No divider present: the entire string is the word.
    return new TaggedWord(word);
  }
  final String wordPart = word.substring(0, cut);
  final String tagPart = word.substring(cut + 1);
  return new TaggedWord(wordPart, tagPart);
}
/** * Splits the Word w on the character splitChar. */ private HasWord splitTag(HasWord w) { if (splitChar == 0) { return w; } String s = w.word(); int split = s.lastIndexOf(splitChar); if (split <= 0) { // == 0 isn't allowed - no empty words! return w; } String word = s.substring(0, split); String tag = s.substring(split + 1, s.length()); return new TaggedWord(word, tag); }
/**
 * Create an ArrayList of {@code TaggedWord} from two parallel lists of
 * {@code String}, one holding the words and the other holding the tags.
 * The i-th word is paired with the i-th tag.
 *
 * @param lex a list whose items are of type {@code String} and
 *     are the words
 * @param tags a list whose items are of type {@code String} and
 *     are the tags
 * @return A new list with one {@code TaggedWord} per position of the inputs
 * @throws IllegalArgumentException if the two lists differ in size
 */
public static ArrayList<TaggedWord> toTaggedList(List<String> lex, List<String> tags) {
  final int ls = lex.size();
  final int ts = tags.size();
  // Validate before allocating; report this method's name and both sizes.
  if (ls != ts) {
    throw new IllegalArgumentException(
        "toTaggedList: lengths differ (" + ls + " words, " + ts + " tags)");
  }
  final ArrayList<TaggedWord> sent = new ArrayList<>(ls);
  for (int i = 0; i < ls; i++) {
    sent.add(new TaggedWord(lex.get(i), tags.get(i)));
  }
  return sent;
}
void primeNext() { String line; try { line = reader.readLine(); } catch (IOException e) { throw new RuntimeException(e); } if (line == null) { next = null; return; } ++numSentences; next = new ArrayList<>(); StringTokenizer st = new StringTokenizer(line); //loop over words in a single sentence while (st.hasMoreTokens()) { String token = st.nextToken(); int indexUnd = token.lastIndexOf(tagSeparator); if (indexUnd < 0) { throw new IllegalArgumentException("Data format error: can't find delimiter \"" + tagSeparator + "\" in word \"" + token + "\" (line " + (numSentences+1) + " of " + filename + ')'); } String word = token.substring(0, indexUnd).intern(); String tag = token.substring(indexUnd + 1).intern(); next.add(new TaggedWord(word, tag)); } }
/**
 * Returns a copy of the tagged words in which every tag has been passed
 * through {@code tlp.basicCategory}. The words are carried over as-is and
 * the input list is not modified.
 *
 * @param twList The tagged words to copy
 * @param tlp The language pack used to map each tag to its basic category
 * @return A new list, in the same order as {@code twList}
 */
private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
  final List<TaggedWord> cleaned = new ArrayList<>(twList.size());
  for (TaggedWord original : twList) {
    final String word = original.word();
    final String basicTag = tlp.basicCategory(original.tag());
    cleaned.add(new TaggedWord(word, basicTag));
  }
  return cleaned;
}
next.add(new TaggedWord(word, tag)); try { line = reader.readLine();
/**
 * Builds a TaggedWord from the strings that {@code wordString} and
 * {@code tagString} produce for the given indices.
 *
 * @param wordIndex The index passed to {@code wordString}
 * @param tagIndex The index passed to {@code tagString}
 * @return A new TaggedWord holding the resulting word and tag strings
 */
public TaggedWord toTaggedWord(Index<String> wordIndex, Index<String> tagIndex) {
  return new TaggedWord(wordString(wordIndex), tagString(tagIndex));
}
/**
 * Pairs each entry of {@code sent} with the tag at the same position in
 * {@code finalTags} and returns the result as a list of TaggedWord.
 * Only positions 0 to {@code size - 2} are emitted; the last position is
 * left out (presumably an end-of-sentence marker - confirm against the caller).
 * When the first element of {@code origWords} is a {@code HasOffset}, the
 * begin and end positions of the matching original word are copied onto
 * each TaggedWord.
 *
 * @return The tagged sentence as a new list
 */
private ArrayList<TaggedWord> getTaggedSentence() {
  // Only the first original word is inspected to decide whether offsets exist.
  final boolean hasOffset =
      origWords != null && !origWords.isEmpty() && (origWords.get(0) instanceof HasOffset);

  ArrayList<TaggedWord> tagged = new ArrayList<>();
  for (int idx = 0; idx < size - 1; idx++) {
    String tag = finalTags[idx];
    TaggedWord tw = new TaggedWord(sent.get(idx), tag);
    if (hasOffset) {
      HasOffset source = (HasOffset) origWords.get(idx);
      tw.setBeginPosition(source.beginPosition());
      tw.setEndPosition(source.endPosition());
    }
    tagged.add(tw);
  }
  return tagged;
}
TaggedWord tw1 = new TaggedWord("w", "t"); c.incrementCount(tw1); TaggedWord tw2 = new TaggedWord("w", "t2"); System.out.println(c.containsKey(tw2)); System.out.println(tw1.equals(tw2));
String tag = tagIndex.get(POSbacktrace[start][end]); words.add(new TaggedWord(word, tag)); start = end;
for (TaggedWord word : sentence) { TaggedWord newWord = new TaggedWord(maxentTagger.wordFunction.apply(word.word()), word.tag()); newSentence.add(newWord);
List<TaggedWord> sentence3 = new ArrayList<>(); for (int i = 0; i < sent3.length; i++) { sentence3.add(new TaggedWord(sent3[i], tag3[i]));
/**
 * Gets the tagged yield of the tree -- that is, the preterminals together
 * with the terminals below them. For every preterminal node a
 * {@code TaggedWord} is built from the label of its first child and the
 * node's own label, and appended to the supplied list. Children are
 * visited in order, so the entries follow the natural left to right order
 * of the leaves. A single list is threaded through the whole recursion.
 *
 * @param ty The list that receives the tagged yield. Normally this is
 *     empty when the routine is called, but if not, the new yield is
 *     added to the end of the list.
 * @return The same list {@code ty} that was passed in, after the additions.
 */
public <X extends List<TaggedWord>> X taggedYield(X ty) {
  if (!isPreTerminal()) {
    // Not a preterminal: collect from each subtree in turn.
    for (Tree child : children()) {
      child.taggedYield(ty);
    }
    return ty;
  }
  ty.add(new TaggedWord(firstChild().label(), label()));
  return ty;
}
/**
 * Trains on a sentence by spreading each word's weight over the tags
 * recorded in {@code baseTagCounts} for the basic category of its tag.
 * Each such tag receives {@code weight} scaled by its share of the total
 * count. Words whose basic category has no counter, or whose counter
 * totals zero, contribute nothing, but they still advance the position.
 *
 * @param sentence The tagged words to train on
 * @param weight The weight of the whole sentence
 */
@Override
public final void trainUnannotated(List<TaggedWord> sentence, double weight) {
  uwModelTrainer.incrementTreesRead(weight);

  int loc = 0;
  for (TaggedWord tw : sentence) {
    final String baseTag = op.langpack().basicCategory(tw.tag());
    final Counter<String> counts = baseTagCounts.get(baseTag);
    // A missing counter is treated like an empty one: nothing to distribute.
    final double totalCount = (counts == null) ? 0 : counts.totalCount();
    if (totalCount != 0) {
      for (String tag : counts.keySet()) {
        final TaggedWord newTW = new TaggedWord(tw.word(), tag);
        train(newTW, loc, weight * counts.getCount(tag) / totalCount);
      }
    }
    ++loc;
  }
}
TaggedWord head = new TaggedWord(word, tag); result = binarizeLocalTree(result, headNum, head);
for (int state = 0; state < stateWeights.length; ++state) { TaggedWord tw = new TaggedWord(word, state(tag, state)); tempLex.train(tw, position, (Math.exp(stateWeights[state]) + smoothing) * scale);
TaggedWord tw = new TaggedWord(word, state(tag, 0)); lex.train(tw, position, weight); return (position + 1);