edu.stanford.nlp.ling.TaggedWord.<init> java code examples

/**
 * Builds a label whose value (word) is the given <code>String</code>.
 * Only the value is supplied here, so the remaining fields of the label
 * would normally be <code>null</code>.
 *
 * @param labelStr The String to use as the value of the new label
 * @return A new TaggedWord wrapping <code>labelStr</code> (its tag will be <code>null</code>)
 */
public Label newLabel(String labelStr) {
 final Label created = new TaggedWord(labelStr);
 return created;
}

/**
 * Builds a <code>TaggedWord Label</code> from an existing
 * <code>Label</code>.  The new label is constructed directly from the
 * one passed in; whichever fields that label does not carry will be
 * <code>null</code> in the result.
 *
 * @param oldLabel The Label to build the new label from
 * @return A new TaggedWord created from <code>oldLabel</code>
 */
public Label newLabel(Label oldLabel) {
 final Label created = new TaggedWord(oldLabel);
 return created;
}

/**
 * Builds a label that uses the given <code>String</code> as one of its
 * value components.  When <code>options</code> equals
 * <code>TAG_LABEL</code> the String becomes the tag (and the word is
 * <code>null</code>); for every other value of <code>options</code> the
 * String becomes the word.
 *
 * @param labelStr The String to store in the new label
 * @param options  Selects the role of labelStr: <code>TAG_LABEL</code>
 *                 for tag, anything else for word
 * @return A new TaggedWord (either its tag or its word will be <code>null</code>)
 */
public Label newLabel(String labelStr, int options) {
 return (options == TAG_LABEL)
   ? new TaggedWord(null, labelStr)
   : new TaggedWord(labelStr);
}

/**
 * Parses a <code>String</code> into a new <code>TaggedWord</code>.
 * The String is cut at the <i>rightmost</i> occurrence of the divider;
 * no attempt is made to interpret escape sequences.  Everything before
 * the divider becomes the word and everything after it becomes the tag.
 * When the divider does not occur at all, the entire String is used as
 * the word and the tag is <code>null</code>.
 *
 * @param word The text to parse, optionally containing the divider
 * @return The new TaggedWord
 */
public Label newLabelFromString(String word) {
 final int cut = word.lastIndexOf(divider);
 if (cut < 0) {
  // no divider present: the whole string is the word
  return new TaggedWord(word);
 }
 final String wordPart = word.substring(0, cut);
 final String tagPart = word.substring(cut + 1);
 return new TaggedWord(wordPart, tagPart);
}

/**
 * Splits the text of {@code w} into a word and a tag at the last
 * occurrence of {@code splitChar}.
 * <p>
 * The argument is returned unchanged when {@code splitChar} is 0, when
 * the character does not occur in the text, or when it occurs only at
 * index 0 (which would produce an empty word).
 *
 * @param w the item whose {@code word()} text is to be split
 * @return a new TaggedWord holding the two parts, or {@code w} itself
 *         when no split is performed
 */
private HasWord splitTag(HasWord w) {
 if (splitChar == 0) {
  return w;
 }
 final String text = w.word();
 final int cut = text.lastIndexOf(splitChar);
 // a cut at index 0 would leave an empty word, so it is treated like "not found"
 if (cut > 0) {
  return new TaggedWord(text.substring(0, cut), text.substring(cut + 1));
 }
 return w;
}

/**
 * Pairs up two parallel lists of {@code String} into a list of
 * {@code TaggedWord}: element <i>i</i> of the result is built from the
 * <i>i</i>-th word and the <i>i</i>-th tag.
 *
 * @param lex  the words, one {@code String} per token
 * @param tags the tags, one {@code String} per token, in the same order
 *             as {@code lex}
 * @return a new ArrayList with one {@code TaggedWord} per input position
 * @throws IllegalArgumentException if the two lists differ in size
 */
public static ArrayList<TaggedWord> toTaggedList(List<String> lex, List<String> tags) {
 ArrayList<TaggedWord> paired = new ArrayList<>();
 final int wordCount = lex.size();
 if (wordCount != tags.size()) {
  throw new IllegalArgumentException("Sentence.toSentence: lengths differ");
 }
 int pos = 0;
 while (pos < wordCount) {
  paired.add(new TaggedWord(lex.get(pos), tags.get(pos)));
  pos++;
 }
 return paired;
}

/**
 * Reads one line from {@code reader} and stores its tokens in
 * {@code next} as a list of TaggedWord.
 * <p>
 * At end of input {@code next} is set to {@code null}.  Otherwise
 * {@code numSentences} is incremented, the line is split on whitespace,
 * and each token is cut at the last occurrence of {@code tagSeparator}
 * into a word and a tag (both interned).
 *
 * @throws RuntimeException wrapping any IOException raised while reading
 * @throws IllegalArgumentException if a token contains no tagSeparator
 */
void primeNext() {
 final String rawLine;
 try {
  rawLine = reader.readLine();
 } catch (IOException e) {
  throw new RuntimeException(e);
 }
 if (rawLine == null) {
  // end of input: nothing further to hand out
  next = null;
  return;
 }
 ++numSentences;
 next = new ArrayList<>();
 // walk over the whitespace-separated tokens of this one sentence
 for (StringTokenizer tokens = new StringTokenizer(rawLine); tokens.hasMoreTokens(); ) {
  final String token = tokens.nextToken();
  final int sepAt = token.lastIndexOf(tagSeparator);
  if (sepAt < 0) {
   // NOTE(review): numSentences was already incremented for this line, so the
   // reported line number may be one too high - confirm against its initial value.
   throw new IllegalArgumentException("Data format error: can't find delimiter \"" + tagSeparator + "\" in word \"" + token + "\" (line " + (numSentences+1) + " of " + filename + ')');
  }
  final String word = token.substring(0, sepAt).intern();
  final String tag = token.substring(sepAt + 1).intern();
  next.add(new TaggedWord(word, tag));
 }
}

/**
 * Returns a copy of {@code twList} in which every tag has been replaced
 * by the result of {@code tlp.basicCategory(tag)}.  Words are carried
 * over as they are, and the input list is not modified.
 *
 * @param twList the tagged words to convert
 * @param tlp    the language pack used to map each tag
 * @return a new list, in the same order as {@code twList}
 */
private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
 final List<TaggedWord> cleaned = new ArrayList<>(twList.size());
 for (TaggedWord source : twList) {
  final String word = source.word();
  final String basicTag = tlp.basicCategory(source.tag());
  cleaned.add(new TaggedWord(word, basicTag));
 }
 return cleaned;
}

next.add(new TaggedWord(word, tag));
try {
 line = reader.readLine();

/**
 * Builds a TaggedWord from the strings returned by
 * {@code wordString(wordIndex)} and {@code tagString(tagIndex)}.
 *
 * @param wordIndex the index passed to {@code wordString}
 * @param tagIndex  the index passed to {@code tagString}
 * @return a new TaggedWord made from the two resulting strings
 */
public TaggedWord toTaggedWord(Index<String> wordIndex,
                Index<String> tagIndex) {
 return new TaggedWord(wordString(wordIndex), tagString(tagIndex));
}

/**
 * Assembles the tagged sentence from {@code sent} and {@code finalTags}.
 * <p>
 * Positions 0 through {@code size - 2} are used; the final position is
 * left out.  When {@code origWords} is non-empty and its first element
 * is a {@code HasOffset}, the begin and end positions of each original
 * word are copied onto the corresponding TaggedWord.
 *
 * @return a new list with one TaggedWord per included position
 */
private ArrayList<TaggedWord> getTaggedSentence() {
 // only the first original word is inspected to decide whether offsets exist
 final boolean hasOffset =
   !(origWords == null || origWords.isEmpty()) && origWords.get(0) instanceof HasOffset;
 final ArrayList<TaggedWord> result = new ArrayList<>();
 for (int idx = 0; idx < size - 1; idx++) {
  final String tag = finalTags[idx];
  final TaggedWord tagged = new TaggedWord(sent.get(idx), tag);
  if (hasOffset) {
   final HasOffset span = (HasOffset) origWords.get(idx);
   tagged.setBeginPosition(span.beginPosition());
   tagged.setEndPosition(span.endPosition());
  }
  result.add(tagged);
 }
 return result;
}

TaggedWord tw1 = new TaggedWord("w", "t");
c.incrementCount(tw1);
TaggedWord tw2 = new TaggedWord("w", "t2");
System.out.println(c.containsKey(tw2));
System.out.println(tw1.equals(tw2));

String tag = tagIndex.get(POSbacktrace[start][end]);
words.add(new TaggedWord(word, tag));
start = end;

for (TaggedWord word : sentence) {
 TaggedWord newWord =
  new TaggedWord(maxentTagger.wordFunction.apply(word.word()),
          word.tag());
 newSentence.add(newWord);

List<TaggedWord> sentence3 = new ArrayList<>();
for (int i = 0; i < sent3.length; i++) {
 sentence3.add(new TaggedWord(sent3[i], tag3[i]));

/**
 * Collects the tagged yield of the tree, i.e. the preterminals paired
 * with their terminals, into the supplied list.
 * <p>
 * At a preterminal node one {@code TaggedWord} is appended, built from
 * the label of the first child and the label of the node itself.  At
 * any other node the method recurses into the children in order, so
 * entries appear in the natural left to right order of the leaves.
 * Null values, if any, are added like any other value.  The same list
 * is threaded through the whole recursion, so only one List is used.
 * <p>
 * <i>Implementation note:</i> when we summon up enough courage, this
 * method will be changed to take and return a {@code List<W extends TaggedWord>}.
 *
 * @param ty The list that receives the tagged yield.  Normally empty on
 *           entry; if it is not, the new entries are appended after the
 *           existing ones.
 * @return the same list {@code ty} that was passed in
 */
public <X extends List<TaggedWord>> X taggedYield(X ty) {
 if (!isPreTerminal()) {
  // interior node (or bare leaf): let each child contribute its part
  for (Tree kid : children()) {
   kid.taggedYield(ty);
  }
  return ty;
 }
 ty.add(new TaggedWord(firstChild().label(), label()));
 return ty;
}

/**
 * Trains on a sentence by spreading each word's weight over the tags
 * recorded for the word's base tag.
 * <p>
 * For every word, the base tag is obtained with
 * {@code op.langpack().basicCategory(tag)} and looked up in
 * {@code baseTagCounts}.  Words with no counter, or with a counter whose
 * total is 0, are skipped (their position still counts).  Otherwise
 * {@code train} is called once per tag in the counter with
 * {@code weight * count(tag) / totalCount}.
 *
 * @param sentence the tagged words to train on, in sentence order
 * @param weight   the weight of the sentence; also passed to
 *                 {@code uwModelTrainer.incrementTreesRead}
 */
@Override
public final void trainUnannotated(List<TaggedWord> sentence,
                  double weight) {
 uwModelTrainer.incrementTreesRead(weight);
 int position = -1;
 for (TaggedWord tw : sentence) {
  // advance first so that skipped words still consume a position
  ++position;
  final String baseTag = op.langpack().basicCategory(tw.tag());
  final Counter<String> tagCounts = baseTagCounts.get(baseTag);
  // a missing counter is treated the same as one with zero total
  final double total = (tagCounts == null) ? 0 : tagCounts.totalCount();
  if (total == 0) {
   continue;
  }
  for (String tag : tagCounts.keySet()) {
   final TaggedWord candidate = new TaggedWord(tw.word(), tag);
   train(candidate, position, weight * tagCounts.getCount(tag) / total);
  }
 }
}

TaggedWord head = new TaggedWord(word, tag);
result = binarizeLocalTree(result, headNum, head);

for (int state = 0; state < stateWeights.length; ++state) {
 TaggedWord tw = new TaggedWord(word, state(tag, state));
 tempLex.train(tw, position, (Math.exp(stateWeights[state]) + smoothing) * scale);

TaggedWord tw = new TaggedWord(word, state(tag, 0));
lex.train(tw, position, weight);
return (position + 1);

Javadoc

Create a new TaggedWord. It will have null for its content fields.

Popular methods of TaggedWord

Popular in Java

Finding current android device location
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
scheduleAtFixedRate (ScheduledExecutorService)
onRequestPermissionsResult (Fragment)
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
SQLException (java.sql)
An exception that indicates a failed JDBC operation. It provides the following information about pro
Format (java.text)
The base class for all formats. This is an abstract base class which specifies the protocol for clas
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
Scheduler (org.quartz)
This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
Top 12 Jupyter Notebook extensions

How to use edu.stanford.nlp.ling.TaggedWord constructor

Best Java code snippets using edu.stanford.nlp.ling.TaggedWord.<init> (Showing top 20 results out of 315)

How to use
edu.stanford.nlp.ling.TaggedWord
constructor