edu.stanford.nlp.ling.CoreLabel.word java code examples

Refine search

 /**
  * Counts list separators in the mention's original span, starting at the
  * second token (index 1). A token counts when it is a comma, or when it is
  * "and"/"or" (ignoring case) and the token just before it is not a comma.
  *
  * @param m The mention whose {@code originalSpan} is inspected
  * @return The number of separators counted
  */
 private static int numEntitiesInList(Mention m) {
  int separators = 0;
  for (int pos = 1; pos < m.originalSpan.size(); pos++) {
   CoreLabel token = m.originalSpan.get(pos);
   String current = token.word();
   if (current.equals(",")) {
    // A comma can never also be a conjunction, so nothing more to check.
    separators++;
    continue;
   }
   boolean isConjunction = current.equalsIgnoreCase("and") || current.equalsIgnoreCase("or");
   if (isConjunction && !m.originalSpan.get(pos - 1).word().equals(",")) {
    separators++;
   }
  }
  return separators;
 }
}

/**
 * Prints one line per token holding the word, the gold answer and the
 * guessed answer separated by tabs, then a terminating empty line.
 *
 * @param doc The tokens to print
 * @param out Where the lines are written
 */
@Override
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
 for (CoreLabel token : doc) {
  String guessed = token.get(CoreAnnotations.AnswerAnnotation.class);
  String gold = token.get(CoreAnnotations.GoldAnswerAnnotation.class);
  // String.join renders null elements as "null", exactly like string concatenation.
  out.println(String.join("\t", token.word(), gold, guessed));
 }
 out.println();
}

/**
 * Collects the lower-cased words of the mention's tokens whose
 * part-of-speech tag is contained in {@code PROPERS}.
 *
 * @param m The mention; tokens from {@code startIndex} (inclusive) to
 *          {@code endIndex} (exclusive) of {@code sentenceWords} are examined
 * @return The set of matching lower-cased words
 */
private static Set<String> getPropers(Mention m) {
 Set<String> properWords = new HashSet<>();
 for (int index = m.startIndex; index < m.endIndex; index++) {
  CoreLabel token = m.sentenceWords.get(index);
  String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
  // Lower-cased before the tag test, as in the original statement order.
  String lowered = token.word().toLowerCase();
  if (!PROPERS.contains(tag)) {
   continue;
  }
  properWords.add(lowered);
 }
 return properWords;
}

int rSize = regex.size();
for (int start = searchStart, end = document.size() - regex.size(); start <= end; start++) {
 boolean failed = false;
 for (int i = 0; i < rSize; i++) {
  Pattern pattern = regex.get(i);
  String exact = entry.exact.get(i);
  CoreLabel token = document.get(start + i);
  String NERType = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
  String currentType = token.get(CoreAnnotations.AnswerAnnotation.class);
    (exact != null && ! (ignoreCase ? exact.equalsIgnoreCase(token.word()) : exact.equals(token.word()))) ||
    ! (entry.overwritableTypes.contains(NERType) || myLabels.contains(NERType))  ||
    ! pattern.matcher(token.word()).matches()  // last, as this is likely the expensive operation
    ) {
   failed = true;

/**
 * Looks at up to 3 tokens immediately to the left of position {@code i}
 * for a word starting with "weigh", which indicates weight rather than
 * money.
 *
 * @param pl The list of CoreLabel
 * @param i The position to scan left from (the token at {@code i} itself
 *          is not examined)
 * @return whether a weight word is found
 */
private static boolean leftScanFindsWeightWord(List<CoreLabel> pl, int i) {
 if (DEBUG) {
  log.info("leftScan from: " + pl.get(i).word());
 }
 // Never look further back than three tokens or past the start of the list.
 int lowest = Math.max(0, i - 3);
 for (int pos = i - 1; pos >= lowest; pos--) {
  String candidate = pl.get(pos).word();
  if (!candidate.startsWith("weigh")) {
   continue;
  }
  if (DEBUG) {
   log.info("leftScan found weight: " + candidate);
  }
  return true;
 }
 return false;
}

/**
 * Look along CD words and see if next thing is a money word.
 * Starting at {@code i}, tokens tagged "CD" are skipped; the first token
 * with a different tag is a money word when its tag is "M", "NN" or "NNS"
 * and its word matches {@code CURRENCY_WORD_PATTERN}.
 *
 * @param pl The list of CoreLabel
 * @param i The position to scan right from
 * @return Whether a money word is found; {@code false} when only CD tokens
 *         remain up to the end of the list
 */
private static boolean rightScanFindsMoneyWord(List<CoreLabel> pl, int i) {
 int j = i;
 if (DEBUG) {
  log.info("rightScan from: " + pl.get(j).word());
 }
 int sz = pl.size();
 while (j < sz && pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")) {
  j++;
 }
 if (j >= sz) {
  return false;
 }
 String tag = pl.get(j).getString(CoreAnnotations.PartOfSpeechAnnotation.class);
 String word = pl.get(j).word();
 // Computed once so that the debug output always reports the value that is
 // actually returned (the log previously ignored the "M" tag).
 boolean found = (tag.equals("M") || tag.equals("NN") || tag.equals("NNS"))
   && CURRENCY_WORD_PATTERN.matcher(word).matches();
 if (DEBUG) {
  log.info("rightScan testing: " + word + '/' + tag + "; answer is: " + found);
 }
 return found;
}

 if (! wi.get(CoreAnnotations.AnswerAnnotation.class).equals(wi.get(CoreAnnotations.GoldAnswerAnnotation.class))) {
  wrong++;
 if (!THREE_CLASSES && wi.get(CoreAnnotations.AnswerAnnotation.class).equals("UPPER")) {
  sb.append(wi.word().toUpperCase());
 } else if (wi.get(CoreAnnotations.AnswerAnnotation.class).equals("LOWER")) {
  sb.append(wi.word().toLowerCase());
 } else if (wi.get(CoreAnnotations.AnswerAnnotation.class).equals("INIT_UPPER")) {
  sb.append(wi.word().substring(0,1).toUpperCase())
   .append(wi.word().substring(1));
 } else if (wi.get(CoreAnnotations.AnswerAnnotation.class).equals("O")) {
  sb.append(wi.word());
  Matcher alphaMatcher = alphabet.matcher(wi.word());
  if (alphaMatcher.matches()) {
   sb.append("/MIX");
System.err.printf("> wrong = %d ; total = %d%n", wrong, doc.size());
out.println();

/**
 * Adds the static word embedding of every token's word onto a new matrix
 * created with {@code staticWordEmbeddings.getEmbeddingSize()} rows and one
 * column, then divides the sum by the number of tokens (by 1 when the list
 * is empty).
 *
 * @param words The tokens whose embeddings are combined
 * @return The summed embeddings divided by {@code max(1, words.size())}
 */
private SimpleMatrix getAverageEmbedding(List<CoreLabel> words) {
 SimpleMatrix sum = new SimpleMatrix(staticWordEmbeddings.getEmbeddingSize(), 1);
 for (CoreLabel token : words) {
  SimpleMatrix vector = getStaticWordEmbedding(token.word());
  sum = sum.plus(vector);
 }
 // Guard against dividing by zero for an empty token list.
 int denominator = Math.max(1, words.size());
 return sum.divide(denominator);
}

/** Write a standard CoNLL format output file.
 *
 *  Unless {@code flags.entitySubclassification} is "iob1" (ignoring case), the
 *  document is first passed to {@code deEndify}. A token whose word is the very
 *  same String object as {@code BOUNDARY} produces an empty line; every other
 *  token produces one line with word, POS tag, chunk tag, gold answer and
 *  guessed answer separated by tabs.
 *
 *  @param doc The document: A List of CoreLabel
 *  @param out Where to send the answers to
 */
@Override
@SuppressWarnings({"StringEquality", "StringConcatenationInLoop"})
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
 if ( ! "iob1".equalsIgnoreCase(flags.entitySubclassification)) {
  deEndify(doc);
 }
 for (CoreLabel fl : doc) {
  String word = fl.word();
  if (word == BOUNDARY) { // Using == is okay, because it is set to constant
   out.println();
  } else {
   String gold = fl.getString(CoreAnnotations.GoldAnswerAnnotation.class);
   String guess = fl.get(CoreAnnotations.AnswerAnnotation.class);
   String pos = fl.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
   String chunk = fl.getString(CoreAnnotations.ChunkAnnotation.class);
   out.println(word + '\t' + pos + '\t' + chunk + '\t' +
         gold + '\t' + guess);
  }
 }
}

/**
 * Finds the token indices inside the given run whose word is "he" or "she"
 * (ignoring case). The run is inclusive on both ends and is clipped to the
 * number of tokens in {@code doc}.
 *
 * @param nonQuoteRun First and last token index of the run to scan
 * @return The indices of the matching tokens, in ascending order
 */
protected ArrayList<Integer> scanForPronouns(Pair<Integer, Integer> nonQuoteRun) {
 List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
 ArrayList<Integer> pronounList = new ArrayList<>();
 int begin = nonQuoteRun.first;
 // Last index to visit: end of the run or end of the token list, whichever is first.
 int last = Math.min(nonQuoteRun.second, tokens.size() - 1);
 for (int index = begin; index <= last; index++) {
  String word = tokens.get(index).word();
  boolean isPronoun = word.equalsIgnoreCase("he") || word.equalsIgnoreCase("she");
  if (isPronoun) {
   pronounList.add(index);
  }
 }
 return pronounList;
}

 String label = as.getKey();
 boolean lastwordlabeled = lastWordLabeled.get(label);
 if (s.get(as.getValue()).equals(label)) {
  if (!lastwordlabeled) {
   startingLabels.add(label);
for(int i = listEndedLabels.size() -1 ; i >=0; i--)
 str += " </" + listEndedLabels.get(i) + ">";
for(String label : startingLabels){
 str += " <" + label + "> ";
str += " " + s.word();
writer.write(str.trim() + " ");

/**
 * Tests whether the mention is the single word "人" directly preceded by a
 * word ending in "让" or "令".
 *
 * @param m The mention to test; it must not start at sentence position 0
 *          for the test to succeed
 * @param sent The tokens of the sentence containing the mention
 * @return {@code true} if the mention is "人" and the prior word ends with
 *         "让" or "令"
 */
private static boolean mentionIsRangren(Mention m, List<CoreLabel> sent) {
 if (!m.spanToString().equals("人") || m.startIndex <= 0) {
  return false;
 }
 String priorWord = sent.get(m.startIndex - 1).word();
 // The original test had a third clause, endsWith(""), which is true for
 // every string and made this method accept any prior word. It has been
 // removed so that the suffix test is meaningful again.
 // NOTE(review): the intended third suffix is unknown; restore it here if
 // it can be recovered.
 return priorWord.endsWith("让") || priorWord.endsWith("令");
}

/**
 * Skips over consecutive tokens tagged "CD" starting at {@code i} and
 * checks whether the token that follows is a money word like cents or
 * pounds: its tag must be "NN" or "NNS" and its word must match
 * {@code CURRENCY_WORD_PATTERN}.
 *
 * @param pl The list of CoreLabel
 * @param i The position to scan right from
 * @return Whether a money word is found; {@code false} when the CD run
 *         reaches the end of the list
 */
private static boolean rightScanFindsMoneyWord(List<CoreLabel> pl, int i) {
 if (DEBUG) {
  log.info("rightScan from: " + pl.get(i).word());
 }
 final int size = pl.size();
 int pos = i;
 for (; pos < size; pos++) {
  String numberTag = pl.get(pos).getString(CoreAnnotations.PartOfSpeechAnnotation.class);
  if (!numberTag.equals("CD")) {
   break;
  }
 }
 if (pos >= size) {
  return false;
 }
 CoreLabel candidate = pl.get(pos);
 String tag = candidate.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
 String word = candidate.word();
 boolean nounTag = tag.equals("NN") || tag.equals("NNS");
 boolean isMoney = nounTag && CURRENCY_WORD_PATTERN.matcher(word).matches();
 if (DEBUG) {
  log.info("rightScan testing: " + word + '/' + tag + "; answer is: " + Boolean.toString(isMoney));
 }
 return isMoney;
}

for(CoreLabel l: doc){
 if(l.word().equals(CoNLLDocumentReaderAndWriter.BOUNDARY) || l.word().equals("-DOCSTART-")){
  if(words.size() > 0){
   num++;
   String docid = sentIDprefix + "-"+String.valueOf(num);
 words.add(l.word());
 l.set(CoreAnnotations.ValueAnnotation.class, l.word());
 String label = l.get(CoreAnnotations.AnswerAnnotation.class);
 l.set(CoreAnnotations.TextAnnotation.class, l.word());
 l.set(CoreAnnotations.OriginalTextAnnotation.class, l.word());
if(words.size() > 0){
 num++;
 String docid = sentIDprefix + "-"+String.valueOf(num);;

 /**
  * Collects, in sentence order, the lower-cased words of the mention's
  * tokens tagged "NN", "NNS", "NNP" or "NNPS".
  *
  * @param m The mention; tokens from {@code startIndex} (inclusive) to
  *          {@code endIndex} (exclusive) of {@code sentenceWords} are examined
  * @return The lower-cased words of the matching tokens
  */
 private static List<String> getContentWords(Mention m) {
  List<String> contentWords = new ArrayList<>();
  for (int index = m.startIndex; index < m.endIndex; index++) {
   CoreLabel token = m.sentenceWords.get(index);
   String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
   switch (tag) {
    case "NN":
    case "NNS":
    case "NNP":
    case "NNPS":
     contentWords.add(token.word().toLowerCase());
     break;
    default:
     break;
   }
  }
  return contentWords;
 }
}

if (chunkIndexEnd > chunkList.size()) {
 assert(false);
cl.setValue(cl.word());
cl.setOriginalText(cl.word());

/**
 * Emits, for every token, a line made of its word, its gold answer and its
 * guessed answer with tab characters in between, and finishes with an
 * empty line.
 *
 * @param doc The tokens to print
 * @param out Where the lines are written
 */
@Override
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
 for (CoreLabel token : doc) {
  String guessed = token.get(CoreAnnotations.AnswerAnnotation.class);
  String gold = token.get(CoreAnnotations.GoldAnswerAnnotation.class);
  // print(String) writes "null" for a null argument, matching concatenation.
  out.print(token.word());
  out.print('\t');
  out.print(gold);
  out.print('\t');
  out.println(guessed);
 }
 out.println();
}

/**
 * Outputs a partial CONLL-U file with token information (form, lemma, POS)
 * but without any dependency information.
 *
 * Each token becomes one tab-separated line of ten columns: index, word,
 * lemma, coarse tag, POS tag, feature string, three "_" placeholders and
 * the misc field. Lemma, coarse tag, POS tag and misc fall back to "_".
 *
 * @param sentence The sentence whose tokens are printed
 * @return The token lines followed by one extra newline
 */
public String printPOSAnnotations(CoreMap sentence) {
  StringBuilder conllu = new StringBuilder();
  for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    String coarseTag = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
    String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
    String fineTag = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
    String features = CoNLLUUtils.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
    String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
    String line = String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n",
        token.index(), token.word(), lemma, coarseTag, fineTag, features,
        "_", "_", "_", misc);
    conllu.append(line);
  }
  return conllu.append("\n").toString();
}

/**
 * Finds the token indices inside the given span whose word is contained in
 * {@code animacySet}. The span is inclusive on both ends and is clipped to
 * the number of tokens in {@code doc}.
 *
 * @param span First and last token index of the span to scan
 * @return The indices of the matching tokens, in ascending order
 */
public List<Integer> scanForAnimates(Pair<Integer, Integer> span) {
 List<Integer> animateIndices = new ArrayList<>();
 List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
 int index = span.first;
 while (index <= span.second && index < tokens.size()) {
  String word = tokens.get(index).word();
  if (animacySet.contains(word)) {
   animateIndices.add(index);
  }
  index++;
 }
 return animateIndices;
}

for(int i = 0; i < tokenSequence.size(); i ++){
 CoreLabel crt = tokenSequence.get(i);
 CoreLabel next = (i < tokenSequence.size() - 1 ? tokenSequence.get(i + 1) : null);
 CoreLabel prev = (i > 0 ? tokenSequence.get(i - 1) : null);
 if (CURRENCY_SYMBOL_PATTERN.matcher(crt.word()).matches() && next != null &&
   (next.get(CoreAnnotations.AnswerAnnotation.class).equals("NUMBER") || "CD".equals(next.tag()))) {
  crt.set(CoreAnnotations.AnswerAnnotation.class, "MONEY");
  i = changeLeftToRight(tokenSequence, i + 1,
    next.get(CoreAnnotations.AnswerAnnotation.class),
    next.tag(), "MONEY") - 1;
 else if((CURRENCY_WORD_PATTERN.matcher(crt.word()).matches() ||
      CURRENCY_SYMBOL_PATTERN.matcher(crt.word()).matches()) &&
      prev != null &&
      (prev.get(CoreAnnotations.AnswerAnnotation.class).equals("NUMBER") ||
      "CD".equals(prev.tag())) &&
      ! leftScanFindsWeightWord(tokenSequence, i)) {
 else if ((PERCENT_WORD_PATTERN.matcher(crt.word()).matches() ||
      PERCENT_SYMBOL_PATTERN.matcher(crt.word()).matches()) &&
      prev != null &&
      (prev.get(CoreAnnotations.AnswerAnnotation.class).equals("NUMBER") ||

Popular methods of CoreLabel

get
set
beginPosition
endPosition
originalText
<init>
This constructor attempts to parse the String keys into Class keys. It's mainly useful for reading f
index
lemma
setWord
Set the word value for the label. Also, clears the lemma, since that may have changed if the word ch
getString
setIndex
setTag

Popular in Java

Running tasks concurrently on multiple threads
onCreateOptionsMenu (Activity)
runOnUiThread (Activity)
setScale (BigDecimal)
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
Charset (java.nio.charset)
A charset is a named mapping between Unicode characters and byte sequences. Every Charset can decode
Locale (java.util)
Locale represents a language/country/variant combination. Locales are used to alter the presentatio
Menu (java.awt)
Rectangle (java.awt)
A Rectangle specifies an area in a coordinate space that is enclosed by the Rectangle object's top-
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
CodeWhisperer alternatives

How to use the word method in edu.stanford.nlp.ling.CoreLabel

Best Java code snippets using edu.stanford.nlp.ling.CoreLabel.word (Showing top 20 results out of 315)

Refine search

How to use
word
method
in
edu.stanford.nlp.ling.CoreLabel