edu.stanford.nlp.ling.CoreLabel.originalText java code examples

/**
 * Returns the original text by delegating to the {@code label} this object holds.
 *
 * @return whatever {@code label.originalText()} returns
 */
@Override
public String originalText() {
 final String delegated = label.originalText();
 return delegated;
}

/**
 * Writes the tokens of {@code document} to {@code stream}: one sentence per line, every token
 * followed by a single space. Each token's original text is passed through {@code TRANSLATE}
 * first; text without an entry there is written unchanged.
 *
 * @param stream   writer that receives the text
 * @param document annotated document providing sentences and their tokens
 */
private void prepareHeidelTimeInput(PrintWriter stream, CoreMap document) {
 // Ideally this would use the full text annotation, because our cleanxml can be useless.
 for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
  for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
   final String raw = token.originalText();
   // HACK: a space is always emitted after a token (instead of only when token.after() is
   // non-empty), so contractions like "del = de + el" come out wrong -- will be deel -- and
   // things like 12-abr-2011, which are chunked up into 12 - abr-2011, are not handled either.
   stream.append(TRANSLATE.getOrDefault(raw, raw)).append(' ');
  }
  stream.append('\n');
 }
}

/**
 * Picks the head token of mention {@code m} within {@code sent} and stores it on the mention
 * ({@code headIndex}, {@code headWord}, {@code headString}).
 *
 * The head starts as the last token of the mention and moves left past tokens tagged "PU"
 * (punctuation). If only punctuation is found, the last token is used after all. When the
 * chosen token's original text is "自己" and the token before it is not punctuation, the head
 * moves one further token to the left.
 *
 * @param sent tokens of the sentence containing the mention
 * @param m    mention whose head fields are filled in
 */
protected static void findHeadChinese(List<CoreLabel> sent, Mention m) {
 int headPos = m.endIndex - 1;
 // Walk left over trailing punctuation, never moving past the mention start
 for (; headPos > m.startIndex; headPos--) {
  if (!sent.get(headPos).tag().equals("PU")) {
   break;
  }
 }
 // Reached the start and it is punctuation too: fall back to the last token
 boolean nothingButPunctuation = headPos == m.startIndex && sent.get(headPos).tag().equals("PU");
 if (nothingButPunctuation) {
  headPos = m.endIndex - 1;
 }
 // For "自己", prefer the preceding token unless that token is punctuation
 if (sent.get(headPos).originalText().equals("自己")
   && m.endIndex != m.startIndex
   && headPos > m.startIndex
   && !sent.get(headPos - 1).tag().equals("PU")) {
  headPos--;
 }
 m.headIndex = headPos;
 m.headWord = sent.get(headPos);
 m.headString = m.headWord.get(CoreAnnotations.TextAnnotation.class);
}

if (tokens.size() > 0) {
 CoreLabel token = tokens.get(0);
 if (token.originalText() != null) { text.append(token.originalText()); } else { text.append(token.word()); }
 last = tokens.get(0);
 if (token.originalText() != null) { text.append(token.originalText()); } else { text.append(token.word()); }
 last = token;

/**
 * Converts a tree to the Morfette training format: one line per leaf holding the word, its
 * lemma and its morphological analysis, separated by single spaces.
 *
 * A missing or empty analysis falls back to the value of the leaf's preterminal tag; a missing
 * or empty lemma falls back to the word itself.
 *
 * @param tree tree whose leaves and preterminals carry {@code CoreLabel}s
 * @return the formatted lines, each ended by the platform line separator
 */
private static String treeToMorfette(Tree tree) {
 StringBuilder out = new StringBuilder();
 List<Label> words = tree.yield();
 List<Label> tags = tree.preTerminalYield();
 assert words.size() == tags.size();
 final int count = words.size();
 int pos = 0;
 while (pos < count) {
  CoreLabel word = (CoreLabel) words.get(pos);
  CoreLabel pos_tag = (CoreLabel) tags.get(pos);
  String analysis = word.originalText();
  boolean hasAnalysis = analysis != null && !analysis.equals("");
  if (!hasAnalysis) {
   analysis = pos_tag.value();
  }
  String lemma = word.lemma();
  boolean hasLemma = lemma != null && !lemma.equals("");
  if (!hasLemma) {
   lemma = word.value();
  }
  out.append(String.format("%s %s %s%n", word.value(), lemma, analysis));
  pos++;
 }
 return out.toString();
}

/**
 * Helper method for creating a version of the document text without XML.
 *
 * The result keeps each token's original text and replaces every non-whitespace character
 * outside the tokens (before the first token, between consecutive tokens, and after the last
 * one) with a space, so those regions keep their length and their whitespace.
 * NOTE(review): the result lines up with {@code documentText} offsets only if each token's
 * original text is as long as its character-offset span -- confirm against callers.
 *
 * @param documentText the raw document text the token offsets refer to
 * @param annotation   annotation carrying the document's tokens with begin/end character offsets
 * @return the blanked-out text; if the annotation has no tokens, the whole document with every
 *         non-whitespace character replaced by a space
 */
public static String xmlFreeText(String documentText, Annotation annotation) {
 List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
 // With no tokens there is nothing to keep: everything is non-token content.
 if (tokens == null || tokens.isEmpty()) {
  return documentText.replaceAll("\\S", " ");
 }
 // A StringBuilder avoids re-copying the accumulated text on every token.
 StringBuilder cleanedText = new StringBuilder(documentText.length());
 int firstTokenCharIndex =
   tokens.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
 // add white space for all text before first token
 cleanedText.append(documentText.substring(0, firstTokenCharIndex).replaceAll("\\S", " "));
 int tokenCount = tokens.size();
 for (int tokenIndex = 0; tokenIndex < tokenCount; tokenIndex++) {
  CoreLabel token = tokens.get(tokenIndex);
  // add the current token's text
  cleanedText.append(token.originalText());
  // add whitespace for non-tokens and xml in between this token and the next one
  if (tokenIndex + 1 < tokenCount) {
   CoreLabel nextToken = tokens.get(tokenIndex + 1);
   int inBetweenStart = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
   int inBetweenEnd = nextToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
   cleanedText.append(
     documentText.substring(inBetweenStart, inBetweenEnd).replaceAll("\\S", " "));
  }
 }
 // add white space for all non-token content after last token
 cleanedText.append(documentText.substring(cleanedText.length()).replaceAll("\\S", " "));
 return cleanedText.toString();
}

sb.append(label.word());
sb.append("\t");
sb.append(label.originalText());
sb.append("\t");
if (outputLemmas) {

if (text.length() == 0) break;
tokenEndIdx++;
String matchStr = token.originalText().trim();

for(Label label : leaves) {
 if(label instanceof CoreLabel)
  morphAnalyses.add(((CoreLabel) label).originalText());

CoreLabel rawToken = (CoreLabel) yield.get(i);
String word = rawToken.value();
String morphStr = rawToken.originalText();
Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, morphStr);
String lemma = lemmaMorph.first();

/**
 * Replaces the POS tag of each preterminal in {@code tree} with the alternate tag that the
 * French morphological feature specification derives from the word's morphological analysis.
 *
 * The analysis is read from the leaf's original text. When that is missing or empty, one is
 * assembled from the preterminal's value and the leaf's category (POS subcategory):
 * {@code tag-subCat--}, or {@code tag---} when there is no subcategory. Preterminals for which
 * the specification yields no alternate tag are left untouched.
 *
 * @param tree tree whose leaves and preterminals carry {@code CoreLabel}s; modified in place
 */
private static void replacePOSTags(Tree tree) {
 List<Label> yield = tree.yield();
 List<Label> preYield = tree.preTerminalYield();
 assert yield.size() == preYield.size();
 MorphoFeatureSpecification spec = new FrenchMorphoFeatureSpecification();
 for (int i = 0; i < yield.size(); i++) {
  CoreLabel leaf = (CoreLabel) yield.get(i);
  // Morphological Analysis
  String morphStr = leaf.originalText();
  if (morphStr == null || morphStr.isEmpty()) {
   morphStr = preYield.get(i).value();
   // POS subcategory
   String subCat = leaf.category();
   // Compare by content: the former `subCat != ""` tested reference identity only.
   if (subCat != null && !subCat.isEmpty()) {
    morphStr += "-" + subCat + "--";
   } else {
    morphStr += "---";
   }
  }
  MorphoFeatures feats = spec.strToFeatures(morphStr);
  String altTag = feats.getAltTag();
  if (altTag != null && !altTag.isEmpty()) {
   CoreLabel cl = (CoreLabel) preYield.get(i);
   cl.setValue(altTag);
   cl.setTag(altTag);
  }
 }
}

String morphStr = coreLabel.originalText();
if(morphStr == null || morphStr.equals("")) {
 morphStr = MorphoFeatureSpecification.NO_ANALYSIS;

String featureStr = ((CoreLabel) yield.get(i)).originalText();
Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, featureStr);
String lemma = lemmaMorph.first();

/**
 * Relabels node {@code t}: its category becomes the base category followed by the suffix of
 * every active annotation whose tregex pattern matches at {@code t}. For a preterminal, when a
 * tag specification is set, the base category is first replaced by the tag computed from the
 * word's morphological analysis (the original text of the child's {@code CoreLabel}).
 *
 * @param t    node to relabel; modified in place
 * @param root root of the tree containing {@code t}, used for pattern matching
 * @return {@code t}, after relabeling
 * @throws RuntimeException if a preterminal's child has no {@code CoreLabel} or no analysis
 */
@Override
public Tree transformTree(Tree t, Tree root) {
 String baseCat = t.value();
 // Add manual state splits
 StringBuilder splitSuffix = new StringBuilder();
 for (Pair<TregexPattern,Function<TregexMatcher,String>> annotation : activeAnnotations) {
  TregexMatcher matcher = annotation.first().matcher(root);
  if (matcher.matchesAt(t)) {
   splitSuffix.append(annotation.second().apply(matcher));
  }
 }
 // Add morphosyntactic features if this is a POS tag
 if (t.isPreTerminal() && tagSpec != null) {
  Object wordLabel = t.firstChild().label();
  String morphoStr = null;
  if (wordLabel instanceof CoreLabel) {
   morphoStr = ((CoreLabel) wordLabel).originalText();
  }
  if (morphoStr == null) {
   throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString()));
  }
  baseCat = tagSpec.strToFeatures(morphoStr).getTag(baseCat);
 }
 // Update the label(s)
 String newCat = baseCat + splitSuffix;
 t.setValue(newCat);
 if (t.isPreTerminal() && t.label() instanceof HasTag) {
  ((HasTag) t.label()).setTag(newCat);
 }
 return t;
}

/**
 * Applies the superclass transformation to {@code t} and then, for a preterminal when a tag
 * specification is set, replaces its category with the tag computed from the morphological
 * part of the word's analysis (the original text of the child's {@code CoreLabel}, split by
 * {@code MorphoFeatureSpecification.splitMorphString}).
 *
 * @param t    node to relabel
 * @param root root of the tree containing {@code t}
 * @return the node returned by the superclass transformation, after relabeling
 * @throws RuntimeException if a preterminal's child has no {@code CoreLabel} or no analysis
 */
@Override
public Tree transformTree(Tree t, Tree root) {
 // Perform tregex-powered annotations
 t = super.transformTree(t, root);
 String cat = t.value();
 // Add morphosyntactic features if this is a POS tag
 if (t.isPreTerminal() && tagSpec != null) {
  Object wordLabel = t.firstChild().label();
  String morphoStr = null;
  if (wordLabel instanceof CoreLabel) {
   morphoStr = ((CoreLabel) wordLabel).originalText();
  }
  if (morphoStr == null) {
   throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString()));
  }
  // Only the morphological half of the split is needed, so no word is supplied
  String morphPart = MorphoFeatureSpecification.splitMorphString("", morphoStr).second();
  cat = tagSpec.strToFeatures(morphPart).getTag(cat);
 }
 // Update the label(s)
 t.setValue(cat);
 if (t.isPreTerminal() && t.label() instanceof HasTag) {
  ((HasTag) t.label()).setTag(cat);
 }
 return t;
}

  s += word.originalText()+word.after();
  prev.set(CoreAnnotations.AfterAnnotation.class, s);
 prevString += word.before() + word.originalText();
  s += word.originalText()+word.after();
  prev.set(CoreAnnotations.AfterAnnotation.class, s);
 prevString += word.before() + word.originalText();
   prevString += word.before() + word.originalText();
  } else if (word.word().equalsIgnoreCase("</doc>")) {
   String s = prev.get(CoreAnnotations.AfterAnnotation.class);
   s += word.originalText();
   prev.set(CoreAnnotations.AfterAnnotation.class, s);
  } else {
   if (prev != null) {
    String s = prev.get(CoreAnnotations.AfterAnnotation.class);
    s += word.originalText() + word.after();
    prev.set(CoreAnnotations.AfterAnnotation.class, s);
   prevString += word.before() + word.originalText();
CoreLabel wi = new CoreLabel();
wi.setWord(word.word());
wi.set(CoreAnnotations.OriginalTextAnnotation.class, word.originalText());
wi.set(CoreAnnotations.BeforeAnnotation.class, prevString+word.before());
wi.set(CoreAnnotations.AfterAnnotation.class, word.after());

String contextStr = getCoreLabel(start).originalText();
float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr);
if (tagScore > Float.NEGATIVE_INFINITY || floodTags) {

String morphStr = childLabel.originalText();
if (morphStr == null || morphStr.equals("")) {
 morphStr = label.value();

String contextStr = getCoreLabel(start).originalText();
float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr);
if (tagScore > Float.NEGATIVE_INFINITY || floodTags) {

String featureStr = ((CoreLabel) yield.get(i)).originalText();
Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, featureStr);
String lemma = lemmaMorph.first();

Popular methods of CoreLabel

get
set
word
beginPosition
endPosition
<init>
This constructor attempts to parse the String keys into Class keys. It's mainly useful for reading f
index
lemma
setWord
Set the word value for the label. Also, clears the lemma, since that may have changed if the word ch
getString
setIndex
setTag

Popular in Java

Start an intent from android
getContentResolver (Context)
putExtra (Intent)
getSupportFragmentManager (FragmentActivity)
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
UnknownHostException (java.net)
Thrown when a hostname can not be resolved.
TimeUnit (java.util.concurrent)
A TimeUnit represents time durations at a given unit of granularity and provides utility methods to
IOUtils (org.apache.commons.io)
General IO stream manipulation utilities. This class provides static utility methods for input/outpu
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
ImageIO (javax.imageio)
Top PhpStorm plugins

How to use originalText method in edu.stanford.nlp.ling.CoreLabel

Best Java code snippets using edu.stanford.nlp.ling.CoreLabel.originalText (Showing top 20 results out of 315)

How to use originalText method in edu.stanford.nlp.ling.CoreLabel