/** Returns the character offset one past the end of the wrapped {@code label}. */
@Override public int endPosition() { return label.endPosition(); }
/**
 * Converts the item's span from token indices (over the 1-best segmentation)
 * to character offsets, mutating and returning the same {@code Item}.
 *
 * @param item the item whose {@code start}/{@code end} are token indices (end exclusive)
 * @return the same item with {@code start}/{@code end} rewritten as character offsets
 * @throws IllegalStateException if no 1-best segmentation is available
 */
@Override public Item convertItemSpan(Item item) {
  if (bestSegmentationB == null || bestSegmentationB.isEmpty()) {
    // IllegalStateException is the idiomatic RuntimeException for a missing precondition;
    // callers catching RuntimeException still catch it.
    throw new IllegalStateException(this.getClass().getName() + ": No 1best segmentation available");
  }
  // item.end is exclusive over tokens, so the last covered token is at item.end - 1
  item.start = bestSegmentationB.get(item.start).beginPosition();
  item.end = bestSegmentationB.get(item.end - 1).endPosition();
  return item;
}
private static Optional<CoreMap> overlapsWithMention(CoreMap needle, List<CoreMap> haystack) { List<CoreLabel> tokens = needle.get(CoreAnnotations.TokensAnnotation.class); int charBegin = tokens.get(0).beginPosition(); int charEnd = tokens.get(tokens.size()-1).endPosition(); return (haystack.stream().filter(mention_ -> { List<CoreLabel> tokens_ = mention_.get(CoreAnnotations.TokensAnnotation.class); int charBegin_ = tokens_.get(0).beginPosition(); int charEnd_ = tokens_.get(tokens_.size()-1).endPosition(); // Check overlap return !(charBegin_ > charEnd || charEnd_ < charBegin); }).findFirst()); }
/**
 * Renders tokens as space-separated {@code word{begin, end}} entries showing
 * each token's character offsets.
 *
 * @param tokens the tokens to render (may be empty, yielding "")
 * @return a single-line description of the tokens and their offsets
 */
public static String tokensToString(List<CoreLabel> tokens) {
  // StringBuilder: no need for StringBuffer's synchronization on a method-local buffer
  StringBuilder os = new StringBuilder();
  boolean first = true;
  for (CoreLabel t : tokens) {
    if (!first) {
      os.append(' ');
    }
    // Chain appends instead of building an intermediate concatenated String
    os.append(t.word()).append('{').append(t.beginPosition())
      .append(", ").append(t.endPosition()).append('}');
    first = false;
  }
  return os.toString();
}
/**
 * Returns the tokens whose character spans intersect the range
 * [{@code charBegin}, {@code charEnd}).
 *
 * @param tokens the candidate tokens, in order
 * @param charBegin inclusive start character offset (must be non-negative)
 * @param charEnd exclusive end character offset
 * @return the overlapping tokens, in their original order
 */
private static List<CoreLabel> tokensForCharacters(List<CoreLabel> tokens, int charBegin, int charEnd) {
  assert charBegin >= 0;
  List<CoreLabel> segment = Generics.newArrayList();
  for (CoreLabel token : tokens) {
    // Positive form of the original skip test: keep tokens that overlap the range
    if (token.endPosition() >= charBegin && token.beginPosition() < charEnd) {
      segment.add(token);
    }
  }
  return segment;
}
/**
 * Joins all the tokens together (more or less) according to their original whitespace.
 * It assumes all whitespace was " ".
 *
 * @param tokens list of tokens which implement {@link HasOffset} and {@link HasWord}
 * @return a string of the tokens with the appropriate amount of spacing
 */
public static String joinWithOriginalWhiteSpace(List<CoreLabel> tokens) {
  if (tokens.isEmpty()) {
    return "";
  }
  CoreLabel previous = tokens.get(0);
  StringBuilder text = new StringBuilder(previous.word());
  for (int idx = 1; idx < tokens.size(); idx++) {
    CoreLabel current = tokens.get(idx);
    // Gap between adjacent tokens in the original text; clamp at zero for overlapping offsets
    int gap = Math.max(0, current.beginPosition() - previous.endPosition());
    text.append(repeat(' ', gap)).append(current.word());
    previous = current;
  }
  return text.toString();
}
/**
 * Reports whether {@code charIndex} falls within (inclusive on both ends) the
 * character span covered by the document tokens at positions {@code tokenRange}.
 *
 * @param tokenRange first/last token indices into the document's token list
 * @param charIndex the character offset to test
 * @return true if the offset lies inside the tokens' character span
 */
public boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
  List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  int spanBegin = docTokens.get(tokenRange.first()).beginPosition();
  int spanEnd = docTokens.get(tokenRange.second()).endPosition();
  return spanBegin <= charIndex && charIndex <= spanEnd;
}
// Emit the label's character offsets as tab-separated fields, terminated by a newline
sb.append(label.beginPosition()); sb.append("\t"); sb.append(label.endPosition()); sb.append("\n");
/** * set isNewline() */ private static void setNewlineStatus(List<CoreLabel> tokensList) { // label newlines for (CoreLabel token : tokensList) { if (token.word().equals(AbstractTokenizer.NEWLINE_TOKEN) && (token.endPosition() - token.beginPosition() == 1)) token.set(CoreAnnotations.IsNewlineAnnotation.class, true); else token.set(CoreAnnotations.IsNewlineAnnotation.class, false); } }
/**
 * Renders a token range as display text. If the range's character span exactly
 * matches an entity mention's span, the mention's cleaned text is returned;
 * otherwise the raw substring of the document text over that span is returned.
 */
public String tokenRangeToString(Pair<Integer, Integer> tokenRange) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  // see if the token range matches an entity mention
  List<CoreMap> entityMentionsInDoc = doc.get(CoreAnnotations.MentionsAnnotation.class);
  Integer potentialMatchingEntityMentionIndex =
      tokens.get(tokenRange.first).get(CoreAnnotations.EntityMentionIndexAnnotation.class);
  CoreMap potentialMatchingEntityMention = null;
  if (entityMentionsInDoc != null && potentialMatchingEntityMentionIndex != null) {
    potentialMatchingEntityMention = entityMentionsInDoc.get(potentialMatchingEntityMentionIndex);
  }
  // if there is a matching entity mention, return its text (which has been processed to remove
  // things like newlines and xml)...if there isn't return the full substring of the document text
  // NOTE: the boxed Integer annotations are compared with == against int offsets; the int
  // operand forces unboxing, so these are value comparisons, not identity comparisons.
  if (potentialMatchingEntityMention != null && potentialMatchingEntityMention.get(
      CoreAnnotations.CharacterOffsetBeginAnnotation.class) == tokens.get(tokenRange.first).beginPosition()
      && potentialMatchingEntityMention.get(
      CoreAnnotations.CharacterOffsetEndAnnotation.class) == tokens.get(tokenRange.second).endPosition()) {
    return potentialMatchingEntityMention.get(CoreAnnotations.TextAnnotation.class);
  } else {
    return doc.get(CoreAnnotations.TextAnnotation.class).substring(
        tokens.get(tokenRange.first).beginPosition(),
        tokens.get(tokenRange.second).endPosition());
  }
}
text.append(token.before()); assert last != null; int missingWhitespace = (token.beginPosition() - last.endPosition()) - token.before().length(); while (missingWhitespace > 0) { text.append(' ');
if (overallMatch.size() == corefMentionTokens.size()) { int personBeginOffset = ((CoreLabel) personWithinMatch.get(0)).beginPosition(); int personEndOffset = ((CoreLabel) personWithinMatch.get(personWithinMatch.size()-1)).endPosition(); Pair<Integer,Integer> personOffsets = new Pair(personBeginOffset, personEndOffset); kbpMentionFound = kbpMentions.get(personOffsets);
for (IntPair offsets:chunkCharOffsets) { assert(token.beginPosition() >= 0); assert(token.endPosition() >= 0); int offsetBegin = offsets.getSource(); int offsetEnd = offsets.getTarget(); output.add(tokenFactory.makeToken(text.substring(offsetBegin,offsetEnd), offsetBegin, offsetEnd-offsetBegin)); output.add(tokenFactory.makeToken(text.substring(offsetEnd,token.endPosition()), offsetEnd, token.endPosition()-offsetEnd)); } else { output.add(tokenFactory.makeToken(text.substring(token.beginPosition(), offsetBegin), token.beginPosition(), offsetBegin-token.beginPosition())); output.add(tokenFactory.makeToken(text.substring(offsetBegin,token.endPosition()), offsetBegin, token.endPosition()-offsetBegin)); output.add(tokenFactory.makeToken(text.substring(offsetEnd,token.endPosition()), offsetEnd, token.endPosition()-offsetEnd)); } else {
// Record the completed name, then begin tracking the current token as the next candidate name
names.incrementCount(prevName, 1); prevName = token.word(); prevEnd = token.endPosition();
tokens.get(m.endIndex - 2).endPosition() + ')');
origWord = tokenLabel.word(); } else { origWord = origText.substring(cl.beginPosition(), cl.endPosition()); iobList.get(iobList.size() - 1).setEndPosition(cl.endPosition());
if (endRelCharOffset > annoText.length()) { endRelCharOffset = annoText.length(); } if (allowPartialTokens) { while (i < annoTokens.size() && annoTokens.get(i).endPosition() <= beginCharOffset) { i++; while (j < annoTokens.size() && annoTokens.get(j).endPosition() <= endCharOffset) { assert(annoTokens.get(j).beginPosition() >= beginCharOffset); j++;
// Collect every token that ends at or before the quote's end character offset
while (i < tokens.size() && tokens.get(i).endPosition() <= end) { quoteTokens.add(tokens.get(i)); i++;
", " + tokens.get(m.endIndex - 2).endPosition() + ')');
/**
 * Segments an input line into tokens by reassembling the IOB-labeled character
 * sequence into {@code CoreLabel} tokens carrying word/value/text annotations
 * and character offsets into the original line.
 *
 * @param line the raw input string to segment
 * @return the segmented tokens, in order of appearance
 */
public List<CoreLabel> segmentStringToTokenList(String line) {
  List<CoreLabel> tokenList = CollectionUtils.makeList();
  List<CoreLabel> labeledSequence = segmentStringToIOB(line);
  for (IntPair span : IOBUtils.TokenSpansForIOB(labeledSequence)) {
    String text = IOBUtils.IOBToString(labeledSequence, prefixMarker, suffixMarker,
        span.getSource(), span.getTarget());
    // Character span of this token in the original line (span end is exclusive over labels)
    int charBegin = labeledSequence.get(span.getSource()).beginPosition();
    int charEnd = labeledSequence.get(span.getTarget() - 1).endPosition();
    CoreLabel token = new CoreLabel();
    token.setWord(text);
    token.setValue(text);
    token.set(CoreAnnotations.TextAnnotation.class, text);
    token.set(CoreAnnotations.ArabicSegAnnotation.class, "1");
    token.setOriginalText(line.substring(charBegin, charEnd));
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
    tokenList.add(token);
  }
  return tokenList;
}