/** Returns the character offset at which the wrapped label begins (delegates to {@code label}). */
@Override public int beginPosition() { return label.beginPosition(); }
/**
 * Converts the token-indexed span of {@code item} into character offsets
 * using the stored 1-best segmentation, mutating and returning the same item.
 *
 * @param item the item whose {@code start}/{@code end} token indices are rewritten in place
 * @return the same item, with {@code start}/{@code end} now character offsets
 * @throws RuntimeException if no 1-best segmentation is available
 */
@Override
public Item convertItemSpan(Item item) {
  boolean missingSegmentation = bestSegmentationB == null || bestSegmentationB.isEmpty();
  if (missingSegmentation) {
    throw new RuntimeException(this.getClass().getName() + ": No 1best segmentation available");
  }
  // start token's begin offset; (end - 1) because item.end is an exclusive token index.
  item.start = bestSegmentationB.get(item.start).beginPosition();
  item.end = bestSegmentationB.get(item.end - 1).endPosition();
  return item;
}
private static Optional<CoreMap> overlapsWithMention(CoreMap needle, List<CoreMap> haystack) { List<CoreLabel> tokens = needle.get(CoreAnnotations.TokensAnnotation.class); int charBegin = tokens.get(0).beginPosition(); int charEnd = tokens.get(tokens.size()-1).endPosition(); return (haystack.stream().filter(mention_ -> { List<CoreLabel> tokens_ = mention_.get(CoreAnnotations.TokensAnnotation.class); int charBegin_ = tokens_.get(0).beginPosition(); int charEnd_ = tokens_.get(tokens_.size()-1).endPosition(); // Check overlap return !(charBegin_ > charEnd || charEnd_ < charBegin); }).findFirst()); }
/**
 * Renders tokens as space-separated {@code word{begin, end}} entries, where
 * begin/end are each token's character offsets.
 *
 * @param tokens tokens to render; may be empty
 * @return the formatted string, or "" for an empty list
 */
public static String tokensToString(List<CoreLabel> tokens) {
  // Fixed: use StringBuilder instead of the legacy synchronized StringBuffer,
  // and chained appends instead of building an intermediate concatenated String.
  StringBuilder os = new StringBuilder();
  boolean first = true;
  for (CoreLabel t : tokens) {
    if (!first) {
      os.append(' ');
    }
    os.append(t.word())
      .append('{')
      .append(t.beginPosition())
      .append(", ")
      .append(t.endPosition())
      .append('}');
    first = false;
  }
  return os.toString();
}
/**
 * Selects the tokens whose character spans touch the range
 * [{@code charBegin}, {@code charEnd}); tokens strictly before or after the
 * range are dropped.
 *
 * @param tokens the full token list to filter
 * @param charBegin inclusive character start of the range (must be &gt;= 0)
 * @param charEnd exclusive character end of the range
 * @return a new list of the tokens intersecting the range, in input order
 */
private static List<CoreLabel> tokensForCharacters(List<CoreLabel> tokens, int charBegin, int charEnd) {
  assert charBegin >= 0;
  List<CoreLabel> segment = Generics.newArrayList();
  for (CoreLabel token : tokens) {
    boolean endsBeforeRange = token.endPosition() < charBegin;
    boolean startsAfterRange = token.beginPosition() >= charEnd;
    if (!endsBeforeRange && !startsAfterRange) {
      segment.add(token);
    }
  }
  return segment;
}
/**
 * Joins all the tokens together (more or less) according to their original whitespace.
 * It assumes all whitespace was " ".
 *
 * @param tokens list of tokens which implement {@link HasOffset} and {@link HasWord}
 * @return a string of the tokens with the appropriate amount of spacing
 */
public static String joinWithOriginalWhiteSpace(List<CoreLabel> tokens) {
  if (tokens.isEmpty()) {
    return "";
  }
  CoreLabel previous = tokens.get(0);
  StringBuilder buffer = new StringBuilder(previous.word());
  for (int i = 1; i < tokens.size(); i++) {
    CoreLabel current = tokens.get(i);
    // Gap between consecutive tokens' character offsets; clamp negatives
    // (overlapping offsets) to zero spaces.
    int gap = Math.max(current.beginPosition() - previous.endPosition(), 0);
    buffer.append(repeat(' ', gap)).append(current.word());
    previous = current;
  }
  return buffer.toString();
}
/**
 * Tests whether {@code charIndex} falls inside the character span covered by
 * the given token range (inclusive on both ends).
 *
 * @param tokenRange pair of token indices (first, last) into the document's token list
 * @param charIndex the character offset to test
 * @return true iff the offset lies within [firstToken.begin, lastToken.end]
 */
public boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  int rangeBegin = tokens.get(tokenRange.first()).beginPosition();
  int rangeEnd = tokens.get(tokenRange.second()).endPosition();
  return rangeBegin <= charIndex && charIndex <= rangeEnd;
}
// Split a compound token into two pieces at secondOffset (relative to the
// token's begin offset). NOTE(review): secondOffset/secondLength are
// presumably computed by the enclosing method — confirm against full source.
int secondStart = cl.beginPosition() + secondOffset;
int secondEnd = secondStart + secondLength;
// Queue the second piece for later emission; return the first piece, which
// spans from the token's begin up to the split point.
compoundBuffer.add(copyCoreLabel(cl, second, secondStart, secondEnd));
return copyCoreLabel(cl, first, cl.beginPosition(), secondStart);
/**
 * Splits a compound marked by the lexer into its parts, assigning each part
 * character offsets laid out end-to-end from the original token's begin
 * offset. All parts are queued in {@code compoundBuffer}; the first part is
 * removed from the buffer and returned.
 */
private CoreLabel processCompound(CoreLabel cl) {
  cl.remove(ParentAnnotation.class);
  // Pad dashes with spaces so they become standalone parts, then split on whitespace.
  String[] parts = pSpace.split(pDash.matcher(cl.word()).replaceAll(" - "));
  int offset = 0;
  for (String part : parts) {
    int begin = cl.beginPosition() + offset;
    int end = begin + part.length();
    CoreLabel pieceLabel = new CoreLabel(cl);
    pieceLabel.setWord(part);
    pieceLabel.setValue(part);
    pieceLabel.setBeginPosition(begin);
    pieceLabel.setEndPosition(end);
    pieceLabel.set(OriginalTextAnnotation.class, part);
    compoundBuffer.add(pieceLabel);
    offset += part.length();
  }
  return compoundBuffer.remove(0);
}
// Emit the label's character offsets as tab-separated columns: begin <TAB> end.
sb.append(label.beginPosition());
sb.append("\t");
sb.append(label.endPosition());
/** * set isNewline() */ private static void setNewlineStatus(List<CoreLabel> tokensList) { // label newlines for (CoreLabel token : tokensList) { if (token.word().equals(AbstractTokenizer.NEWLINE_TOKEN) && (token.endPosition() - token.beginPosition() == 1)) token.set(CoreAnnotations.IsNewlineAnnotation.class, true); else token.set(CoreAnnotations.IsNewlineAnnotation.class, false); } }
// Split a compound token into two pieces at secondOffset (relative to the
// token's begin offset). NOTE(review): duplicate of an earlier fragment in
// this file; secondOffset/secondLength come from the enclosing method.
int secondStart = cl.beginPosition() + secondOffset;
int secondEnd = secondStart + secondLength;
// Queue the second piece; return the first piece spanning up to the split point.
compoundBuffer.add(copyCoreLabel(cl, second, secondStart, secondEnd));
return copyCoreLabel(cl, first, cl.beginPosition(), secondStart);
public String tokenRangeToString(Pair<Integer, Integer> tokenRange) { List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class); // see if the token range matches an entity mention List<CoreMap> entityMentionsInDoc = doc.get(CoreAnnotations.MentionsAnnotation.class); Integer potentialMatchingEntityMentionIndex = tokens.get(tokenRange.first).get(CoreAnnotations.EntityMentionIndexAnnotation.class); CoreMap potentialMatchingEntityMention = null; if (entityMentionsInDoc != null && potentialMatchingEntityMentionIndex != null) { potentialMatchingEntityMention = entityMentionsInDoc.get(potentialMatchingEntityMentionIndex); } // if there is a matching entity mention, return it's text (which has been processed to remove // things like newlines and xml)...if there isn't return the full substring of the document text if (potentialMatchingEntityMention != null && potentialMatchingEntityMention.get( CoreAnnotations.CharacterOffsetBeginAnnotation.class) == tokens.get(tokenRange.first).beginPosition() && potentialMatchingEntityMention.get( CoreAnnotations.CharacterOffsetEndAnnotation.class) == tokens.get(tokenRange.second).endPosition()) { return potentialMatchingEntityMention.get(CoreAnnotations.TextAnnotation.class); } else { return doc.get(CoreAnnotations.TextAnnotation.class).substring( tokens.get(tokenRange.first).beginPosition(), tokens.get(tokenRange.second).endPosition()); } }
text.append(token.before());
assert last != null;
// Whitespace implied by the character offsets that token.before() did not
// account for: the gap between the previous token's end and this token's
// begin, minus the before-text already appended. Pad with plain spaces.
// NOTE(review): the loop body is cut off here — the decrement presumably
// follows on the next (unseen) line.
int missingWhitespace = (token.beginPosition() - last.endPosition()) - token.before().length();
while (missingWhitespace > 0) {
  text.append(' ');
System.out.println(p.name + " " + weight + " name");
} else if (mention.type.equals(PRONOUN)) {
  // Pronoun mention: resolve the speaker via coreference, keyed by the
  // character offset at which the mention's first token begins.
  int charBeginKey = doc.get(CoreAnnotations.TokensAnnotation.class).get(mention.begin).beginPosition();
  Person p = doCoreference(charBeginKey, quote);
  if (p != null) {
// NOTE(review): this fragment appears corrupted in extraction — the stray
// argument list "offsetEnd, token.endPosition()-offsetEnd));" has no call
// site and the "} else {" has no visible matching "if"; the branch structure
// that splits a token around chunk character offsets must be reconstructed
// from the upstream source before any edit. Code left byte-identical.
CoreLabel token = tokens.get(i); for (IntPair offsets:chunkCharOffsets) { assert(token.beginPosition() >= 0); assert(token.endPosition() >= 0); int offsetBegin = offsets.getSource(); output.add(tokenFactory.makeToken(text.substring(token.beginPosition(), offsetBegin), token.beginPosition(), offsetBegin-token.beginPosition())); output.add(tokenFactory.makeToken(text.substring(offsetBegin,offsetEnd), offsetBegin, offsetEnd-offsetBegin)); offsetEnd, token.endPosition()-offsetEnd)); } else { output.add(tokenFactory.makeToken(text.substring(token.beginPosition(), offsetBegin), token.beginPosition(), offsetBegin-token.beginPosition())); output.add(tokenFactory.makeToken(text.substring(offsetBegin,token.endPosition()), offsetBegin, token.endPosition()-offsetBegin)); output.add(tokenFactory.makeToken(text.substring(token.beginPosition(),offsetEnd), token.beginPosition(), offsetEnd-token.beginPosition())); output.add(tokenFactory.makeToken(text.substring(offsetEnd,token.endPosition()), offsetEnd, token.endPosition()-offsetEnd));
// The tokens matched by capture group 1 (the person portion of a title+person pattern).
List<CoreMap> personWithinMatch = titlePersonMatcher.groupNodes(1);
// Only accept the match when the pattern consumed the whole coref mention.
if (overallMatch.size() == corefMentionTokens.size()) {
  int personBeginOffset = ((CoreLabel) personWithinMatch.get(0)).beginPosition();
  int personEndOffset = ((CoreLabel) personWithinMatch.get(personWithinMatch.size() - 1)).endPosition();
  // Fixed: raw-type "new Pair(...)" produced an unchecked warning; use the
  // diamond operator so the pair is properly typed.
  Pair<Integer, Integer> personOffsets = new Pair<>(personBeginOffset, personEndOffset);
// Print the mention with its character span as "[begin, end)". The -1 / -2
// adjustments suggest startIndex/endIndex are 1-based with an exclusive end —
// NOTE(review): confirm the indexing convention against the Mention class.
out.println(" " + m + ":[" + tokens.get(m.startIndex - 1).beginPosition() + ", " + tokens.get(m.endIndex - 2).endPosition() + ')');
/** * Handles verbs with attached suffixes, marked by the lexer: * * Escribamosela => Escribamo + se + la => escribamos + se + la * Sentaos => senta + os => sentad + os * Damelo => da + me + lo * */ private CoreLabel processVerb(CoreLabel cl) { cl.remove(ParentAnnotation.class); SpanishVerbStripper.StrippedVerb stripped = verbStripper.separatePronouns(cl.word()); if (stripped == null) { return cl; } // Split the CoreLabel into separate labels, tracking changing begin + end // positions. int stemEnd = cl.beginPosition() + stripped.getOriginalStem().length(); int lengthRemoved = 0; for (String pronoun : stripped.getPronouns()) { int beginOffset = stemEnd + lengthRemoved; compoundBuffer.add(copyCoreLabel(cl, pronoun, beginOffset)); lengthRemoved += pronoun.length(); } CoreLabel stem = copyCoreLabel(cl, stripped.getStem(), cl.beginPosition(), stemEnd); stem.setOriginalText(stripped.getOriginalStem()); return stem; }
/**
 * Segments a raw input line into tokens. The line is first labeled with IOB
 * tags; each resulting token span is then materialized as a CoreLabel
 * carrying the segmented text, the original substring, and character offsets.
 *
 * @param line the raw input string to segment
 * @return the segmented tokens, in order
 */
public List<CoreLabel> segmentStringToTokenList(String line) {
  List<CoreLabel> tokenList = CollectionUtils.makeList();
  List<CoreLabel> labeledSequence = segmentStringToIOB(line);
  for (IntPair span : IOBUtils.TokenSpansForIOB(labeledSequence)) {
    String text = IOBUtils.IOBToString(labeledSequence, prefixMarker, suffixMarker,
        span.getSource(), span.getTarget());
    // Character span of the token in the original line (end index exclusive
    // token -> last label is at getTarget() - 1).
    int start = labeledSequence.get(span.getSource()).beginPosition();
    int end = labeledSequence.get(span.getTarget() - 1).endPosition();
    CoreLabel token = new CoreLabel();
    token.setWord(text);
    token.setValue(text);
    token.set(CoreAnnotations.TextAnnotation.class, text);
    token.set(CoreAnnotations.ArabicSegAnnotation.class, "1");
    token.setOriginalText(line.substring(start, end));
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, start);
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
    tokenList.add(token);
  }
  return tokenList;
}