/**
 * Builds a chunk {@code Annotation} over a token span of {@code annotation} and then
 * annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the three-argument overload
 * {@code getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex)}, which is documented
 * to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TextAnnotation - text for the chunk, extracted from {@code annotation}</li>
 * </ul>
 * The finished chunk is then handed to {@code annotateChunkTokens} together with the two keys.
 *
 * @param annotation - Annotation from which the chunk (and its text) is extracted
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           Class tokenChunkKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the three-argument overload.
  final Annotation result = getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex);
  // Token-level annotation is applied once the chunk exists.
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a span of {@code tokens}, fills in its text from the
 * tokens, and then annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the four-argument overload
 * {@code getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset)}, which is
 * documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset (index in the original list of tokens)</li>
 * </ul>
 * The chunk text is then set through {@code annotateChunkText(chunk, tokenTextKey)}, and finally
 * the chunk is handed to {@code annotateChunkTokens} together with the chunk and label keys.
 *
 * @param tokens - List of tokens to look for chunks
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param totalTokenOffset - Index of tokens to offset by
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenTextKey - Key to use to find the token text
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey,
                                           Class tokenTextKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the four-argument overload.
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Text first, token-level annotation second (same order as before).
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
int tokenEnd = i; if (tokenBegin >= 0 && tokenEnd > tokenBegin) { CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type); CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokens.size(), totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type);
CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, i, i + 1, totalTokensOffset, null, null, null); chunk.set(CoreAnnotations.NamedEntityTagAnnotation.class,"ORGANIZATION");
numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, i)); numStart = i + 1; possibleNumEnd = -1; numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, possibleNumEnd)); if (possibleNumStart >= possibleNumEnd) { numStart = possibleNumStart; numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, i)); numStart = i + 1; numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, possibleNumEnd)); if (possibleNumStart >= possibleNumEnd) { numStart = possibleNumStart; numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, i)); numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, i)); if (possibleNumEnd >= 0) { if (numStart < possibleNumEnd) { numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, possibleNumEnd)); numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, possibleNumStart, i + 1)); } else { numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, possibleNumEnd + 1, i + 1)); numbers.add(ChunkAnnotationUtils.getAnnotatedChunk(annotation, numStart, i + 1));
CoreLabel token = tokens.get(tokenIndex); if (kbpIsPronominalMention(token)) { CoreMap pronoun = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenIndex, tokenIndex + 1, annoTokenBegin, null, CoreAnnotations.TextAnnotation.class, null); pronoun.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
int endToken = nerSpan.second(); /* inclusive */ String label = nerSpan.third(); CoreMap nerChunk = ChunkAnnotationUtils.getAnnotatedChunk(sentence, startToken, endToken+1); nerChunk.set(CoreAnnotations.NamedEntityTagAnnotation.class, label); nerChunk.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class)); int endToken = corefSpan.second(); /* inclusive */ String corefId = corefSpan.third(); CoreMap mention = ChunkAnnotationUtils.getAnnotatedChunk(sentence, startToken, endToken+1); mention.set(CorefCoreAnnotations.CorefAnnotation.class, corefId); mention.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
int endToken = nerSpan.second(); /* inclusive */ String label = nerSpan.third(); CoreMap nerChunk = ChunkAnnotationUtils.getAnnotatedChunk(sentence, startToken, endToken+1); nerChunk.set(CoreAnnotations.NamedEntityTagAnnotation.class, label); nerChunk.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class)); int endToken = corefSpan.second(); /* inclusive */ String corefId = corefSpan.third(); CoreMap mention = ChunkAnnotationUtils.getAnnotatedChunk(sentence, startToken, endToken+1); mention.set(CorefCoreAnnotations.CorefAnnotation.class, corefId); mention.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
/**
 * Builds a chunk {@code Annotation} over a token span of {@code annotation} and then
 * annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the three-argument overload
 * {@code getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex)}, which is documented
 * to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TextAnnotation - text for the chunk, extracted from {@code annotation}</li>
 * </ul>
 * The finished chunk is then handed to {@code annotateChunkTokens} together with the two keys.
 *
 * @param annotation - Annotation from which the chunk (and its text) is extracted
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           Class tokenChunkKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the three-argument overload.
  final Annotation result = getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex);
  // Token-level annotation is applied once the chunk exists.
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a token span of {@code annotation} and then
 * annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the three-argument overload
 * {@code getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex)}, which is documented
 * to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TextAnnotation - text for the chunk, extracted from {@code annotation}</li>
 * </ul>
 * The finished chunk is then handed to {@code annotateChunkTokens} together with the two keys.
 *
 * @param annotation - Annotation from which the chunk (and its text) is extracted
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           Class tokenChunkKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the three-argument overload.
  final Annotation result = getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex);
  // Token-level annotation is applied once the chunk exists.
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a token span of {@code annotation} and then
 * annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the three-argument overload
 * {@code getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex)}, which is documented
 * to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TextAnnotation - text for the chunk, extracted from {@code annotation}</li>
 * </ul>
 * The finished chunk is then handed to {@code annotateChunkTokens} together with the two keys.
 *
 * @param annotation - Annotation from which the chunk (and its text) is extracted
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           Class tokenChunkKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the three-argument overload.
  final Annotation result = getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex);
  // Token-level annotation is applied once the chunk exists.
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a token span of {@code annotation} and then
 * annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the three-argument overload
 * {@code getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex)}, which is documented
 * to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + the annotation's TokenBeginAnnotation
 *       (index in the original list of tokens)</li>
 *   <li>TextAnnotation - text for the chunk, extracted from {@code annotation}</li>
 * </ul>
 * The finished chunk is then handed to {@code annotateChunkTokens} together with the two keys.
 *
 * @param annotation - Annotation from which the chunk (and its text) is extracted
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           Class tokenChunkKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the three-argument overload.
  final Annotation result = getAnnotatedChunk(annotation, tokenStartIndex, tokenEndIndex);
  // Token-level annotation is applied once the chunk exists.
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a span of {@code tokens}, fills in its text from the
 * tokens, and then annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the four-argument overload
 * {@code getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset)}, which is
 * documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset (index in the original list of tokens)</li>
 * </ul>
 * The chunk text is then set through {@code annotateChunkText(chunk, tokenTextKey)}, and finally
 * the chunk is handed to {@code annotateChunkTokens} together with the chunk and label keys.
 *
 * @param tokens - List of tokens to look for chunks
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param totalTokenOffset - Index of tokens to offset by
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenTextKey - Key to use to find the token text
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey,
                                           Class tokenTextKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the four-argument overload.
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Text first, token-level annotation second (same order as before).
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a span of {@code tokens}, fills in its text from the
 * tokens, and then annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the four-argument overload
 * {@code getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset)}, which is
 * documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset (index in the original list of tokens)</li>
 * </ul>
 * The chunk text is then set through {@code annotateChunkText(chunk, tokenTextKey)}, and finally
 * the chunk is handed to {@code annotateChunkTokens} together with the chunk and label keys.
 *
 * @param tokens - List of tokens to look for chunks
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param totalTokenOffset - Index of tokens to offset by
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenTextKey - Key to use to find the token text
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey,
                                           Class tokenTextKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the four-argument overload.
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Text first, token-level annotation second (same order as before).
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a span of {@code tokens}, fills in its text from the
 * tokens, and then annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the four-argument overload
 * {@code getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset)}, which is
 * documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset (index in the original list of tokens)</li>
 * </ul>
 * The chunk text is then set through {@code annotateChunkText(chunk, tokenTextKey)}, and finally
 * the chunk is handed to {@code annotateChunkTokens} together with the chunk and label keys.
 *
 * @param tokens - List of tokens to look for chunks
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param totalTokenOffset - Index of tokens to offset by
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenTextKey - Key to use to find the token text
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey,
                                           Class tokenTextKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the four-argument overload.
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Text first, token-level annotation second (same order as before).
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} over a span of {@code tokens}, fills in its text from the
 * tokens, and then annotates the tokens belonging to that chunk.
 *
 * <p>The chunk is produced by the four-argument overload
 * {@code getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset)}, which is
 * documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset (index in the original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset (index in the original list of tokens)</li>
 * </ul>
 * The chunk text is then set through {@code annotateChunkText(chunk, tokenTextKey)}, and finally
 * the chunk is handed to {@code annotateChunkTokens} together with the chunk and label keys.
 *
 * @param tokens - List of tokens to look for chunks
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @param totalTokenOffset - Index of tokens to offset by
 * @param tokenChunkKey - If not null, each token is annotated with the chunk using this key
 * @param tokenTextKey - Key to use to find the token text
 * @param tokenLabelKey - If not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens,
                                           int tokenStartIndex,
                                           int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey,
                                           Class tokenTextKey,
                                           Class tokenLabelKey) {
  // Span construction is delegated to the four-argument overload.
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Text first, token-level annotation second (same order as before).
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/**
 * Create a new chunk Annotation with basic chunk information.
 *
 * <p>The tokens are read from the annotation's TokensAnnotation and the span is built by the
 * four-argument overload, using the annotation's TokenBeginAnnotation (0 when absent) as the
 * token offset. That overload is documented to populate:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk</li>
 *   <li>CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk</li>
 *   <li>TokensAnnotation - List of tokens in this chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + annotation's TokenBeginAnnotation
 *       (index in original list of tokens)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + annotation's TokenBeginAnnotation
 *       (index in original list of tokens)</li>
 * </ul>
 * TextAnnotation is taken from {@code annotation} (via {@code annotateChunkText(chunk, annotation)})
 * when the annotation has text. If the annotation has no text, or if that call leaves the chunk
 * without a TextAnnotation, the text is built from the chunk's tokens instead, so the returned
 * chunk is not silently left without text.
 *
 * @param annotation - Annotation from which to extract the text for this chunk
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @return Annotation representing new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) {
  Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class);
  if (annoTokenBegin == null) {
    // No recorded token offset: treat the annotation's token list as starting at 0.
    annoTokenBegin = 0;
  }
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin);

  String text = annotation.get(CoreAnnotations.TextAnnotation.class);
  if (text != null) {
    // Preferred source: the original annotation's text.
    annotateChunkText(chunk, annotation);
    if (chunk.get(CoreAnnotations.TextAnnotation.class) != null) {
      return chunk;
    }
    // NOTE(review): annotateChunkText is defined elsewhere; this assumes it leaves
    // TextAnnotation unset (rather than throwing) when it cannot extract the text - confirm.
  }
  // Use tokens to get text annotation
  annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class);
  return chunk;
}