/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/**
 * Builds a chunk {@code Annotation} for a token range and annotates both the chunk and its tokens.
 * <p>
 * The chunk is created by the four-argument overload of {@code getAnnotatedChunk}, which
 * populates:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 * Afterwards the chunk text is set via {@code annotateChunkText(chunk, tokenTextKey)}, and the
 * tokens are annotated via {@code annotateChunkTokens(chunk, tokenChunkKey, tokenLabelKey)}.
 * The text step runs first, before the tokens are annotated.
 *
 * @param tokens           list of tokens to look for chunks
 * @param tokenStartIndex  index (relative to {@code tokens}) at which the chunk starts
 * @param tokenEndIndex    index (relative to {@code tokens}) at which the chunk ends (not inclusive)
 * @param totalTokenOffset number of tokens to offset the chunk's token indices by
 * @param tokenChunkKey    if not null, each token is annotated with the chunk using this key
 * @param tokenTextKey     key to use to find the token text
 * @param tokenLabelKey    if not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens, int tokenStartIndex, int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey, Class tokenTextKey, Class tokenLabelKey) {
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Order matters: set the chunk text first, then annotate the individual tokens.
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
if (origText != null) { ChunkAnnotationUtils.annotateChunkText(cm, annotation); text = cm.get(CoreAnnotations.TextAnnotation.class);
chunkOffsets.getBegin(), chunkOffsets.getEnd()); if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) { ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/** * Create a new chunk Annotation with basic chunk information * CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk * CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk * TokensAnnotation - List of tokens in this chunk * TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens) * tokenStartIndex + annotation's TokenBeginAnnotation * TokenEndAnnotation - Index of last token in chunk (index in original list of tokens) * tokenEndIndex + annotation's TokenBeginAnnotation * TextAnnotation - String extracted from the origAnnotation using character offset information for this chunk * @param annotation - Annotation from which to extract the text for this chunk * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive) * @return Annotation representing new chunk */ public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) { Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class); if (annoTokenBegin == null) { annoTokenBegin = 0; } List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin); boolean annotatedTextFromCharOffsets = annotateChunkText(chunk, annotation); if (!annotatedTextFromCharOffsets) { // Use tokens to get text annotation annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class); } return chunk; }
/**
 * Builds a chunk {@code Annotation} for a token range and annotates both the chunk and its tokens.
 * <p>
 * The chunk is created by the four-argument overload of {@code getAnnotatedChunk}, which
 * populates:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 * Afterwards the chunk text is set via {@code annotateChunkText(chunk, tokenTextKey)}, and the
 * tokens are annotated via {@code annotateChunkTokens(chunk, tokenChunkKey, tokenLabelKey)}.
 * The text step runs first, before the tokens are annotated.
 *
 * @param tokens           list of tokens to look for chunks
 * @param tokenStartIndex  index (relative to {@code tokens}) at which the chunk starts
 * @param tokenEndIndex    index (relative to {@code tokens}) at which the chunk ends (not inclusive)
 * @param totalTokenOffset number of tokens to offset the chunk's token indices by
 * @param tokenChunkKey    if not null, each token is annotated with the chunk using this key
 * @param tokenTextKey     key to use to find the token text
 * @param tokenLabelKey    if not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens, int tokenStartIndex, int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey, Class tokenTextKey, Class tokenLabelKey) {
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Order matters: set the chunk text first, then annotate the individual tokens.
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} for a token range and annotates both the chunk and its tokens.
 * <p>
 * The chunk is created by the four-argument overload of {@code getAnnotatedChunk}, which
 * populates:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 * Afterwards the chunk text is set via {@code annotateChunkText(chunk, tokenTextKey)}, and the
 * tokens are annotated via {@code annotateChunkTokens(chunk, tokenChunkKey, tokenLabelKey)}.
 * The text step runs first, before the tokens are annotated.
 *
 * @param tokens           list of tokens to look for chunks
 * @param tokenStartIndex  index (relative to {@code tokens}) at which the chunk starts
 * @param tokenEndIndex    index (relative to {@code tokens}) at which the chunk ends (not inclusive)
 * @param totalTokenOffset number of tokens to offset the chunk's token indices by
 * @param tokenChunkKey    if not null, each token is annotated with the chunk using this key
 * @param tokenTextKey     key to use to find the token text
 * @param tokenLabelKey    if not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens, int tokenStartIndex, int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey, Class tokenTextKey, Class tokenLabelKey) {
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Order matters: set the chunk text first, then annotate the individual tokens.
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Builds a chunk {@code Annotation} for a token range and annotates both the chunk and its tokens.
 * <p>
 * The chunk is created by the four-argument overload of {@code getAnnotatedChunk}, which
 * populates:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 * Afterwards the chunk text is set via {@code annotateChunkText(chunk, tokenTextKey)}, and the
 * tokens are annotated via {@code annotateChunkTokens(chunk, tokenChunkKey, tokenLabelKey)}.
 * The text step runs first, before the tokens are annotated.
 *
 * @param tokens           list of tokens to look for chunks
 * @param tokenStartIndex  index (relative to {@code tokens}) at which the chunk starts
 * @param tokenEndIndex    index (relative to {@code tokens}) at which the chunk ends (not inclusive)
 * @param totalTokenOffset number of tokens to offset the chunk's token indices by
 * @param tokenChunkKey    if not null, each token is annotated with the chunk using this key
 * @param tokenTextKey     key to use to find the token text
 * @param tokenLabelKey    if not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens, int tokenStartIndex, int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey, Class tokenTextKey, Class tokenLabelKey) {
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Order matters: set the chunk text first, then annotate the individual tokens.
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
/**
 * Create a new chunk Annotation with basic chunk information.
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk</li>
 *   <li>CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk</li>
 *   <li>TokensAnnotation - List of tokens in this chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + annotation's TokenBeginAnnotation (0 if absent)</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + annotation's TokenBeginAnnotation (0 if absent)</li>
 *   <li>TextAnnotation - String extracted from the annotation's text using character offset
 *       information for this chunk; if the annotation has no text, or the extraction reports
 *       failure, the text is built from the chunk's tokens instead</li>
 * </ul>
 *
 * @param annotation - Annotation from which to extract the text for this chunk
 * @param tokenStartIndex - Index (relative to current list of tokens) at which this chunk starts
 * @param tokenEndIndex - Index (relative to current list of tokens) at which this chunk ends (not inclusive)
 * @return Annotation representing new chunk
 */
public static Annotation getAnnotatedChunk(CoreMap annotation, int tokenStartIndex, int tokenEndIndex) {
  Integer annoTokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class);
  if (annoTokenBegin == null) {
    annoTokenBegin = 0;
  }
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  Annotation chunk = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, annoTokenBegin);

  // Only attempt the character-offset route when the source actually has text. Previously a
  // failed attempt left the chunk without token-derived text; now we fall back in that case too.
  boolean annotatedTextFromCharOffsets =
      annotation.get(CoreAnnotations.TextAnnotation.class) != null && annotateChunkText(chunk, annotation);
  if (!annotatedTextFromCharOffsets) {
    // Use tokens to get text annotation
    annotateChunkText(chunk, CoreAnnotations.TextAnnotation.class);
  }
  return chunk;
}
/**
 * Builds a chunk {@code Annotation} for a token range and annotates both the chunk and its tokens.
 * <p>
 * The chunk is created by the four-argument overload of {@code getAnnotatedChunk}, which
 * populates:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - CharacterOffsetBeginAnnotation of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - CharacterOffsetEndAnnotation of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in the chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 * Afterwards the chunk text is set via {@code annotateChunkText(chunk, tokenTextKey)}, and the
 * tokens are annotated via {@code annotateChunkTokens(chunk, tokenChunkKey, tokenLabelKey)}.
 * The text step runs first, before the tokens are annotated.
 *
 * @param tokens           list of tokens to look for chunks
 * @param tokenStartIndex  index (relative to {@code tokens}) at which the chunk starts
 * @param tokenEndIndex    index (relative to {@code tokens}) at which the chunk ends (not inclusive)
 * @param totalTokenOffset number of tokens to offset the chunk's token indices by
 * @param tokenChunkKey    if not null, each token is annotated with the chunk using this key
 * @param tokenTextKey     key to use to find the token text
 * @param tokenLabelKey    if not null, each token is annotated with the text associated with the chunk using this key
 * @return Annotation representing the new chunk
 */
public static Annotation getAnnotatedChunk(List<CoreLabel> tokens, int tokenStartIndex, int tokenEndIndex,
                                           int totalTokenOffset,
                                           Class tokenChunkKey, Class tokenTextKey, Class tokenLabelKey) {
  final Annotation result = getAnnotatedChunk(tokens, tokenStartIndex, tokenEndIndex, totalTokenOffset);
  // Order matters: set the chunk text first, then annotate the individual tokens.
  annotateChunkText(result, tokenTextKey);
  annotateChunkTokens(result, tokenChunkKey, tokenLabelKey);
  return result;
}
if (origText != null) { ChunkAnnotationUtils.annotateChunkText(cm, annotation); text = cm.get(CoreAnnotations.TextAnnotation.class);
if (origText != null) { ChunkAnnotationUtils.annotateChunkText(cm, annotation); text = cm.get(CoreAnnotations.TextAnnotation.class);
if (origText != null) { ChunkAnnotationUtils.annotateChunkText(cm, annotation); text = cm.get(CoreAnnotations.TextAnnotation.class);
/**
 * Builds the annotation for this expression from {@code sourceAnnotation} and stores the
 * derived state on this object.
 * <p>
 * Two paths, depending on whether {@code chunkOffsets} is already set:
 * <ul>
 *   <li>{@code chunkOffsets == null}: chunk offsets are computed from {@code charOffsets}
 *       (shifted by the source's CharacterOffsetBeginAnnotation, or 0 if absent) over the
 *       source's numerized tokens. The annotation is then built from the character offsets, and
 *       its NumerizedTokensAnnotation is copied from a merged chunk over the same range.</li>
 *   <li>{@code chunkOffsets != null}: the annotation is the merged chunk of the source's
 *       numerized tokens over {@code chunkOffsets}. Its text is annotated from the source when
 *       the source has a TextAnnotation key, and {@code charOffsets} is read back from the
 *       annotation.</li>
 * </ul>
 * In both cases {@code tokenOffsets} is set from the annotation's token begin/end (open end).
 * The annotation's children are set to its numerized tokens, and this object is attached under
 * {@code Annotation.class}. Finally {@code text} and {@code temporal} are refreshed from the
 * annotation.
 *
 * @param sourceAnnotation annotation to extract this expression's chunk from
 * @return the annotation built for this expression (also stored in the {@code annotation} field)
 */
public CoreMap extractAnnotation(CoreMap sourceAnnotation) {
  if (chunkOffsets == null) {
    // Character offsets are the only location info so far; derive chunk offsets from them.
    Integer sourceCharBegin = sourceAnnotation.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int charBase = (sourceCharBegin == null) ? 0 : sourceCharBegin;
    chunkOffsets = ChunkAnnotationUtils.getChunkOffsetsUsingCharOffsets(
        sourceAnnotation.get(CoreAnnotations.NumerizedTokensAnnotation.class),
        charOffsets.getBegin() + charBase,
        charOffsets.getEnd() + charBase);
    CoreMap mergedNumerized = ChunkAnnotationUtils.getMergedChunk(
        sourceAnnotation.get(CoreAnnotations.NumerizedTokensAnnotation.class),
        chunkOffsets.getBegin(),
        chunkOffsets.getEnd(),
        CoreMapAttributeAggregator.DEFAULT_NUMERIC_TOKENS_AGGREGATORS);
    annotation = ChunkAnnotationUtils.getAnnotatedChunkUsingCharOffsets(
        sourceAnnotation, charOffsets.getBegin(), charOffsets.getEnd());
    tokenOffsets = Interval.toInterval(
        annotation.get(CoreAnnotations.TokenBeginAnnotation.class),
        annotation.get(CoreAnnotations.TokenEndAnnotation.class),
        Interval.INTERVAL_OPEN_END);
    // Carry the numerized tokens of the merged chunk over to the char-offset based annotation.
    annotation.set(CoreAnnotations.NumerizedTokensAnnotation.class,
        mergedNumerized.get(CoreAnnotations.NumerizedTokensAnnotation.class));
  } else {
    // Chunk offsets are known: merge the numerized tokens in that range.
    annotation = ChunkAnnotationUtils.getMergedChunk(
        sourceAnnotation.get(CoreAnnotations.NumerizedTokensAnnotation.class),
        chunkOffsets.getBegin(),
        chunkOffsets.getEnd(),
        CoreMapAttributeAggregator.DEFAULT_NUMERIC_TOKENS_AGGREGATORS);
    if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) {
      ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);
    }
    charOffsets = Interval.toInterval(
        annotation.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
        annotation.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    tokenOffsets = Interval.toInterval(
        annotation.get(CoreAnnotations.TokenBeginAnnotation.class),
        annotation.get(CoreAnnotations.TokenEndAnnotation.class),
        Interval.INTERVAL_OPEN_END);
  }

  annotation.set(TimeExpression.ChildrenAnnotation.class,
      annotation.get(CoreAnnotations.NumerizedTokensAnnotation.class));
  annotation.set(Annotation.class, this);
  text = annotation.get(CoreAnnotations.TextAnnotation.class);
  temporal = temporalFunc.apply(annotation);
  return annotation;
}
chunkOffsets.getBegin(), chunkOffsets.getEnd()); if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) { ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);
chunkOffsets.getBegin(), chunkOffsets.getEnd()); if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) { ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);
chunkOffsets.getBegin(), chunkOffsets.getEnd(), aggregators ); if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) { ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);