@Override protected void postProcessAnnotations(Span[] tokens, AnnotationFS[] tokenAnnotations) { // if interest if (probabilityFeature != null) { double[] tokenProbabilties = tokenizer.getTokenProbabilities(); for (int i = 0; i < tokenAnnotations.length; i++) { tokenAnnotations[i].setDoubleValue(probabilityFeature, tokenProbabilties[i]); } } }
/**
 * Builds a combo iterator over two annotation types of the same CAS.
 *
 * <p>Both underlying iterators are moved to their first element. If the upper
 * iterator is valid, the begin/end offsets of its first annotation are cached;
 * otherwise {@code nextLowerChecked} is set so that the lower-level lookup is
 * treated as already performed.
 *
 * @param cas
 *          The CAS we're operating on.
 * @param upper
 *          The type of the upper iterator, e.g., sentence.
 * @param lower
 *          The type of the lower iterator, e.g., token.
 */
public AnnotationComboIterator(CAS cas, Type upper, Type lower) {
  this.upperIt = cas.getAnnotationIndex(upper).iterator();
  this.lowerIt = cas.getAnnotationIndex(lower).iterator();
  this.upperIt.moveToFirst();
  this.lowerIt.moveToFirst();

  if (!this.upperIt.isValid()) {
    // No upper annotation at all: mark the lower check as done.
    this.nextLowerChecked = true;
    return;
  }

  final AnnotationFS firstUpper = this.upperIt.get();
  this.upperBegin = firstUpper.getBegin();
  this.upperEnd = firstUpper.getEnd();
}
sentenceTokenList.add(tokenAnnotation.getCoveredText()); names[i].getStart()).getBegin(); names[i].getEnd() - 1).getEnd();
/**
 * Creates a chunk annotation covering a run of tokens and adds it to the index.
 *
 * <p>The annotation starts at the begin offset of {@code tokenAnnotations[start]}
 * and ends at the end offset of {@code tokenAnnotations[end - 1]}, i.e.
 * {@code end} is exclusive.
 *
 * @param tcas the CAS in which the annotation is created and indexed
 * @param tokenAnnotations the token annotations the chunk is built from
 * @param tag the value stored in the chunk feature
 * @param start index of the first token of the chunk
 * @param end index one past the last token of the chunk
 */
private void addChunkAnnotation(CAS tcas, AnnotationFS[] tokenAnnotations,
    String tag, int start, int end) {
  final int chunkBegin = tokenAnnotations[start].getBegin();
  final int chunkEnd = tokenAnnotations[end - 1].getEnd();

  final AnnotationFS chunkAnnotation = tcas.createAnnotation(mChunkType, chunkBegin, chunkEnd);
  chunkAnnotation.setStringValue(mChunkFeature, tag);

  tcas.getIndexRepository().addFS(chunkAnnotation);
}
String text = containerAnnotation.getCoveredText(); sentPositions[i].getStart() + containerAnnotation.getBegin(), sentPositions[i].getEnd() + containerAnnotation.getBegin()); logger.log(Level.FINER, "\"" + sentences[i].getCoveredText() + "\"");
/**
 * Categorizes the document from its token annotations.
 *
 * <p>Collects the covered text of every annotation in the {@code mTokenType}
 * index, passes the resulting array to {@code mCategorizer}, and hands the
 * best category to {@link #setBestCategory}.
 *
 * @param cas the CAS whose tokens are read and which receives the category
 */
public void process(CAS cas) {
  final List<String> tokenTexts = new ArrayList<>();
  for (FSIterator<AnnotationFS> it = cas.getAnnotationIndex(mTokenType).iterator();
      it.hasNext();) {
    final AnnotationFS token = it.next();
    tokenTexts.add(token.getCoveredText());
  }

  final String[] tokens = tokenTexts.toArray(new String[tokenTexts.size()]);
  final double[] outcomes = mCategorizer.categorize(tokens);

  setBestCategory(cas, mCategorizer.getBestCategory(outcomes));
}
}
sentenceTokenList.add(tokenAnnotation.getCoveredText()); final AnnotationFS tokenAnnotation = sentenceTokenIterator.next(); tokenAnnotation.setStringValue(this.posFeature, posTag); tokenAnnotation.setDoubleValue(this.probabilityFeature, posProbabilities[index]); sentenceWithPos.append(token.getCoveredText()); sentenceWithPos.append('\\'); sentenceWithPos.append(token.getStringValue(this.posFeature)); sentenceWithPos.append(' ');
/**
 * Tokenizes the covered text of a sentence with the whitespace tokenizer.
 *
 * @param cas the CAS being processed; not read by this implementation
 * @param sentence the sentence annotation whose covered text is tokenized
 * @return the spans returned by {@code WhitespaceTokenizer.INSTANCE.tokenizePos}
 */
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
  final String sentenceText = sentence.getCoveredText();
  return opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE.tokenizePos(sentenceText);
}
}
tokens[index] = tokenAnnotation.getCoveredText(); pos[index++] = tokenAnnotation.getFeatureValueAsString( mPosFeature);
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) { Parse[] parseChildren = parse.getChildren(); AnnotationFS[] parseChildAnnotations = new AnnotationFS[parseChildren.length]; // do this for all children for (int i = 0; i < parseChildren.length; i++) { parseChildAnnotations[i] = createAnnotation(cas, offset, parseChildren[i]); } AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset + parse.getSpan().getStart(), offset + parse.getSpan().getEnd()); parseAnnotation.setStringValue(mTypeFeature, parse.getType()); if (probabilityFeature != null) { parseAnnotation.setDoubleValue(probabilityFeature, parse.getProb()); } ArrayFS childrenArray = cas.createArrayFS(parseChildAnnotations.length); childrenArray.copyFromArray(parseChildAnnotations, 0, 0, parseChildAnnotations.length); parseAnnotation.setFeatureValue(childrenFeature, childrenArray); cas.getIndexRepository().addFS(parseAnnotation); return parseAnnotation; }
/**
 * Stores the best category on a category annotation.
 *
 * <p>If the {@code mCategoryType} index already contains an annotation, the
 * first one returned by its iterator is reused. Otherwise a new annotation
 * spanning offsets 0 to the document text length is created and indexed.
 *
 * @param tcas the CAS holding the category annotation
 * @param bestCategory the value written to {@code mCategoryFeature}
 */
@Override
protected void setBestCategory(CAS tcas, String bestCategory) {
  final FSIndex<AnnotationFS> categories = tcas.getAnnotationIndex(mCategoryType);
  final boolean hasExisting = categories.size() > 0;

  final AnnotationFS target;
  if (!hasExisting) {
    target = tcas.createAnnotation(mCategoryType, 0, tcas.getDocumentText().length());
    tcas.getIndexRepository().addFS(target);
  } else {
    target = categories.iterator().next();
  }

  target.setStringValue(mCategoryFeature, bestCategory);
}
}
/**
 * Reports whether another lower annotation is available inside the current
 * upper annotation.
 *
 * <p>The answer is cached: once {@code nextLowerChecked} is set, the stored
 * {@code nextLowerAvailable} value is returned without touching the lower
 * iterator. Otherwise the lower iterator is advanced past every annotation
 * that begins before {@code upperBegin}; the annotation it then points at is
 * available if its end does not exceed {@code upperEnd}.
 *
 * @return {@code true} if a lower annotation within the current upper bounds
 *         is available, {@code false} otherwise
 */
public boolean hasNext() {
  final AnnotationComboIterator outer = AnnotationComboIterator.this;

  if (outer.nextLowerChecked) {
    return outer.nextLowerAvailable;
  }

  outer.nextLowerChecked = true;
  outer.nextLowerAvailable = false;

  if (!outer.lowerIt.isValid()) {
    return false;
  }

  // Skip lower annotations that start before the current upper annotation.
  AnnotationFS candidate = outer.lowerIt.get();
  while (candidate.getBegin() < outer.upperBegin) {
    outer.lowerIt.moveToNext();
    if (!outer.lowerIt.isValid()) {
      return false;
    }
    candidate = outer.lowerIt.get();
  }

  outer.nextLowerAvailable = outer.upperEnd >= candidate.getEnd();
  return outer.nextLowerAvailable;
}
int sentenceOffset = sentence.getBegin(); tokeninzedSentenceLog.append(tokenAnnotations[i].getCoveredText()); tokeninzedSentenceLog.append(' ');
/**
 * Tokenizes the covered text of a sentence with {@code tokenizer}.
 *
 * @param cas the CAS being processed; not read by this implementation
 * @param sentence the sentence annotation whose covered text is tokenized
 * @return the spans returned by {@code tokenizer.tokenizePos}
 */
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
  final String sentenceText = sentence.getCoveredText();
  return tokenizer.tokenizePos(sentenceText);
}
}
/**
 * Copies the sentence detector's probabilities onto the sentence annotations.
 *
 * <p>Does nothing when {@code probabilityFeature} is {@code null}. Otherwise
 * the i-th value returned by {@code sentenceDetector.getSentenceProbabilities()}
 * is written to the i-th annotation.
 *
 * @param sentences the sentence annotations that receive the probability values
 */
@Override
protected void postProcessAnnotations(AnnotationFS[] sentences) {
  if (probabilityFeature == null) {
    return;
  }

  final double[] probabilities = sentenceDetector.getSentenceProbabilities();
  int position = 0;
  for (AnnotationFS sentence : sentences) {
    sentence.setDoubleValue(probabilityFeature, probabilities[position]);
    position++;
  }
}
/**
 * Advances to the next upper annotation and pairs it with a fresh iterator
 * over its lower annotations.
 *
 * <p>The begin/end offsets of the new upper annotation are cached and
 * {@code nextLowerChecked} is reset so the lower-level lookup runs again.
 *
 * @return the next upper annotation together with a new {@code AnnotationIterator}
 * @throws NoSuchElementException if the upper iterator has no further element
 */
public AnnotationIteratorPair next() {
  if (this.upperIt.hasNext()) {
    final AnnotationFS currentUpper = this.upperIt.next();
    this.upperBegin = currentUpper.getBegin();
    this.upperEnd = currentUpper.getEnd();
    this.nextLowerChecked = false;
    return new AnnotationIteratorPair(currentUpper, new AnnotationIterator());
  }
  throw new NoSuchElementException();
}
/**
 * Tokenizes the covered text of a sentence with {@code tokenizer}.
 *
 * @param cas the CAS being processed; not read by this implementation
 * @param sentence the sentence annotation whose covered text is tokenized
 * @return the spans returned by {@code tokenizer.tokenizePos}
 */
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
  final String sentenceText = sentence.getCoveredText();
  return tokenizer.tokenizePos(sentenceText);
}
/**
 * Copies the name finder's probabilities onto the name annotations.
 *
 * <p>Does nothing when {@code probabilityFeature} is {@code null}. Otherwise
 * the i-th value of {@code mNameFinder.probs(detectedNames)} is written to
 * the i-th annotation.
 *
 * @param detectedNames the name spans passed to {@code mNameFinder.probs}
 * @param nameAnnotations the annotations that receive the probability values
 */
protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[] nameAnnotations) {
  if (probabilityFeature == null) {
    return;
  }

  final double[] nameProbabilities = mNameFinder.probs(detectedNames);
  int position = 0;
  for (AnnotationFS nameAnnotation : nameAnnotations) {
    nameAnnotation.setDoubleValue(probabilityFeature, nameProbabilities[position]);
    position++;
  }
}