/**
 * Produces a deep copy of this StringFieldValue: the value itself is copied by
 * {@code super.clone()}, and every span tree is duplicated through the SpanTree
 * copy constructor so the copy shares no tree with this instance.
 *
 * @return a new deep-copied StringFieldValue
 */
@Override
public StringFieldValue clone() {
    StringFieldValue copy = (StringFieldValue) super.clone();
    if (spanTrees == null) {
        // Nothing to deep-copy; the clone keeps the null reference it inherited.
        return copy;
    }

    HashMap<String, SpanTree> copiedTrees = new HashMap<String, SpanTree>(spanTrees.size());
    for (Map.Entry<String, SpanTree> named : spanTrees.entrySet()) {
        SpanTree duplicate = new SpanTree(named.getValue());
        copiedTrees.put(named.getKey(), duplicate);
    }
    copy.spanTrees = copiedTrees;
    return copy;
}
/**
 * Attaches the given annotation to this node via the tree this node belongs to.
 * Shorthand for
 * {@link SpanTree#annotate(SpanNode,Annotation) getSpanTree().annotate(this, annotation)}.
 *
 * @param annotation the annotation to add
 * @return this, for chaining
 * @throws NullPointerException if this span is not attached to a tree
 */
public SpanNode annotate(Annotation annotation) {
    SpanTree owner = getNonNullSpanTree();
    owner.annotate(this, annotation);
    return this;
}
/**
 * Registers an Annotation for the given SpanNode in this SpanTree without performing
 * any checks. The caller is expected to pass a valid SpanNode that has already been
 * attached to the Annotation; upon return, Annotation.getSpanNode() returns that node.
 *
 * @param node       the node to annotate
 * @param annotation the Annotation to add
 * @return this, for chaining
 * @see com.yahoo.document.annotation.Annotation
 */
public final SpanTree annotateFast(SpanNode node, Annotation annotation) {
    this.annotateInternal(node, annotation);
    return this;
}
@SuppressWarnings("unchecked") public SpanTree(SpanTree otherToCopy) { name = otherToCopy.name; setRoot(copySpan(otherToCopy.root)); List<Annotation> annotationsToCopy = new ArrayList<Annotation>(otherToCopy.getAnnotations()); List<Annotation> newAnnotations = new ArrayList<Annotation>(annotationsToCopy.size()); IdentityHashMap<SpanNode, Integer> originalSpanNodes = getSpanNodes(otherToCopy); List<SpanNode> copySpanNodes = getSpanNodes(); IdentityHashMap<Annotation, Integer> originalAnnotations = getAnnotations(annotationsToCopy); continue; setCorrectAnnotationReference(a.getFieldValue(), originalAnnotations, newAnnotations); annotate(a); for (IndexKey key : otherToCopy.getCurrentIndexes()) { createIndex(key);
tree.cleanup(); new StringFieldValue(tree.getName()).serialize(this); write(tree.getRoot()); List<Annotation> tmpAnnotationList = new ArrayList<Annotation>(tree.numAnnotations()); for (Annotation annotation : tree) { tmpAnnotationList.add(annotation);
/** * Annotates the given string with the appropriate linguistics annotations. * * @param text the text to annotate * @return whether or not anything was annotated */ public boolean annotate(StringFieldValue text) { if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS. Tokenizer tokenizer = factory.getTokenizer(); String input = (text.getString().length() <= config.getMaxTokenizeLength()) ? text.getString() : text.getString().substring(0, config.getMaxTokenizeLength()); Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(), config.getRemoveAccents()); TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences()); SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS); for (Token token : tokens) { addAnnotationSpan(text.getString(), tree.spanList(), tokenizer, token, config.getStemMode(), termOccurrences); } if (tree.numAnnotations() == 0) return false; text.setSpanTree(tree); return true; }
/**
 * Replaces the current value with a clone that carries a LINGUISTICS span tree
 * holding a single span over the whole string. The span gets a TERM annotation
 * (valued with the result of {@code toLowerCase} when that differs from the string,
 * otherwise without a value) and a TOKEN_TYPE annotation of ALPHABETIC.
 * An empty input string leaves the context untouched.
 *
 * @param ctx the execution context whose value is read and replaced
 */
@Override
protected void doExecute(ExecutionContext ctx) {
    StringFieldValue source = (StringFieldValue) ctx.getValue();
    if (!source.getString().isEmpty()) {
        StringFieldValue annotated = source.clone();
        ctx.setValue(annotated);

        String original = annotated.getString();
        String lowered = toLowerCase(original);

        // The TERM annotation only carries a value when lowercasing changed the text.
        StringFieldValue termValue = null;
        if (!lowered.equals(original)) {
            termValue = new StringFieldValue(lowered);
        }

        SpanList spans = new SpanList();
        SpanTree linguistics = new SpanTree(SpanTrees.LINGUISTICS, spans);
        SpanNode whole = new Span(0, original.length());
        linguistics.annotate(whole, new Annotation(AnnotationTypes.TERM, termValue));
        linguistics.annotate(whole, new Annotation(AnnotationTypes.TOKEN_TYPE,
                                                   new IntegerFieldValue(TokenType.ALPHABETIC.getValue())));
        spans.add(whole);
        annotated.setSpanTree(linguistics);
    }
}
@Override protected void doExecute(ExecutionContext ctx) { StringFieldValue input = (StringFieldValue)ctx.getValue(); SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just // need to annotate it somehow (as a non-term) to make sure it is added to the summary) if (lastPosition < gram.getStart()) { typedSpan(lastPosition, gram.getStart() - lastPosition, TokenType.PUNCTUATION, spanList); } // annotate gram as a word term String gramString = gram.extractFrom(input.getString()); typedSpan(gram.getStart(), gram.getLength(), TokenType.ALPHABETIC, spanList). annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getLength(); } // handle punctuation at the end if (lastPosition < input.toString().length()) { typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } }
/**
 * Removes the given Annotation from the collection returned by {@code getAnnotations()}.
 *
 * @param a the annotation to remove
 * @return true if the Annotation was successfully removed, false otherwise
 */
public boolean remove(Annotation a) {
    boolean removed = getAnnotations().remove(a);
    return removed;
}
/**
 * Clears the current index, but only if it is the kind named by {@code key}:
 * a SPAN_NODE key matches a SpanNode2AnnotationContainer and an ANNOTATION_TYPE key
 * matches an AnnotationType2AnnotationContainer. Any other combination is a no-op.
 *
 * @param key the kind of index to clear
 */
public void clearIndex(IndexKey key) {
    boolean spanNodeIndexActive =
            key == IndexKey.SPAN_NODE && annotations instanceof SpanNode2AnnotationContainer;
    boolean annotationTypeIndexActive =
            key == IndexKey.ANNOTATION_TYPE && annotations instanceof AnnotationType2AnnotationContainer;

    if (spanNodeIndexActive || annotationTypeIndexActive) {
        clearIndex();
    }
}
/**
 * Compares this tree with another object. Two SpanTrees are equal when their
 * annotations, names and roots are all equal; the checks run in that order and
 * stop at the first mismatch.
 *
 * @param o the object to compare with
 * @return true if {@code o} is this instance or an equal SpanTree
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o instanceof SpanTree) {
        SpanTree other = (SpanTree) o;
        return annotationsEquals(other)
               && name.equals(other.name)
               && root.equals(other.root);
    }
    return false;
}
tree.cleanup(); new StringFieldValue(tree.getName()).serialize(this); write(tree.getRoot()); List<Annotation> tmpAnnotationList = new ArrayList<Annotation>(tree.numAnnotations()); for (Annotation annotation : tree) { tmpAnnotationList.add(annotation);
/**
 * Checks whether this tree and the given tree hold equal annotations. Both annotation
 * collections are copied into lists; a size mismatch is rejected up front, and
 * otherwise the decision is delegated to {@code CollectionUtils.isEqualCollection}.
 *
 * @param tree the tree whose annotations are compared with this tree's
 * @return true if the two annotation collections are considered equal
 */
@SuppressWarnings("unchecked")
private boolean annotationsEquals(SpanTree tree) {
    List<Annotation> mine = new LinkedList<Annotation>(getAnnotations());
    List<Annotation> theirs = new LinkedList<Annotation>(tree.getAnnotations());
    if (mine.size() != theirs.size()) {
        return false;
    }
    return CollectionUtils.isEqualCollection(mine, theirs);
}
/**
 * Clears whatever index is currently in use. When the annotations are held in a
 * plain ListAnnotationContainer there is nothing to clear and the call is a no-op.
 */
public void clearIndexes() {
    if (annotations instanceof ListAnnotationContainer) {
        return;
    }
    clearIndex();
}
/**
 * Attaches a value-less annotation of the given type to this node via the tree this
 * node belongs to. Shorthand for
 * {@link SpanTree#annotate(SpanNode,AnnotationType) getSpanTree().annotate(this, type)}.
 *
 * @param type the type of the annotation to add
 * @return this, for chaining
 * @throws NullPointerException if this span is not attached to a tree
 */
public SpanNode annotate(AnnotationType type) {
    SpanTree owner = getNonNullSpanTree();
    owner.annotate(this, type);
    return this;
}
/**
 * Adds an Annotation to this SpanTree. Intended for Annotations that use an
 * AnnotationReference and do not annotate a SpanNode: such an Annotation (one whose
 * span node is null) is registered under {@code DummySpanNode.INSTANCE}. An Annotation
 * that does have a span node is registered under that node.
 *
 * @param a the Annotation to add
 * @return this, for chaining
 * @see com.yahoo.document.annotation.Annotation
 * @see com.yahoo.document.annotation.AnnotationReference
 * @see com.yahoo.document.annotation.AnnotationReferenceDataType
 */
public SpanTree annotate(Annotation a) {
    SpanNode target = a.getSpanNode();
    if (target == null) {
        target = DummySpanNode.INSTANCE;
    }
    annotateInternal(target, a);
    return this;
}