@Override protected void doExecute(ExecutionContext ctx) { StringFieldValue input = (StringFieldValue)ctx.getValue(); SpanTree tree = input.getSpanTree(SpanTrees.LINGUISTICS); Map<Integer, List<String>> map = new HashMap<>(); for (Annotation anno : tree) {
/** * Annotates the given string with the appropriate linguistics annotations. * * @param text the text to annotate * @return whether or not anything was annotated */ public boolean annotate(StringFieldValue text) { if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS. Tokenizer tokenizer = factory.getTokenizer(); String input = (text.getString().length() <= config.getMaxTokenizeLength()) ? text.getString() : text.getString().substring(0, config.getMaxTokenizeLength()); Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(), config.getRemoveAccents()); TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences()); SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS); for (Token token : tokens) { addAnnotationSpan(text.getString(), tree.spanList(), tokenizer, token, config.getStemMode(), termOccurrences); } if (tree.numAnnotations() == 0) return false; text.setSpanTree(tree); return true; }