/**
 * Convenience overload that delegates to the two-argument {@code process},
 * passing {@code null} as its first argument and {@code tokens} as its second.
 *
 * NOTE(review): the {@code null} presumably stands in for an absent enclosing
 * annotation -- confirm against the two-argument overload.
 *
 * @param tokens the tokens to process
 * @return the list returned by the two-argument {@code process} overload
 */
public List<CoreLabel> process(List<CoreLabel> tokens) { return process(null, tokens); }
@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); if (DEBUG) { log.info("CleanXML: starting tokens: " + tokens); } List<CoreLabel> newTokens = process(annotation, tokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them // redo the token indexes if xml tokens have been removed setTokenBeginTokenEnd(newTokens); annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens); if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); } } }
/**
 * Convenience overload that delegates to the two-argument {@code process},
 * passing {@code tokens} as its first argument and {@code null} as its second.
 *
 * NOTE(review): the {@code null} second argument is presumably an optional
 * output list that the caller does not need here -- confirm against the
 * two-argument overload.
 *
 * @param tokens the tokens to process
 * @return the list returned by the two-argument {@code process} overload
 */
public List<CoreLabel> process(List<CoreLabel> tokens) { return process(tokens, null); }
/**
 * Convenience overload that delegates to the two-argument {@code process},
 * passing {@code null} as its first argument and {@code tokens} as its second.
 *
 * NOTE(review): the {@code null} presumably stands in for an absent enclosing
 * annotation -- confirm against the two-argument overload.
 *
 * @param tokens the tokens to process
 * @return the list returned by the two-argument {@code process} overload
 */
public List<CoreLabel> process(List<CoreLabel> tokens) { return process(null, tokens); }
/**
 * Convenience overload that delegates to the two-argument {@code process},
 * passing {@code null} as its first argument and {@code tokens} as its second.
 *
 * NOTE(review): the {@code null} presumably stands in for an absent enclosing
 * annotation -- confirm against the two-argument overload.
 *
 * @param tokens the tokens to process
 * @return the list returned by the two-argument {@code process} overload
 */
public List<CoreLabel> process(List<CoreLabel> tokens) { return process(null, tokens); }
@Override public void annotate(Annotation annotation) { if (annotation.has(CoreAnnotations.TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); List<CoreLabel> newTokens = process(annotation, tokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens); } }
public void annotate(Annotation annotation) { if (annotation.has(TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); List<CoreLabel> dateTokens = new ArrayList<CoreLabel>(); List<CoreLabel> newTokens = process(tokens, dateTokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them annotation.set(TokensAnnotation.class, newTokens); // if the doc date was found, save it. it is used by SUTime (inside the "ner" annotator) if(dateTokens.size() > 0){ StringBuffer os = new StringBuffer(); boolean first = true; for (CoreLabel t : dateTokens) { if (!first) os.append(" "); os.append(t.word()); first = false; } //System.err.println("DOC DATE IS: " + os.toString()); annotation.set(DocDateAnnotation.class, os.toString()); } } }
@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); if (DEBUG) { log.info("CleanXML: starting tokens: " + tokens); } List<CoreLabel> newTokens = process(annotation, tokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them // redo the token indexes if xml tokens have been removed setTokenBeginTokenEnd(newTokens); annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens); if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); } } }