/**
 * Drops tokens from the indexes depending on whether their covered text matches
 * {@code filterRegex}: with {@code mustMatch} set, non-matching tokens are dropped;
 * otherwise matching tokens are dropped.
 *
 * @param aJCas the CAS whose tokens are filtered
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Collect first, remove afterwards, so the indexes are not touched while iterating.
    List<Token> rejected = new LinkedList<>();
    for (Token candidate : select(aJCas, Token.class)) {
        boolean matches = filterRegex.matcher(candidate.getCoveredText()).matches();
        boolean reject = mustMatch ? !matches : matches;
        if (reject) {
            rejected.add(candidate);
        }
    }

    for (Token candidate : rejected) {
        candidate.removeFromIndexes();
    }
}
}
/**
 * Shortens every token whose covered text fully matches {@code suffixPattern} by the
 * length of capture group 1, and afterwards removes tokens that ended up shorter than
 * {@code minTokenLength} (only when that limit is greater than zero).
 *
 * @param aJCas the CAS whose tokens are processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    List<Token> tooShort = new ArrayList<>();

    // Iterate over a copy because tokens are removed from and re-added to the indexes below.
    List<Token> snapshot = new ArrayList<>(select(aJCas, Token.class));
    for (Token current : snapshot) {
        Matcher matcher = suffixPattern.matcher(current.getCoveredText());
        if (!matcher.matches()) {
            continue;
        }

        // Take the token out of the indexes while its end offset is changed.
        current.removeFromIndexes();
        int suffixLength = matcher.end(1) - matcher.start(1);
        current.setEnd(current.getEnd() - suffixLength);
        current.addToIndexes();

        /* remember tokens that have become too short */
        int remainingLength = current.getEnd() - current.getBegin();
        if (minTokenLength > 0 && remainingLength < minTokenLength) {
            tooShort.add(current);
        }
    }

    for (Token shortened : tooShort) {
        shortened.removeFromIndexes(aJCas);
    }
}
}
/**
 * Removes every token whose covered text is not contained in {@code vocab}, together
 * with the POS annotation attached to it, and counts each removed token in
 * {@code droppedVocabulary}. Does nothing when {@code embedding} is {@code null}.
 *
 * @param aJCas the CAS whose tokens are filtered
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    if (embedding == null) {
        return;
    }

    // Iterate over a snapshot: the loop removes tokens (and POS annotations) from the
    // indexes, and modifying the indexes while iterating the live select() view can
    // fail with a ConcurrentModificationException.
    Collection<Token> tokens = new java.util.ArrayList<Token>(
            JCasUtil.select(aJCas, Token.class));

    for (Token t : tokens) {
        if (vocab.contains(t.getCoveredText())) {
            continue;
        }

        // Remove the attached POS annotation as well so it is not left behind in the indexes.
        POS pos = t.getPos();
        if (pos != null) {
            pos.removeFromIndexes();
            t.setPos(null);
        }

        t.removeFromIndexes();
        droppedVocabulary++;
    }
}
/**
 * Strips the first matching entry of {@code prefixes} from the start and the first
 * matching entry of {@code suffixes} from the end of every token. Tokens whose covered
 * text is empty afterwards are removed from the indexes.
 *
 * @param aJCas the CAS whose tokens are processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Iterate over a snapshot: tokens are removed from and re-added to the indexes below,
    // which must not happen while iterating the live index view.
    Collection<Token> tokens = new ArrayList<Token>(select(aJCas, Token.class));

    for (Token t : tokens) {
        String text = t.getCoveredText();
        int begin = t.getBegin();
        int end = t.getEnd();

        // Only the first matching prefix is stripped.
        for (String prefix : prefixes) {
            if (text.startsWith(prefix)) {
                begin += prefix.length();
                text = text.substring(prefix.length());
                break;
            }
        }

        // Only the first matching suffix is stripped, checked against the text that
        // remains after prefix removal.
        for (String suffix : suffixes) {
            if (text.endsWith(suffix)) {
                end -= suffix.length();
                text = text.substring(0, text.length() - suffix.length());
                break;
            }
        }

        boolean changed = begin != t.getBegin() || end != t.getEnd();
        boolean empty = text.length() == 0;
        if (!changed && !empty) {
            continue;
        }

        // Begin/end are index keys of an annotation, so the token is taken out of the
        // indexes before its offsets change and only re-added if anything is left of it.
        t.removeFromIndexes();
        if (changed) {
            t.setBegin(begin);
            t.setEnd(end);
        }
        if (!empty) {
            t.addToIndexes();
        }
    }
}
}
/**
 * Removes every token whose covered text is not contained in {@code vocab}, together
 * with the POS annotation attached to it, and counts each removed token in
 * {@code droppedVocabulary}. Does nothing when {@code embedding} is {@code null}.
 *
 * @param aJCas the CAS whose tokens are filtered
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    if (embedding == null) {
        return;
    }

    // Iterate over a snapshot: the loop removes tokens (and POS annotations) from the
    // indexes, and modifying the indexes while iterating the live select() view can
    // fail with a ConcurrentModificationException.
    Collection<Token> tokens = new java.util.ArrayList<Token>(
            JCasUtil.select(aJCas, Token.class));

    for (Token t : tokens) {
        if (vocab.contains(t.getCoveredText())) {
            continue;
        }

        // Remove the attached POS annotation as well so it is not left behind in the indexes.
        POS pos = t.getPos();
        if (pos != null) {
            pos.removeFromIndexes();
            t.setPos(null);
        }

        t.removeFromIndexes();
        droppedVocabulary++;
    }
}
// Extend/adjust the token so that it ends where the last element of `covered` ends.
// The token is removed from the indexes before its end offset changes and re-added
// afterwards (presumably because the end offset is an index key -- confirm).
// NOTE(review): assumes `covered` is non-empty here; verify against the enclosing code.
token.removeFromIndexes(); token.setEnd(covered.get(covered.size() - 1).getEnd()); token.addToIndexes();
// Remove the token from the indexes and record an INFO-level message about the removal.
// NOTE(review): the check that classifies the span as illegal is outside this fragment.
t.removeFromIndexes(); aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed token with illegal span: %s", t));