/**
 * Returns the base form of a token, which in this implementation is simply the text
 * covered by the token.
 *
 * @param token the token whose base form is requested
 * @return the covered text of {@code token}
 */
@Override
public String getTokenBaseForm(Token token) {
    final String coveredText = token.getCoveredText();
    return coveredText;
}
/**
 * Returns the token text, taking into account a {@link TokenForm} annotation that might
 * exist.
 *
 * @return the value of {@code getFormValue()} when it is non-null, otherwise the covered
 *         text
 */
public String getText() {
    final String formValue = getFormValue();
    if (formValue == null) {
        // No explicit form available: fall back to the text covered by the annotation.
        return getCoveredText();
    }
    return formValue;
}
/**
 * Returns the token text, taking into account a {@link TokenForm} annotation that might
 * exist.
 *
 * @return the value of {@code getFormValue()} when it is non-null, otherwise the covered
 *         text
 */
public String getText() {
    final String formValue = getFormValue();
    if (formValue == null) {
        // No explicit form available: fall back to the text covered by the annotation.
        return getCoveredText();
    }
    return formValue;
}
/**
 * Builds a frequency distribution containing one sample per {@link Token} in the CAS.
 * <p>
 * Each sample key is the token's character length, an underscore and the next value
 * drawn from {@code r.nextInt()}; the second argument passed to {@code addSample} is the
 * token's character length.
 *
 * @param jcas the CAS whose tokens are read
 * @return the populated frequency distribution
 * @throws TextClassificationException declared by the overridden method
 */
@Override
protected FrequencyDistribution<String> getNgramsFD(JCas jcas)
        throws TextClassificationException {
    FrequencyDistribution<String> distribution = new FrequencyDistribution<>();
    for (Token token : JCasUtil.select(jcas, Token.class)) {
        int length = token.getCoveredText().length();
        // NOTE(review): r is presumably a random generator, making every key unique-ish.
        distribution.addSample(length + "_" + r.nextInt(), length);
    }
    return distribution;
}
/**
 * Registers a replacement for every {@link Token} of the input CAS whose text is contained
 * in {@code tokensToReplace}.
 * <p>
 * When {@code ignoreCase} is set, the lower-cased token text is used for the lookup;
 * otherwise the text is looked up unchanged.
 *
 * @param aInput  the CAS the tokens are read from
 * @param aOutput the output CAS (not accessed directly in this method)
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException {
    for (Token token : select(aInput, Token.class)) {
        String text = token.getCoveredText();
        String lookupKey = ignoreCase ? text.toLowerCase() : text;
        if (tokensToReplace.contains(lookupKey)) {
            replace(token.getBegin(), token.getEnd(), replacement);
        }
    }
}
/**
 * Computes the average number of characters per token over the tokens covered by the two
 * given annotations.
 *
 * @param jcas1               the CAS containing {@code coveringAnnotation1}
 * @param jcas2               the CAS containing {@code coveringAnnotation2}
 * @param coveringAnnotation1 the annotation whose covered tokens are read from {@code jcas1}
 * @param coveringAnnotation2 the annotation whose covered tokens are read from {@code jcas2}
 * @return total character count of all covered tokens divided by the total token count;
 *         {@code NaN} when neither annotation covers any token
 * @throws SimilarityException declared by the method contract
 */
public double getSimilarity(JCas jcas1, JCas jcas2, Annotation coveringAnnotation1,
        Annotation coveringAnnotation2) throws SimilarityException {
    List<Token> t1 = JCasUtil.selectCovered(jcas1, Token.class, coveringAnnotation1);
    List<Token> t2 = JCasUtil.selectCovered(jcas2, Token.class, coveringAnnotation2);

    int noOfCharacters = 0;
    for (Token token : t1) {
        noOfCharacters += token.getCoveredText().length();
    }
    for (Token token : t2) {
        noOfCharacters += token.getCoveredText().length();
    }

    int noOfTokens = t1.size() + t2.size();
    // Primitive cast instead of the deprecated new Double(...) boxing; the division is
    // still floating-point, so 0 / 0 continues to yield NaN rather than throwing.
    return (double) noOfCharacters / noOfTokens;
}
}
/**
 * Renders the parent-to-children map as a multi-line string, for tracing.
 * <p>
 * The first line reports the map size; each following line shows one parent token's
 * covered text, the number of its children, and the children rendered via
 * {@code outNode}, each followed by {@code ", "}.
 *
 * @param childrenByParent the map to render
 * @return the textual representation, terminated by an empty line
 */
private static String outChildrenMap(Map<Token, Set<BasicNode>> childrenByParent) {
    // StringBuilder avoids re-copying the accumulated text on every append in the loops.
    StringBuilder result = new StringBuilder();
    result.append("childrenByParent(").append(childrenByParent.size()).append(")\n");
    for (Entry<Token, Set<BasicNode>> entry : childrenByParent.entrySet()) {
        result.append(String.format("\t%s(%d):[", entry.getKey().getCoveredText(),
                entry.getValue().size()));
        for (BasicNode node : entry.getValue()) {
            result.append(outNode(node)).append(", ");
        }
        result.append("]\n");
    }
    result.append("\n");
    return result.toString();
}
@Override boolean keepArgument(JCas jCas) { Collection<Token> tokens = JCasUtil.select(jCas, Token.class); int oovWords = 0; for (Token token : tokens) { if (!vocabulary.contains(token.getCoveredText())) { oovWords++; } } frequency.addValue(oovWords); // System.out.println(frequency); return oovWords <= THRESHOLD; } }
/**
 * Prints every {@link Dependency} in the CAS to standard output, one per line: the
 * dependency type left-aligned in a 10-character column, followed by the covered text of
 * the governor and of the dependent in square brackets.
 *
 * @param aJCas the CAS whose dependencies are printed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Dependency dependency : select(aJCas, Dependency.class)) {
        String relation = dependency.getDependencyType();
        String governorText = dependency.getGovernor().getCoveredText();
        String dependentText = dependency.getDependent().getCoveredText();
        System.out.format("%-10s [%s] [%s]%n", relation, governorText, dependentText);
    }
}
}
private String tokensToString(List<Token> aSentence) { StringJoiner joiner = new StringJoiner(" "); for (Token t : aSentence) { joiner.add(t.getCoveredText()); } // Avoid IndexOutOfBoundsException in case aSentence is empty (i.e. during testing) return joiner.toString().substring(0, (joiner.length() != 0) ? joiner.length() - 1 : 0); }
/**
 * Prints every {@link Dependency} in the CAS to standard output, one per line: the
 * dependency type left-aligned in a 10-character column, followed by the covered text of
 * the governor and of the dependent in square brackets.
 *
 * @param aJCas the CAS whose dependencies are printed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Dependency dependency : select(aJCas, Dependency.class)) {
        String relation = dependency.getDependencyType();
        String governorText = dependency.getGovernor().getCoveredText();
        String dependentText = dependency.getDependent().getCoveredText();
        System.out.format("%-10s [%s] [%s]%n", relation, governorText, dependentText);
    }
}
}
private String tokensToString(List<Token> aSentence) { StringJoiner joiner = new StringJoiner(" "); for (Token t : aSentence) { joiner.add(t.getCoveredText()); } // Avoid IndexOutOfBoundsException in case aSentence is empty (i.e. during testing) return joiner.toString().substring(0, (joiner.length() != 0) ? joiner.length() - 1 : 0); }
/**
 * Registers a replacement for every {@link Token} of the input CAS whose complete covered
 * text matches {@code regexPattern}.
 *
 * @param aInput  the CAS the tokens are read from
 * @param aOutput the output CAS (not accessed directly in this method)
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException {
    for (Token token : select(aInput, Token.class)) {
        // matches() requires the pattern to cover the whole token text, not just a part.
        boolean wholeTokenMatches = regexPattern.matcher(token.getCoveredText()).matches();
        if (wholeTokenMatches) {
            replace(token.getBegin(), token.getEnd(), replacement);
        }
    }
}
/** * Produce token lemma, return the original string if the lemma is null; * converts the string to lower case. * * @param tok * @return */ public String getTokenLemma(Token tok) { Lemma l = tok.getLemma(); // For some weird reason, Clear NLP lemma is sometimes NULL return (l!=null) ? l.getValue() : tok.getCoveredText().toLowerCase(); }
@Override public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException { // Processing must be done back-to-front to ensure that offsets for the next token being // processed remain valid. If this is done front-to-back, replacing a token with a // shorter or longer sequence would cause the offsets to shift. Collection<Token> tokens = select(aInput, Token.class); Token[] tokensArray = tokens.toArray(new Token[tokens.size()]); for (int i = tokensArray.length - 1; i >= 0; i--) { Token token = tokensArray[i]; String curToken = token.getCoveredText(); replace(token.getBegin(), token.getEnd(), mappings.containsKey(curToken) ? mappings.get(curToken) : curToken); } }
/**
 * Renders a token as its covered text followed by its begin and end offsets in the form
 * {@code text[begin:end]}.
 *
 * @param t the token to render
 * @return the textual representation of {@code t}
 */
private static String outToken(Token t) {
    final String text = t.getCoveredText();
    final int begin = t.getBegin();
    final int end = t.getEnd();
    return String.format("%s[%s:%s]", text, begin, end);
}
/**
 * Counts every {@link Dependency} in the CAS: a string is built from the governor text,
 * the dependent text and the dependency type via {@code getDependencyString} (honouring
 * {@code lowerCaseDeps}), and its count in {@code fd} is incremented.
 *
 * @param jcas the CAS whose dependencies are counted
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Dependency dependency : JCasUtil.select(jcas, Dependency.class)) {
        String relation = dependency.getDependencyType();
        String head = dependency.getGovernor().getCoveredText();
        String child = dependency.getDependent().getCoveredText();

        fd.inc(getDependencyString(head, child, relation, lowerCaseDeps));
    }
}
/**
 * Counts every {@link Dependency} in the CAS: a string is built from the governor text,
 * the dependent text and the dependency type via {@code getDependencyString} (honouring
 * {@code lowerCaseDeps}), and its count in {@code fd} is incremented.
 *
 * @param jcas the CAS whose dependencies are counted
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Dependency dependency : JCasUtil.select(jcas, Dependency.class)) {
        String relation = dependency.getDependencyType();
        String head = dependency.getGovernor().getCoveredText();
        String child = dependency.getDependent().getCoveredText();

        fd.inc(getDependencyString(head, child, relation, lowerCaseDeps));
    }
}
/**
 * Converts a token into a {@link CoreLabel} via {@code CoreNlpUtils.tokenToWord}, sets
 * the label's value to the token's covered text and, unless {@code readPos} is enabled,
 * clears the label's tag.
 *
 * @param aToken the token to convert
 * @return the resulting label
 */
protected CoreLabel tokenToWord(Token aToken) {
    CoreLabel label = CoreNlpUtils.tokenToWord(aToken);
    label.setValue(aToken.getCoveredText());
    if (readPos) {
        return label;
    }
    // POS information is not to be read: drop whatever tag the conversion produced.
    label.setTag(null);
    return label;
}
/**
 * Adds a {@link PhoneticTranscription} annotation for every {@link Token} in the CAS.
 * <p>
 * Each annotation spans the token's offsets, carries the result of {@code encode} applied
 * to the token's covered text, and is named after the class of {@code encoder}.
 *
 * @param jcas the CAS to read tokens from and to add the annotations to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Token token : JCasUtil.select(jcas, Token.class)) {
        PhoneticTranscription annotation = new PhoneticTranscription(jcas, token.getBegin(),
                token.getEnd());

        String encoded = encode(token.getCoveredText());
        annotation.setTranscription(encoded);

        String encoderName = encoder.getClass().getName();
        annotation.setName(encoderName);

        annotation.addToIndexes();
    }
}