/**
 * Builds a frequency distribution of POS n-grams from a single tag sequence: every
 * {@code POS} annotation returned by {@code selectCovered} for {@code focus} is turned
 * into one tag string, and the n-grams are generated over that whole sequence at once
 * (no per-sentence split).
 *
 * @param jcas         the CAS the annotations are read from
 * @param focus        the annotation whose covered {@code POS} annotations are used
 * @param useCanonical if {@code true}, each tag is the simple class name of the
 *                     {@code POS} annotation; otherwise it is {@code getPosValue()}
 * @param minN         lower n-gram size bound handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size bound handed to {@code NGramStringListIterable}
 * @return the distribution, keyed by the tags of each n-gram joined with {@code NGRAM_GLUE}
 */
private static FrequencyDistribution<String> documentBasedDistribution(JCas jcas, Annotation focus,
        boolean useCanonical, int minN, int maxN) {
    // Collect one tag string per POS annotation, in the order selectCovered yields them.
    List<String> tags = new ArrayList<String>();
    for (POS pos : selectCovered(jcas, POS.class, focus)) {
        String tag = useCanonical ? pos.getClass().getSimpleName() : pos.getPosValue();
        tags.add(tag);
    }

    String[] tagSequence = tags.toArray(new String[tags.size()]);

    FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();
    for (List<String> tagNgram : new NGramStringListIterable(tagSequence, minN, maxN)) {
        String key = StringUtils.join(tagNgram, NGRAM_GLUE);
        distribution.inc(key);
    }
    return distribution;
}
/**
 * Builds a frequency distribution of POS n-grams for the whole CAS, generating the
 * n-grams sentence by sentence so that no n-gram spans two {@code Sentence} annotations.
 *
 * @param jcas         the CAS whose {@code Sentence} and {@code POS} annotations are read
 * @param minN         lower n-gram size bound handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size bound handed to {@code NGramStringListIterable}
 * @param useCanonical if {@code true}, each tag is the simple class name of the
 *                     {@code POS} annotation; otherwise it is {@code getPosValue()}
 * @return the distribution, keyed by the tags of each n-gram joined with {@code NGRAM_GLUE}
 */
public static FrequencyDistribution<String> getDocumentPosNgrams(JCas jcas, int minN, int maxN,
        boolean useCanonical) {
    FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();

    for (Sentence sentence : select(jcas, Sentence.class)) {
        // One tag string per POS annotation of this sentence.
        List<String> tags = new ArrayList<String>();
        for (POS pos : JCasUtil.selectCovered(jcas, POS.class, sentence)) {
            String tag = useCanonical ? pos.getClass().getSimpleName() : pos.getPosValue();
            tags.add(tag);
        }

        String[] tagSequence = tags.toArray(new String[tags.size()]);
        for (List<String> tagNgram : new NGramStringListIterable(tagSequence, minN, maxN)) {
            String key = StringUtils.join(tagNgram, NGRAM_GLUE);
            distribution.inc(key);
        }
    }

    return distribution;
}
/**
 * Builds a frequency distribution of POS n-grams for the whole CAS, generating the
 * n-grams sentence by sentence so that no n-gram spans two {@code Sentence} annotations.
 *
 * @param jcas         the CAS whose {@code Sentence} and {@code POS} annotations are read
 * @param minN         lower n-gram size bound handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size bound handed to {@code NGramStringListIterable}
 * @param useCanonical if {@code true}, each tag is the simple class name of the
 *                     {@code POS} annotation; otherwise it is {@code getPosValue()}
 * @return the distribution, keyed by the tags of each n-gram joined with {@code NGRAM_GLUE}
 */
public static FrequencyDistribution<String> getDocumentPosNgrams(JCas jcas, int minN, int maxN,
        boolean useCanonical) {
    FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();

    for (Sentence sentence : select(jcas, Sentence.class)) {
        // One tag string per POS annotation of this sentence.
        List<String> tags = new ArrayList<String>();
        for (POS pos : JCasUtil.selectCovered(jcas, POS.class, sentence)) {
            String tag = useCanonical ? pos.getClass().getSimpleName() : pos.getPosValue();
            tags.add(tag);
        }

        String[] tagSequence = tags.toArray(new String[tags.size()]);
        for (List<String> tagNgram : new NGramStringListIterable(tagSequence, minN, maxN)) {
            String key = StringUtils.join(tagNgram, NGRAM_GLUE);
            distribution.inc(key);
        }
    }

    return distribution;
}
/**
 * Builds a frequency distribution of POS n-grams restricted to {@code focus}: for each
 * {@code Sentence} returned by {@code selectCovered} for {@code focus}, the sentence's
 * {@code POS} annotations are turned into a tag sequence and n-grams are generated over
 * that sequence, so no n-gram spans two sentences.
 *
 * @param jcas         the CAS the annotations are read from
 * @param focus        the annotation whose covered {@code Sentence} annotations are used
 * @param useCanonical if {@code true}, each tag is the simple class name of the
 *                     {@code POS} annotation; otherwise it is {@code getPosValue()}
 * @param minN         lower n-gram size bound handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size bound handed to {@code NGramStringListIterable}
 * @return the distribution, keyed by the tags of each n-gram joined with {@code NGRAM_GLUE}
 */
private static FrequencyDistribution<String> sentenceBasedDistribution(JCas jcas, Annotation focus,
        boolean useCanonical, int minN, int maxN) {
    FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();

    for (Sentence sentence : selectCovered(jcas, Sentence.class, focus)) {
        // One tag string per POS annotation of this sentence.
        List<String> tags = new ArrayList<String>();
        for (POS pos : selectCovered(jcas, POS.class, sentence)) {
            String tag = useCanonical ? pos.getClass().getSimpleName() : pos.getPosValue();
            tags.add(tag);
        }

        String[] tagSequence = tags.toArray(new String[tags.size()]);
        for (List<String> tagNgram : new NGramStringListIterable(tagSequence, minN, maxN)) {
            String key = StringUtils.join(tagNgram, NGRAM_GLUE);
            distribution.inc(key);
        }
    }

    return distribution;
}
for (List<String> ngram : new NGramStringListIterable(keywordList.toArray(new String[keywordList.size()]), minN, maxN)) { String ngramString = StringUtils.join(ngram, GLUE); documentNgrams.inc(ngramString);
for (List<String> ngram : new NGramStringListIterable( keywordList.toArray(new String[keywordList.size()]), minN, maxN)) { String ngramString = StringUtils.join(ngram, GLUE);
public static FrequencyDistribution<String> getDocumentNgrams( JCas jcas, boolean lowerCaseNGrams, boolean filterPartialMatches, int minN, int maxN, Set<String> stopwords) { FrequencyDistribution<String> documentNgrams = new FrequencyDistribution<String>(); for (Sentence s : select(jcas, Sentence.class)) { // TODO parameterize type for (List<String> ngram : new NGramStringListIterable(toText(selectCovered(Token.class, s)), minN, maxN)) { if (passesNgramFilter(ngram, stopwords, filterPartialMatches)) { String ngramString = StringUtils.join(ngram, NGRAM_GLUE); if (lowerCaseNGrams) { ngramString = ngramString.toLowerCase(); } documentNgrams.inc(ngramString); } } } return documentNgrams; }
for (Sentence s : select(jcas, Sentence.class)) { List<String> strings = valuesToText(jcas, s, annotationClass.getName()); for (List<String> ngram : new NGramStringListIterable(strings, minN, maxN)) { if (lowerCaseNGrams) { ngram = lower(ngram);
for (List<String> ngram : new NGramStringListIterable(array, minN, maxN)) { phoneticNgrams.inc(StringUtils.join(ngram, NGRAM_GLUE));
for (List<String> ngram : new NGramStringListIterable(toText(selectCovered( Token.class, s)), minN, maxN)) { for (List<String> ngram : new NGramStringListIterable(toText(selectCovered(Token.class, focusAnnotation)), minN, maxN)) {
for (List<String> ngram : new NGramStringListIterable(array, minN, maxN)) { phoneticNgrams.inc(StringUtils.join(ngram, NGRAM_GLUE));
for (Sentence s : selectCovered(jcas, Sentence.class, aTarget)) { List<String> strings = valuesToText(jcas, s, annotationClass.getName()); for (List<String> ngram : new NGramStringListIterable(strings, minN, maxN)) { if (lowerCaseNGrams) { ngram = lower(ngram);
for (List<String> ngram : new NGramStringListIterable(toText(selectCovered( Token.class, s)), minN, maxN)) { for (List<String> ngram : new NGramStringListIterable(toText(selectCovered(Token.class, focusAnnotation)), minN, maxN)) {
for (List<String> ngram : new NGramStringListIterable( toText(selectCovered(Token.class, s)), minN, maxN)) { for (List<String> ngram : new NGramStringListIterable( toText(selectCovered(Token.class, focusAnnotation)), minN, maxN)) {