/**
 * Constructs a StoplistFilter backed by a default-constructed
 * {@link StopList} (the small built-in stop list).
 */
public StoplistFilter() {
  // Delegate to the StopList-accepting constructor.
  this(
      new StopList()
  );
}
/**
 * Constructs a stop list: starts from a fresh hash set and then calls
 * {@code addGenericWords()} (presumably to fill in the default generic
 * stop words -- confirm against that method).
 */
public StopList() {
  this.wordSet = Generics.newHashSet();
  this.addGenericWords();
}
/**
 * Filters stop words out of a document.
 *
 * The result starts as {@code in.blankDocument()} (a new Document carrying
 * the same meta-data as {@code in}) and receives, in iteration order, every
 * word of {@code in} that this filter's stop list does not contain.
 *
 * @param in the document whose words are to be filtered
 * @return the new document holding only the words not on the stop list
 */
public Document<L, F, Word> processDocument(Document<L, F, Word> in) {
  Document<L, F, Word> filtered = in.blankDocument();
  for (Word word : in) {
    if (stoplist.contains(word)) {
      // Stop word: leave it out of the result.
      continue;
    }
    filtered.add(word);
  }
  return filtered;
}
}
/**
 * Constructs a StoplistFilter whose stop list is built from the file at
 * the given path.
 *
 * @param stoplistfile path of the file handed to the {@link StopList}
 *                     constructor
 */
public StoplistFilter(String stoplistfile) {
  // Wrap the path in a File, build the StopList from it, then delegate.
  this(
      new StopList(
          new File(stoplistfile)
      )
  );
}