/**
 * Reads the whitespace-separated tokens contained in the file "wordlist"
 * and adds them to the stoplist. A null file is ignored.
 *
 * @param wordlist the file holding the tokens to add; may be null, in which
 *                 case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords addStopWords (File wordlist) {
    // Nothing to read: hand back this object unchanged.
    if (wordlist == null) {
        return this;
    }
    // The second argument (encoding) is null here; how fileToStringArray
    // treats a null encoding is decided by that helper.
    this.addStopWords (fileToStringArray(wordlist, null));
    return this;
}
/**
 * Remove whitespace-separated tokens in file "wordlist" from the stoplist.
 * A null file is ignored, mirroring the behavior of addStopWords(File).
 *
 * @param wordlist the file holding the tokens to remove; may be null, in
 *                 which case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords removeStopWords (File wordlist) {
    // Guard added for consistency with addStopWords(File): a null file used
    // to be forwarded straight to fileToStringArray.
    if (wordlist != null) {
        // The second argument (encoding) is null here; how fileToStringArray
        // treats a null encoding is decided by that helper.
        this.removeStopWords (fileToStringArray(wordlist, null));
    }
    return this;
}
/**
 * Remove whitespace-separated tokens in file "wordlist" from the stoplist.
 * A null file is ignored, mirroring the behavior of addStopWords(File).
 *
 * @param wordlist the file holding the tokens to remove; may be null, in
 *                 which case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords removeStopWords (File wordlist) {
    // Guard added for consistency with addStopWords(File): a null file used
    // to be forwarded straight to fileToStringArray.
    if (wordlist != null) {
        // The second argument (encoding) is null here; how fileToStringArray
        // treats a null encoding is decided by that helper.
        this.removeStopWords (fileToStringArray(wordlist, null));
    }
    return this;
}
/**
 * Reads the whitespace-separated tokens contained in the file "wordlist"
 * and adds them to the stoplist. A null file is ignored.
 *
 * @param wordlist the file holding the tokens to add; may be null, in which
 *                 case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords addStopWords (File wordlist) {
    // Nothing to read: hand back this object unchanged.
    if (wordlist == null) {
        return this;
    }
    // The second argument (encoding) is null here; how fileToStringArray
    // treats a null encoding is decided by that helper.
    this.addStopWords (fileToStringArray(wordlist, null));
    return this;
}
/**
 * Remove whitespace-separated tokens in file "wordlist" from the stoplist.
 * A null file is ignored, mirroring the behavior of addStopWords(File).
 *
 * @param wordlist the file holding the tokens to remove; may be null, in
 *                 which case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords removeStopWords (File wordlist) {
    // Guard added for consistency with addStopWords(File): a null file used
    // to be forwarded straight to fileToStringArray.
    if (wordlist != null) {
        // The second argument (encoding) is null here; how fileToStringArray
        // treats a null encoding is decided by that helper.
        this.removeStopWords (fileToStringArray(wordlist, null));
    }
    return this;
}
/**
 * Reads the whitespace-separated tokens contained in the file "wordlist"
 * and adds them to the stoplist. A null file is ignored.
 *
 * @param wordlist the file holding the tokens to add; may be null, in which
 *                 case the stoplist is left untouched
 * @return this object, so that calls can be chained
 */
public TokenSequenceRemoveStopwords addStopWords (File wordlist) {
    // Nothing to read: hand back this object unchanged.
    if (wordlist == null) {
        return this;
    }
    // The second argument (encoding) is null here; how fileToStringArray
    // treats a null encoding is decided by that helper.
    this.addStopWords (fileToStringArray(wordlist, null));
    return this;
}
/**
 * Builds an instance whose stoplist is read from a file, optionally on top
 * of the default stoplist.
 *
 * @param stoplistFile   The file to load
 * @param encoding       The encoding of the stoplist file (eg UTF-8)
 * @param includeDefault Whether to include the standard mallet English stoplist;
 *                       when false the stoplist starts out empty
 * @param caseSensitive  Stored in the caseSensitive field (presumably controls
 *                       whether token matching is case-sensitive; confirm
 *                       against the code that reads the field)
 * @param markDeletions  Stored in the markDeletions field (presumably controls
 *                       whether removed tokens are marked; confirm against the
 *                       code that reads the field)
 */
public TokenSequenceRemoveStopwords(File stoplistFile, String encoding, boolean includeDefault, boolean caseSensitive, boolean markDeletions) {
    // Choose the starting stoplist: the defaults, or an empty set.
    stoplist = includeDefault ? newDefaultStopList() : new HashSet<String>();

    // Add the file's words to whichever starting set was chosen.
    addStopWords (fileToStringArray(stoplistFile, encoding));

    // The flags are assigned only after the file has been loaded, exactly
    // as in the original statement order.
    this.caseSensitive = caseSensitive;
    this.markDeletions = markDeletions;
}
/**
 * Builds an instance whose stoplist is read from a file, optionally on top
 * of the default stoplist.
 *
 * @param stoplistFile   The file to load
 * @param encoding       The encoding of the stoplist file (eg UTF-8)
 * @param includeDefault Whether to include the standard mallet English stoplist;
 *                       when false the stoplist starts out empty
 * @param caseSensitive  Stored in the caseSensitive field (presumably controls
 *                       whether token matching is case-sensitive; confirm
 *                       against the code that reads the field)
 * @param markDeletions  Stored in the markDeletions field (presumably controls
 *                       whether removed tokens are marked; confirm against the
 *                       code that reads the field)
 */
public TokenSequenceRemoveStopwords(File stoplistFile, String encoding, boolean includeDefault, boolean caseSensitive, boolean markDeletions) {
    // Choose the starting stoplist: the defaults, or an empty set.
    stoplist = includeDefault ? newDefaultStopList() : new HashSet<String>();

    // Add the file's words to whichever starting set was chosen.
    addStopWords (fileToStringArray(stoplistFile, encoding));

    // The flags are assigned only after the file has been loaded, exactly
    // as in the original statement order.
    this.caseSensitive = caseSensitive;
    this.markDeletions = markDeletions;
}
/**
 * Builds an instance whose stoplist is read from a file, optionally on top
 * of the default stoplist.
 *
 * @param stoplistFile   The file to load
 * @param encoding       The encoding of the stoplist file (eg UTF-8)
 * @param includeDefault Whether to include the standard mallet English stoplist;
 *                       when false the stoplist starts out empty
 * @param caseSensitive  Stored in the caseSensitive field (presumably controls
 *                       whether token matching is case-sensitive; confirm
 *                       against the code that reads the field)
 * @param markDeletions  Stored in the markDeletions field (presumably controls
 *                       whether removed tokens are marked; confirm against the
 *                       code that reads the field)
 */
public TokenSequenceRemoveStopwords(File stoplistFile, String encoding, boolean includeDefault, boolean caseSensitive, boolean markDeletions) {
    // Choose the starting stoplist: the defaults, or an empty set.
    stoplist = includeDefault ? newDefaultStopList() : new HashSet<String>();

    // Add the file's words to whichever starting set was chosen.
    addStopWords (fileToStringArray(stoplistFile, encoding));

    // The flags are assigned only after the file has been loaded, exactly
    // as in the original statement order.
    this.caseSensitive = caseSensitive;
    this.markDeletions = markDeletions;
}