/**
 * Stems the given <code>Word</code> by delegating to {@link #stem(Word)}.
 *
 * @param word the word to stem
 * @return the <code>Word</code> produced by {@link #stem(Word)}
 */
public Word apply(Word word) {
  final Word stemmed = stem(word);
  return stemmed;
}
/**
 * Stems <code>s</code> and returns the stemmed <code>String</code>.
 * Each character of <code>s</code> is passed, in order, to
 * <code>add(char)</code>; the no-argument <code>stem()</code> is then run
 * and the result is read back through <code>toString()</code>.
 *
 * @param s the string to stem; must not be <code>null</code>
 * @return the value of <code>toString()</code> after stemming
 */
public String stem(String s) {
  final int length = s.length();
  for (int index = 0; index < length; index++) {
    add(s.charAt(index));
  }
  stem();
  return toString();
}
/**
 * Test program for demonstrating the Stemmer. It has two modes:
 * <ul>
 * <li><code>Stemmer -file file-name</code> tokenizes the named file (read
 *     as UTF-8) with <code>PTBTokenizer</code> and stems each token;</li>
 * <li><code>Stemmer word word ...</code> stems each command-line argument
 *     directly.</li>
 * </ul>
 * In both modes the stemmed words are written to standard output,
 * each followed by a single space, and terminated by a newline. If no
 * arguments are given, or <code>-file</code> is given without a file name,
 * a usage message is printed to standard error and nothing is stemmed.
 * Note that the word stemmed is expected to be in lower case:
 * forcing lower case must be done outside the Stemmer class.
 *
 * @param args either <code>-file file-name</code> or a list of words
 * @throws IOException if the input file cannot be opened, read or closed
 */
public static void main(String[] args) throws IOException {
  // Guard against missing arguments instead of failing with an
  // ArrayIndexOutOfBoundsException on args[0] / args[1].
  if (args.length == 0 || (args[0].equals("-file") && args.length < 2)) {
    System.err.println("Usage: Stemmer -file file-name | Stemmer word word ...");
    return;
  }
  Stemmer s = new Stemmer();
  if (args[0].equals("-file")) {
    FileInputStream in = new FileInputStream(args[1]);
    try {
      Iterator<Word> it = PTBTokenizer.newPTBTokenizer(new InputStreamReader(in, "utf-8"));
      while (it.hasNext()) {
        Word token = it.next();
        System.out.print(s.stem(token.word()));
        System.out.print(' ');
      }
    } finally {
      // Always release the file handle, even if tokenizing or stemming fails.
      in.close();
    }
  } else {
    for (String arg : args) {
      System.out.print(s.stem(arg));
      System.out.print(' ');
    }
  }
  System.out.println();
}
/**
 * Stems <code>w</code> and returns the result as a new <code>Word</code>.
 * The text is taken from <code>w.word()</code>, stemmed with
 * {@link #stem(String)}, and wrapped in a freshly constructed
 * <code>Word</code>.
 *
 * @param w the word to stem; must not be <code>null</code>
 * @return a new <code>Word</code> holding the stemmed text
 */
public Word stem(Word w) {
  final String original = w.word();
  final String stemmed = stem(original);
  return new Word(stemmed);
}