/**
 * Return the character n-grams of a string.
 *
 * <p>The input is first split by the inner tokenizer. Each resulting token's
 * value is wrapped as {@code "^" + value + "$"} (presumably begin/end-of-token
 * markers). For every start offset in the wrapped string, one n-gram is
 * emitted for each length from {@code minNGramSize} to {@code maxNGramSize}
 * (inclusive) that fits inside the wrapped string, including n-grams that end
 * on the trailing {@code "$"}. If {@code keepOldTokens} is set, the whole
 * wrapped string is also emitted, ahead of that token's n-grams.
 *
 * @param input the string to tokenize
 * @return the generated tokens, ordered by inner token, then start offset,
 *         then n-gram length
 */
public Token[] tokenize(String input) {
    Token[] initialTokens = innerTokenizer.tokenize(input);
    List tokens = new ArrayList();
    for (int i = 0; i < initialTokens.length; i++) {
        String str = "^" + initialTokens[i].getValue() + "$";
        int strLength = str.length();
        if (keepOldTokens) {
            tokens.add(intern(str));
        }
        for (int lo = 0; lo < strLength; lo++) {
            for (int len = minNGramSize; len <= maxNGramSize; len++) {
                // substring's end index is exclusive, so an n-gram may end
                // exactly at strLength. Using "<" here would drop every
                // n-gram containing the trailing "$".
                if (lo + len <= strLength) {
                    tokens.add(innerTokenizer.intern(str.substring(lo, lo + len)));
                }
            }
        }
    }
    return (Token[]) tokens.toArray(new Token[tokens.size()]);
}