/**
 * Factory hook: wraps the given token stream in a new
 * {@link ASCIIFoldingExpansionFilter}.
 *
 * @param input the upstream token stream to filter
 * @return a new expansion filter over {@code input}
 */
@Override
public ASCIIFoldingExpansionFilter create(final TokenStream input) {
    final ASCIIFoldingExpansionFilter filter = new ASCIIFoldingExpansionFilter(input);
    return filter;
}
/** * Converts characters above ASCII to their ASCII equivalents. For example, * accents are removed from accented characters. * @param input The string to fold * @param length The number of characters in the input string */ public void foldToASCII(final char[] input, final int length) { // Worst-case length required: final int maxSizeNeeded = 4 * length; this.growBuffer(output, maxSizeNeeded); outputPos = foldToASCII(input, 0, output, 0, length); }
@Test public void testTokenTypeFilter1() throws Exception { final Reader reader = new StringReader("aaa clés café"); final TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); final ASCIIFoldingExpansionFilter filter = new ASCIIFoldingExpansionFilter(stream); final CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); final PositionIncrementAttribute posAtt = filter.getAttribute(PositionIncrementAttribute.class); filter.reset(); // prepare stream this.assertTermEquals("aaa", 1, filter, termAtt, posAtt); this.assertTermEquals("cles", 1, filter, termAtt, posAtt); this.assertTermEquals("clés", 0, filter, termAtt, posAtt); this.assertTermEquals("cafe", 1, filter, termAtt, posAtt); this.assertTermEquals("café", 0, filter, termAtt, posAtt); }
// NOTE(review): fragment — the enclosing method (presumably incrementToken())
// and its loop/closing braces are outside this view; code left byte-identical.
this.saveBuffer(); // snapshot the current term so the original form can be emitted later
final char c = buffer[i];
if (c >= '\u0080') { // first character above ASCII found: fold the whole term
    this.foldToASCII(buffer, length);
    termAtt.copyBuffer(output, 0, outputPos); // replace term with the folded form
    savedState = true; // presumably signals the saved original to be emitted next — TODO confirm
/**
 * Snapshots the current term attribute's characters into
 * {@code savedBuffer}, recording the length in {@code savedLength}.
 */
private void saveBuffer() {
    final int len = termAtt.length();
    savedLength = len;
    // Ensure the destination can hold the term before copying into it.
    this.growBuffer(savedBuffer, len);
    System.arraycopy(termAtt.buffer(), 0, savedBuffer, 0, len);
}
/**
 * Builds the analysis chain for this analyzer: whitespace tokenization
 * followed by ASCII-folding expansion.
 *
 * @param fieldName the field being analyzed (unused here)
 * @param reader    the character source to tokenize
 * @return the tokenizer/filter pair for this analyzer
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer tokenizer =
            new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
    return new TokenStreamComponents(tokenizer, new ASCIIFoldingExpansionFilter(tokenizer));
}
};
/**
 * Analyzer wiring: tokenizes on whitespace, then expands each non-ASCII
 * term with its folded ASCII form via {@link ASCIIFoldingExpansionFilter}.
 *
 * @param fieldName the field being analyzed (unused here)
 * @param reader    the character source to tokenize
 * @return the tokenizer/filter pair for this analyzer
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer source =
            new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
    final TokenStream sink = new ASCIIFoldingExpansionFilter(source);
    return new TokenStreamComponents(source, sink);
}
};