@Override public TokenStream create(final TokenStream input) { return new MailtoFilter(input); }
/** * @param input */ public MailtoFilter(final TokenStream input) { super(input); termAtt = this.addAttribute(CharTermAttribute.class); posIncrAtt = this.addAttribute(PositionIncrementAttribute.class); termBuffer = CharBuffer.allocate(256); }
@Override public final boolean incrementToken() throws IOException { if (isMailto) { termAtt.setEmpty(); // return the scheme + the mail part isMailto = false; posIncrAtt.setPositionIncrement(0); termAtt.copyBuffer(termBuffer.array(), 0, termBuffer.position()); return true; } if (input.incrementToken()) { if (this.isMailtoScheme()) { this.updateBuffer(); termBuffer.put(termAtt.buffer(), 0, termAtt.length()); // return only the mail part posIncrAtt.setPositionIncrement(1); termAtt.copyBuffer(termBuffer.array(), 7, termBuffer.position() - 7); } return true; } return false; }
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final WhitespaceTokenizer source = new WhitespaceTokenizer(matchVersion, reader); TokenStream sink = new URIDecodingFilter(source, "UTF-8"); sink = this.applyURINormalisation(sink); sink = new MailtoFilter(sink); sink = new LowerCaseFilter(matchVersion, sink ); sink = new StopFilter(matchVersion, sink, stopSet); sink = new LengthFilter(true, sink, 2, 256); return new TokenStreamComponents(source, sink); }
private void assertURLDecodedTo(final Tokenizer t, final String uri, final String[] expectedStems, final String[] expectedTypes, final int[] expectedPosIncr) throws IOException { assertTrue("has CharTermAttribute", t.hasAttribute(CharTermAttribute.class)); final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class); assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class)); final TypeAttribute typeAtt = t.getAttribute(TypeAttribute.class); assertTrue("has PositionIncrementAttribute", t.hasAttribute(PositionIncrementAttribute.class)); final PositionIncrementAttribute posIncrAtt = t.getAttribute(PositionIncrementAttribute.class); t.setReader(new StringReader(uri)); t.reset(); final TokenFilter filter = new MailtoFilter(t); for (int i = 0; i < expectedStems.length; i++) { assertTrue("token " + i + " exists", filter.incrementToken()); assertEquals(expectedStems[i], termAtt.toString()); if (expectedTypes == null) assertEquals(uritype, typeAtt.type()); else assertEquals(expectedTypes[i], typeAtt.type()); if (expectedPosIncr != null) assertEquals(expectedPosIncr[i], posIncrAtt.getPositionIncrement()); } filter.end(); filter.close(); }