/**
 * Wraps the given input in a {@link DatatypeAnalyzerFilter} and registers
 * every configured datatype analyzer on it.
 *
 * @param input the token stream to wrap
 * @return the configured {@link DatatypeAnalyzerFilter}
 */
@Override
public TokenStream create(final TokenStream input) {
  final DatatypeAnalyzerFilter f = new DatatypeAnalyzerFilter(luceneDefaultVersion, input);
  // Iterate over entries directly instead of keySet() + get(): one map
  // lookup per datatype instead of two, same registration order and result.
  for (final java.util.Map.Entry<String, Analyzer> e : analyzers.entrySet()) {
    f.register(e.getKey().toCharArray(), e.getValue());
  }
  return f;
}
/**
 * Creates a {@link DatatypeAnalyzerFilter} with an empty datatype registry.
 *
 * @param version the Lucene compatibility version
 * @param input the token stream to wrap
 */
public DatatypeAnalyzerFilter(final Version version, final TokenStream input) {
  super(input);
  // Case-sensitive map, pre-sized for a typical number of registered datatypes.
  dtsAnalyzer = new CharArrayMap<Analyzer>(version, 64, false);
  initAttributes();
}
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final Version matchVersion = Version.LUCENE_40; final JsonTokenizer src = new JsonTokenizer(reader); TokenStream tok = new DatatypeAnalyzerFilter(matchVersion, src, new StandardAnalyzer(matchVersion), new StandardAnalyzer(matchVersion)); tok = new LowerCaseFilter(matchVersion, tok); // The PositionAttributeFilter and SirenPayloadFilter are mandatory // and must be always the last filters in your token stream tok = new PositionAttributeFilter(tok); tok = new SirenPayloadFilter(tok); return new TokenStreamComponents(src, tok); }
/**
 * Creates a {@link DatatypeAnalyzerFilter} pre-populated with default
 * analyzers for the {@link JSONDatatype#JSON_FIELD} and
 * {@link XSDDatatype#XSD_STRING} datatypes.
 *
 * @param version the Lucene compatibility version
 * @param input the token stream to wrap
 * @param fieldAnalyzer the analyzer used for field names
 * @param valueAnalyzer the analyzer used for string values
 */
public DatatypeAnalyzerFilter(final Version version,
                              final TokenStream input,
                              final Analyzer fieldAnalyzer,
                              final Analyzer valueAnalyzer) {
  this(version, input);
  // Register the two built-in datatype analyzers.
  this.register(XSDDatatype.XSD_STRING.toCharArray(), valueAnalyzer);
  this.register(JSONDatatype.JSON_FIELD.toCharArray(), fieldAnalyzer);
}
// NOTE(review): interior fragment of an enclosing method (likely
// incrementToken) whose signature lies outside this view.
this.initTokenAttributes(); // presumably (re)binds per-token attributes — TODO confirm
this.copyInnerStreamAttributes(); // copies attribute values from the inner analyzer stream — confirm against its definition
return true;
/**
 * Initialises the attribute references of the main stream.
 * <p>
 * Term, offset, position-increment, type and datatype attributes are
 * looked up on the wrapped input stream; the node attribute is added to
 * this stream's own attribute source.
 */
private void initAttributes() {
  // NOTE(review): getAttribute throws IllegalArgumentException when the
  // input does not already provide the attribute — assumes the upstream
  // tokenizer registers all five; confirm.
  termAtt = input.getAttribute(CharTermAttribute.class);
  offsetAtt = input.getAttribute(OffsetAttribute.class);
  posIncrAtt = input.getAttribute(PositionIncrementAttribute.class);
  typeAtt = input.getAttribute(TypeAttribute.class);
  dtypeAtt = input.getAttribute(DatatypeAttribute.class);
  nodeAtt = this.addAttribute(NodeAttribute.class);
}
/**
 * Assembles the JSON analysis chain: tokenizer, datatype dispatch with the
 * configured default analyzers plus every registered datatype analyzer,
 * then the mandatory position and payload filters.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final JsonTokenizer tokenizer = new JsonTokenizer(reader);
  final DatatypeAnalyzerFilter filter =
    new DatatypeAnalyzerFilter(matchVersion, tokenizer, fieldAnalyzer, valueAnalyzer);
  // Forward every externally registered datatype analyzer to the filter.
  for (final Entry<Object, Analyzer> entry : regAnalyzers.entrySet()) {
    filter.register((char[]) entry.getKey(), entry.getValue());
  }
  TokenStream stream = new PositionAttributeFilter(filter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}
/**
 * Assembles the tuple analysis chain: tokenizer, a type filter that drops
 * blank-node and dot tokens, datatype dispatch with the URI/string default
 * analyzers plus every registered literal analyzer, then the mandatory
 * position and payload filters.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final TupleTokenizer tokenizer = new TupleTokenizer(reader);
  // Drop blank nodes and the tuple-terminating dot token.
  TokenStream stream = new TokenTypeFilter(tokenizer,
    new int[] { TupleTokenizer.BNODE, TupleTokenizer.DOT });
  final DatatypeAnalyzerFilter filter =
    new DatatypeAnalyzerFilter(matchVersion, stream, anyURIAnalyzer, stringAnalyzer);
  // Forward every externally registered literal analyzer to the filter.
  for (final Entry<Object, Analyzer> entry : regLitAnalyzers.entrySet()) {
    filter.register((char[]) entry.getKey(), entry.getValue());
  }
  stream = new PositionAttributeFilter(filter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}