@Override
public Tokenizer create() {
    // Search-mode tokenizer: no user dictionary, punctuation discarded.
    final boolean discardPunctuation = true;
    return new JapaneseTokenizer(null, discardPunctuation, Mode.SEARCH);
} }));
@Override
public Tokenizer makeTokenizer(Reader r) {
    // Tokenize the supplied reader in the default segmentation mode with no
    // user dictionary; punctuation tokens are kept (discardPunctuation = false).
    final org.apache.lucene.analysis.ja.JapaneseTokenizer tokenizer =
            new org.apache.lucene.analysis.ja.JapaneseTokenizer(
                    r, null, false,
                    org.apache.lucene.analysis.ja.JapaneseTokenizer.DEFAULT_MODE);
    return tokenizer;
}
@Override
public Tokenizer create(Reader reader) {
    // Build a tokenizer from this factory's configured user dictionary,
    // punctuation setting and segmentation mode.
    // (Field "discartPunctuation" is a pre-existing typo declared elsewhere;
    // it cannot be renamed from inside this method.)
    final Tokenizer tokenizer =
            new JapaneseTokenizer(reader, userDictionary, discartPunctuation, mode);
    return tokenizer;
}
@Override
public Tokenizer create(Reader reader) {
    // Search-mode tokenizer over the reader: no user dictionary,
    // punctuation discarded.
    final boolean discardPunctuation = true;
    return new JapaneseTokenizer(reader, null, discardPunctuation, Mode.SEARCH);
} }));
public Tokenizer create(Reader reader) {
    // Tokenizer wired with the configured composite token filter and the
    // on-disk dictionary directory.
    final Tokenizer tokenizer =
            new JapaneseTokenizer(reader, compositeTokenFilter, dictionaryDir);
    return tokenizer;
} }
@Override
public Tokenizer create() {
    // Reader-less tokenizer built from this factory's configured state.
    // (Field "discartPunctuation" is a pre-existing typo declared elsewhere.)
    final Tokenizer tokenizer =
            new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
    return tokenizer;
}
// Wraps a freshly created JapaneseTokenizer and, via reflection, copies the
// private AttributeSource state ("attributes", "attributeImpls",
// "currentState") from the delegate into this wrapper so both objects share
// the same attribute storage.
// NOTE(review): this depends on Lucene's private AttributeSource field names
// and will break at runtime if those internals change — confirm against the
// Lucene version in use.
TokenizerWrapper() {
    super();
    // Record which dictionary snapshot this tokenizer was built from.
    tokenizerTimestamp = dictionaryTimestamp;
    tokenizer = new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
    try {
        // Share the delegate's attribute map with this wrapper.
        final Field attributesField = getAccessibleField(AttributeSource.class, "attributes");
        final Object attributesObj = attributesField.get(tokenizer);
        attributesField.set(this, attributesObj);
        // Share the delegate's attribute implementation map.
        final Field attributeImplsField = getAccessibleField(AttributeSource.class, "attributeImpls");
        final Object attributeImplsObj = attributeImplsField.get(tokenizer);
        attributeImplsField.set(this, attributeImplsObj);
        // Share the delegate's cached iteration state.
        final Field currentStateField = getAccessibleField(AttributeSource.class, "currentState");
        final Object currentStateObj = currentStateField.get(tokenizer);
        currentStateField.set(this, currentStateObj);
    } catch (final Exception e) {
        // Any reflective failure leaves the wrapper unusable; fail fast.
        throw new IllegalStateException(
                "Failed to update the tokenizer.", e);
    }
}
@Override
public Tokenizer create() {
    // Build the tokenizer, then raise the n-best cost to at least the cost
    // implied by the configured examples (when any are present). Note the
    // field value is copied to a local, so the factory state is not mutated.
    final JapaneseTokenizer tokenizer =
            new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
    int cost = this.nBestCost;
    if (nBestExamples != null) {
        cost = Math.max(cost, tokenizer.calcNBestCost(nBestExamples));
    }
    tokenizer.setNBestCost(cost);
    return tokenizer;
}
@Override
public JapaneseTokenizer create(AttributeFactory factory) {
    final JapaneseTokenizer tokenizer =
            new JapaneseTokenizer(factory, userDictionary, discardPunctuation, mode);
    if (nbestExamples != null) {
        // NOTE(review): this writes back to the factory field, so the computed
        // cost never decreases across create() calls — appears deliberate
        // (matches the upstream Lucene factory); confirm before changing.
        nbestCost = Math.max(nbestCost, tokenizer.calcNBestCost(nbestExamples));
    }
    tokenizer.setNBestCost(nbestCost);
    return tokenizer;
} }
/**
 * Builds a token stream over {@code strOrig}.
 *
 * When stemming is allowed, tags are blanked out and a full
 * {@code JapaneseAnalyzer} chain is used; stop words and stop tags are applied
 * only when {@code stopWordsAllowed} is true. Otherwise a NORMAL-mode
 * {@code JapaneseTokenizer} wrapped in a {@code TagJoiningFilter} is returned.
 *
 * NOTE(review): the JapaneseAnalyzer created in the stemming branch is never
 * closed here (hence the "resource" suppression) — confirm the caller closes
 * the returned stream.
 */
@SuppressWarnings("resource")
@Override
protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) throws IOException {
    if (stemsAllowed) {
        // Blank out tags when stemming only
        strOrig = blankOutTags(strOrig);
        // Empty stop sets when stop-word filtering is disabled.
        CharArraySet stopWords = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopSet() : CharArraySet.EMPTY_SET;
        Set<String> stopTags = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopTags() : Collections.emptySet();
        return new JapaneseAnalyzer(null, Mode.SEARCH, stopWords, stopTags).tokenStream("", new StringReader(strOrig));
    } else {
        // No stemming: tokenize in NORMAL mode and re-join tag tokens.
        JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, false, Mode.NORMAL);
        tokenizer.setReader(new StringReader(strOrig));
        return new TagJoiningFilter(tokenizer);
    }
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Chain order is significant: base-form normalization, POS-based stop
    // filtering, CJK width folding, stop words, katakana stemming, then
    // lower-casing.
    final Tokenizer source = new JapaneseTokenizer(userDict, true, mode);
    TokenStream sink = new JapaneseBaseFormFilter(source);
    sink = new JapanesePartOfSpeechStopFilter(sink, stoptags);
    sink = new CJKWidthFilter(sink);
    sink = new StopFilter(sink, stopwords);
    sink = new JapaneseKatakanaStemFilter(sink);
    sink = new LowerCaseFilter(sink);
    return new TokenStreamComponents(source, sink);
}
/**
 * Creates
 * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 *         built from a {@link JapaneseTokenizer} filtered with
 *         {@link JapaneseWidthFilter}, {@link JapanesePunctuationFilter},
 *         {@link JapanesePartOfSpeechStopFilter}, {@link StopFilter},
 *         {@link KeywordMarkerFilter} if a stem exclusion set is provided,
 *         {@link JapaneseBasicFormFilter}, {@link JapaneseKatakanaStemFilter},
 *         and {@link LowerCaseFilter}
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
    Tokenizer tokenizer = new JapaneseTokenizer(reader, null, dictionaryDir);
    TokenStream stream = new JapaneseWidthFilter(tokenizer);
    stream = new JapanesePunctuationFilter(true, stream);
    stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
    stream = new StopFilter(matchVersion, stream, stopwords);
    // Protect stem-exclusion terms from downstream stemming filters.
    if (!stemExclusionSet.isEmpty())
        stream = new KeywordMarkerFilter(stream, stemExclusionSet);
    stream = new JapaneseBasicFormFilter(stream);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
} }