JapaneseTokenizer.<init>

How to use the org.apache.lucene.analysis.ja.JapaneseTokenizer constructor

Best Java code snippets using org.apache.lucene.analysis.ja.JapaneseTokenizer.<init> (Showing top 12 results out of 315)

origin: org.elasticsearch.plugin/analysis-kuromoji

  @Override
  public Tokenizer create() {
    return new JapaneseTokenizer(null, true, Mode.SEARCH);
  }
}));
origin: shilad/wikibrain

@Override
public Tokenizer makeTokenizer(Reader r) {
  return new org.apache.lucene.analysis.ja.JapaneseTokenizer(r, null, false, org.apache.lucene.analysis.ja.JapaneseTokenizer.DEFAULT_MODE);
}
origin: org.elasticsearch/elasticsearch-analysis-kuromoji

@Override
public Tokenizer create(Reader reader) {
  return new JapaneseTokenizer(reader, userDictionary,
      discartPunctuation, mode);
}
origin: org.elasticsearch/elasticsearch-analysis-kuromoji

  @Override
  public Tokenizer create(Reader reader) {
    return new JapaneseTokenizer(reader, null, true,
        Mode.SEARCH);
  }
}));
origin: com.google.code/lucene-gosen-ipadic

 public Tokenizer create(Reader reader) {
  return new JapaneseTokenizer(reader, compositeTokenFilter, dictionaryDir);
 }
}
origin: org.elasticsearch.plugin/analysis-kuromoji

@Override
public Tokenizer create() {
  return new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
}
origin: org.codelibs/elasticsearch-analysis-ja

TokenizerWrapper() {
  super();
  tokenizerTimestamp = dictionaryTimestamp;
  tokenizer = new JapaneseTokenizer(userDictionary,
      discartPunctuation, mode);
  try {
    final Field attributesField = getAccessibleField(AttributeSource.class, "attributes");
    final Object attributesObj = attributesField.get(tokenizer);
    attributesField.set(this, attributesObj);
    final Field attributeImplsField = getAccessibleField(AttributeSource.class, "attributeImpls");
    final Object attributeImplsObj = attributeImplsField.get(tokenizer);
    attributeImplsField.set(this, attributeImplsObj);
    final Field currentStateField = getAccessibleField(AttributeSource.class, "currentState");
    final Object currentStateObj = currentStateField.get(tokenizer);
    currentStateField.set(this, currentStateObj);
  } catch (final Exception e) {
    throw new IllegalStateException(
        "Failed to update the tokenizer.", e);
  }
}
origin: org.codelibs/elasticsearch-analysis-ja

@Override
public Tokenizer create() {
  final JapaneseTokenizer t = new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
  int nBestCost = this.nBestCost;
  if (nBestExamples != null) {
    nBestCost = Math.max(nBestCost, t.calcNBestCost(nBestExamples));
  }
  t.setNBestCost(nBestCost);
  return t;
}
origin: org.apache.lucene/lucene-analyzers-kuromoji

 @Override
 public JapaneseTokenizer create(AttributeFactory factory) {
  JapaneseTokenizer t = new JapaneseTokenizer(factory, userDictionary, discardPunctuation, mode);
  if (nbestExamples != null) {
   nbestCost = Math.max(nbestCost, t.calcNBestCost(nbestExamples));
  }
  t.setNBestCost(nbestCost);
  return t;
 }
}
origin: omegat-org/omegat

@SuppressWarnings("resource")
@Override
protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed)
    throws IOException {
  if (stemsAllowed) {
    // Blank out tags when stemming only
    strOrig = blankOutTags(strOrig);
    CharArraySet stopWords = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopSet() : CharArraySet.EMPTY_SET;
    Set<String> stopTags = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopTags() : Collections.emptySet();
    return new JapaneseAnalyzer(null, Mode.SEARCH, stopWords, stopTags).tokenStream("",
        new StringReader(strOrig));
  } else {
    JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, false, Mode.NORMAL);
    tokenizer.setReader(new StringReader(strOrig));
    return new TagJoiningFilter(tokenizer);
  }
}
origin: org.apache.lucene/lucene-analyzers-kuromoji

@Override
protected TokenStreamComponents createComponents(String fieldName) {
 Tokenizer tokenizer = new JapaneseTokenizer(userDict, true, mode);
 TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
 stream = new JapanesePartOfSpeechStopFilter(stream, stoptags);
 stream = new CJKWidthFilter(stream);
 stream = new StopFilter(stream, stopwords);
 stream = new JapaneseKatakanaStemFilter(stream);
 stream = new LowerCaseFilter(stream);
 return new TokenStreamComponents(tokenizer, stream);
}
origin: com.google.code/lucene-gosen-ipadic

 /**
  * Creates
  * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
  * used to tokenize all the text in the provided {@link Reader}.
  * 
  * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
  *         built from a {@link JapaneseTokenizer} filtered with
  *         {@link JapaneseWidthFilter}, {@link JapanesePunctuationFilter},
  *         {@link JapanesePartOfSpeechStopFilter}, {@link JapaneseStopFilter},
  *         {@link KeywordMarkerFilter} if a stem exclusion set is provided, 
  *         {@link JapaneseBasicFormFilter}, {@link JapaneseKatakanaStemFilter},
  *         and  {@link LowerCaseFilter}
  */
 @Override
 protected TokenStreamComponents createComponents(String field, Reader reader) {
  Tokenizer tokenizer = new JapaneseTokenizer(reader, null, dictionaryDir);
  TokenStream stream = new JapaneseWidthFilter(tokenizer);
  stream = new JapanesePunctuationFilter(true, stream);
  stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
  stream = new StopFilter(matchVersion, stream, stopwords);
  if (!stemExclusionSet.isEmpty())
   stream = new KeywordMarkerFilter(stream, stemExclusionSet);
  stream = new JapaneseBasicFormFilter(stream);
  stream = new JapaneseKatakanaStemFilter(stream);
  stream = new LowerCaseFilter(matchVersion, stream);
  return new TokenStreamComponents(tokenizer, stream);
 }
}
org.apache.lucene.analysis.ja.JapaneseTokenizer.<init>

Javadoc

Create a new JapaneseTokenizer.

Uses the default AttributeFactory.
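
Below is a minimal, self-contained sketch (not taken from the projects above) that uses the three-argument form seen in the newer snippets (user dictionary, discard-punctuation flag, mode) and prints each token. It assumes the lucene-analyzers-kuromoji module is on the classpath; the class name and input string are arbitrary placeholders for illustration.

import java.io.StringReader;

import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class JapaneseTokenizerExample {
  public static void main(String[] args) throws Exception {
    // No user dictionary, discard punctuation, SEARCH mode (as in the snippets above)
    try (JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.SEARCH)) {
      tokenizer.setReader(new StringReader("関西国際空港"));
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.reset();
      while (tokenizer.incrementToken()) {
        System.out.println(term.toString());
      }
      tokenizer.end();
    }
  }
}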

Popular methods of JapaneseTokenizer

  • setReader
  • calcNBestCost
  • clearAttributes
  • close
  • correctOffset
  • end
  • incrementToken
  • reset
  • setNBestCost
  • add
  • backtrace
  • backtraceNBest
  • computePenalty
  • computeSecondBestThreshold
  • equals
  • fixupPendingList
  • getDict
  • hashCode
  • isPunctuation
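
Where n-best output is needed, the factory snippets above first derive a cost from example segmentations and then apply it. A hedged sketch of that pattern, assuming a Lucene version whose JapaneseTokenizer exposes calcNBestCost and setNBestCost; the helper name newTokenizerWithNBest and its parameters are illustrative and not part of any project quoted here.

import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;

public class NBestConfigSketch {
  // Illustrative helper: builds a tokenizer whose n-best cost is at least
  // baseCost, raised further if the supplied example segmentations require it.
  static JapaneseTokenizer newTokenizerWithNBest(int baseCost, String nbestExamples) {
    JapaneseTokenizer t = new JapaneseTokenizer(null, true, Mode.SEARCH);
    int nBestCost = baseCost;
    if (nbestExamples != null) {
      // calcNBestCost estimates the cost needed to retain the listed segmentations
      nBestCost = Math.max(nBestCost, t.calcNBestCost(nbestExamples));
    }
    t.setNBestCost(nBestCost);
    return t;
  }
}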
