congrats Icon
New! Announcing Tabnine Chat Beta
Learn More
Tabnine Logo
Dictionary
Code Index · Add Tabnine to your IDE (free)

How to use
Dictionary
in
org.apache.lucene.analysis.hunspell

Best Java code snippets using org.apache.lucene.analysis.hunspell.Dictionary (Showing top 20 results out of 315)

origin: org.elasticsearch/elasticsearch

return new Dictionary(tmp, "hunspell", affixStream, dicStreams, ignoreCase);
origin: org.apache.lucene/lucene-analyzers-common

  parseAlias(line);
 } else if (line.startsWith(MORPH_ALIAS_KEY)) {
  parseMorphAlias(line);
 } else if (line.startsWith(PREFIX_KEY)) {
  parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
 } else if (line.startsWith(SUFFIX_KEY)) {
  parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
 } else if (line.startsWith(FLAG_KEY)) {
  flagParsingStrategy = getFlagParsingStrategy(line);
 } else if (line.equals(COMPLEXPREFIXES_KEY)) {
  complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
  FST<CharsRef> res = parseConversions(reader, num);
  if (type.equals("ICONV")) {
   iconv = res;
this.prefixes = affixFST(prefixes);
this.suffixes = affixFST(suffixes);
origin: org.apache.lucene/lucene-analyzers-common

 ch = caseFold(ch);
 applyMappings(iconv, reuse);
} catch (IOException bogus) {
 throw new RuntimeException(bogus);
  reuse.setCharAt(i, caseFold(reuse.charAt(i)));
origin: org.apache.lucene/lucene-analyzers-common

private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
 List<CharsRef> stems = new ArrayList<>();
 IntsRef forms = dictionary.lookupWord(word, 0, length);
 if (forms != null) {
  for (int i = 0; i < forms.length; i += formStep) {
   if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) {
    dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
    char wordFlags[] = Dictionary.decodeFlags(scratch);
    if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
     continue;
    if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char)dictionary.needaffix)) {
     continue;
    if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
     continue;
origin: org.apache.lucene/lucene-analyzers-common

int exceptionID = forms.ints[forms.offset + formID + 1];
if (exceptionID > 0) {
 exception = dictionary.getStemException(exceptionID);
} else {
 exception = null;
 Dictionary.applyMappings(dictionary.oconv, scratchSegment);
} catch (IOException bogus) {
 throw new RuntimeException(bogus);
origin: org.apache.lucene/lucene-analyzers-common

  continue;
 line = unescapeEntry(line);
   hasStemExceptions = parseStemException(line.substring(morphStart+1)) != null;
   CharSequence cleansed = cleanInput(line, sb);
   writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
  } else {
   String text = line.substring(0, flagSep);
   CharSequence cleansed = cleanInput(text, sb);
   if (cleansed != sb) {
    sb.setLength(0);
 String flagPart = line.substring(flagSep + 1, end);
 if (aliasCount > 0) {
  flagPart = getAliasValue(Integer.parseInt(flagPart));
 String stemException = parseStemException(line.substring(end+1));
 if (stemException != null) {
  stemExceptions = ArrayUtil.grow(stemExceptions, stemExceptionCount+1);
 throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry);
} else {
 encodeFlags(flagsScratch, wordForm);
 int ord = flagLookup.add(flagsScratch.get());
 if (ord < 0) {
origin: org.apache.lucene/lucene-analyzers-common

  flagPart = getAliasValue(Integer.parseInt(flagPart));
 condition = escapeDash(condition);
encodeFlags(scratch, appendFlags);
int appendFlagsOrd = flagLookup.add(scratch.get());
if (appendFlagsOrd < 0) {
 CharSequence cleaned = cleanInput(affixArg, sb);
 affixArg = cleaned.toString();
origin: org.apache.lucene/lucene-analyzers-common

/**
 * Opens the configured dictionary and affix resources through {@code loader} and builds the
 * hunspell {@link Dictionary} from them.
 *
 * <p>The dictionary is built against a freshly created temporary directory, which is removed
 * again once construction finishes, whether or not it succeeded. All opened streams are closed
 * before this method returns.
 *
 * @param loader used to open every comma-separated entry of {@code dictionaryFiles} as well as
 *     {@code affixFile}
 * @throws IOException if a resource cannot be opened or read, or (wrapping the
 *     {@link ParseException}) if the hunspell data cannot be parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
 String dicts[] = dictionaryFiles.split(",");
 InputStream affix = null;
 // Allocated once, outside the try block, so the finally clause can always close whatever was opened.
 final List<InputStream> dictionaries = new ArrayList<>();
 try {
  for (String file : dicts) {
   dictionaries.add(loader.openResource(file));
  }
  affix = loader.openResource(affixFile);
  Path tempPath = Files.createTempDirectory(Dictionary.getDefaultTempDir(), "Hunspell");
  try (Directory tempDir = FSDirectory.open(tempPath)) {
   this.dictionary = new Dictionary(tempDir, "hunspell", affix, dictionaries, ignoreCase);
  } finally {
   // The temporary directory is only needed while the dictionary is being built.
   IOUtils.rm(tempPath);
  }
 } catch (ParseException e) {
  // Report the configured file names; the stream objects themselves carry no useful information.
  throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryFiles + ",affix=" + affixFile + "]", e);
 } finally {
  IOUtils.closeWhileHandlingException(affix);
  IOUtils.closeWhileHandlingException(dictionaries);
 }
}
origin: org.apache.lucene/lucene-analyzers-common

  char appendFlags[] = Dictionary.decodeFlags(scratch);
  compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 char appendFlags[] = Dictionary.decodeFlags(scratch);
 assert prevFlag >= 0;
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
} else {
  char appendFlags[] = Dictionary.decodeFlags(scratch);
  compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 char appendFlags[] = Dictionary.decodeFlags(scratch);
 assert prevFlag >= 0;
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
} else {
origin: org.apache.lucene/lucene-analyzers-common

/**
 * Writes the titlecase variant of {@code word} into {@code titleBuffer}: the first character is
 * copied unchanged and every following character is passed through the dictionary's case folding.
 *
 * @param word source characters
 * @param length number of characters of {@code word} to process
 */
private void caseFoldTitle(char word[], int length) {
 titleBuffer = ArrayUtil.grow(titleBuffer, length);
 System.arraycopy(word, 0, titleBuffer, 0, length);
 // Position 0 is deliberately skipped so the leading character keeps its original case.
 int pos = 1;
 while (pos < length) {
  final char unfolded = titleBuffer[pos];
  titleBuffer[pos] = dictionary.caseFold(unfolded);
  pos++;
 }
}

origin: org.apache.lucene/lucene-analyzers-common

scratchSegment.setLength(0);
scratchSegment.append(word, 0, length);
CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
length = segment.length();
origin: org.infinispan/infinispan-embedded-query

  continue;
 line = unescapeEntry(line);
   hasStemExceptions = parseStemException(line.substring(morphStart+1)) != null;
   CharSequence cleansed = cleanInput(line, sb);
   writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
  } else {
   String text = line.substring(0, flagSep);
   CharSequence cleansed = cleanInput(text, sb);
   if (cleansed != sb) {
    sb.setLength(0);
 String flagPart = line.substring(flagSep + 1, end);
 if (aliasCount > 0) {
  flagPart = getAliasValue(Integer.parseInt(flagPart));
 String stemException = parseStemException(line.substring(end+1));
 if (stemException != null) {
  if (stemExceptionCount == stemExceptions.length) {
 throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry);
} else {
 encodeFlags(flagsScratch, wordForm);
 int ord = flagLookup.add(flagsScratch.get());
 if (ord < 0) {
origin: org.apache.lucene/lucene-analyzers-common

IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
if (forms != null) {
 for (int i = 0; i < forms.length; i += formStep) {
  dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
  char wordFlags[] = Dictionary.decodeFlags(scratch);
  if (Dictionary.hasFlag(wordFlags, flag)) {
   if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag)) {
    char appendFlags[] = Dictionary.decodeFlags(scratch);
    if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) {
     continue;
    char appendFlags[] = Dictionary.decodeFlags(scratch);
    boolean suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
    if (circumfix != suffixCircumfix) {
     continue;
   if (caseVariant && dictionary.keepcase != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
    continue;
   if (dictionary.onlyincompound != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
    continue;
 char appendFlags[] = Dictionary.decodeFlags(scratch);
 circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
origin: org.infinispan/infinispan-embedded-query

  flagPart = getAliasValue(Integer.parseInt(flagPart));
 condition = escapeDash(condition);
encodeFlags(scratch, appendFlags);
int appendFlagsOrd = flagLookup.add(scratch.get());
if (appendFlagsOrd < 0) {
 CharSequence cleaned = cleanInput(affixArg, sb);
 affixArg = cleaned.toString();
origin: org.infinispan/infinispan-embedded-query

int exceptionID = forms.ints[forms.offset + formID + 1];
if (exceptionID > 0) {
 exception = dictionary.getStemException(exceptionID);
} else {
 exception = null;
 Dictionary.applyMappings(dictionary.oconv, scratchSegment);
} catch (IOException bogus) {
 throw new RuntimeException(bogus);
origin: org.infinispan/infinispan-embedded-query

  char appendFlags[] = Dictionary.decodeFlags(scratch);
  compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 char appendFlags[] = Dictionary.decodeFlags(scratch);
 assert prevFlag >= 0;
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
} else {
  char appendFlags[] = Dictionary.decodeFlags(scratch);
  compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 char appendFlags[] = Dictionary.decodeFlags(scratch);
 assert prevFlag >= 0;
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
} else {
origin: org.apache.lucene/lucene-analyzers-common

/**
 * Writes the lowercase variant of a title-cased {@code word} into {@code lowerBuffer}: the word
 * is copied as-is and only its first character is passed through the dictionary's case folding.
 *
 * @param word source characters
 * @param length number of characters of {@code word} to copy
 */
private void caseFoldLower(char word[], int length) {
 lowerBuffer = ArrayUtil.grow(lowerBuffer, length);
 System.arraycopy(word, 0, lowerBuffer, 0, length);
 // Only the leading character is folded; the remainder is left exactly as copied.
 final char leading = lowerBuffer[0];
 lowerBuffer[0] = dictionary.caseFold(leading);
}

origin: org.infinispan/infinispan-embedded-query

scratchSegment.setLength(0);
scratchSegment.append(word, 0, length);
CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
length = segment.length();
origin: omegat-org/omegat

/**
 * Builds the Hunspell {@link Dictionary} for the given language from its installed affix and
 * dictionary files.
 *
 * @param language the language whose dictionary should be loaded
 * @return the loaded dictionary, or {@code null} if the affix or dictionary file is not
 *     registered, does not exist, or loading fails (the failure is logged)
 */
private static Dictionary initDict(Language language) {
  populateInstalledDicts();
  File affixFile;
  File dictionaryFile;
  // Read both file mappings under the class lock so the pair is taken consistently.
  synchronized (HunspellTokenizer.class) {
    affixFile = affixFiles.get(language);
    dictionaryFile = dictionaryFiles.get(language);
  }
  if (affixFile == null || dictionaryFile == null || !affixFile.exists() || !dictionaryFile.exists()) {
    Log.logErrorRB("HUNSPELL_TOKENIZER_DICT_NOT_INSTALLED", language.getLocale());
    return null;
  }
  // try-with-resources closes both streams whether or not construction succeeds; previously the
  // streams were opened inline and never closed, leaking file handles.
  try (FileInputStream affixStream = new FileInputStream(affixFile);
      FileInputStream dictionaryStream = new FileInputStream(dictionaryFile)) {
    return new Dictionary(affixStream, dictionaryStream);
  } catch (Throwable t) {
    // Deliberate best-effort: any failure is logged and reported to the caller as "no dictionary".
    Log.log(t);
    return null;
  }
}
origin: org.infinispan/infinispan-embedded-query

  parseAlias(line);
 } else if (line.startsWith(MORPH_ALIAS_KEY)) {
  parseMorphAlias(line);
 } else if (line.startsWith(PREFIX_KEY)) {
  parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
 } else if (line.startsWith(SUFFIX_KEY)) {
  parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
 } else if (line.startsWith(FLAG_KEY)) {
  flagParsingStrategy = getFlagParsingStrategy(line);
 } else if (line.equals(COMPLEXPREFIXES_KEY)) {
  complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
  FST<CharsRef> res = parseConversions(reader, num);
  if (type.equals("ICONV")) {
   iconv = res;
this.prefixes = affixFST(prefixes);
this.suffixes = affixFST(suffixes);
org.apache.lucene.analysis.hunspell.Dictionary

Javadoc

In-memory structure for the dictionary (.dic) and affix (.aff) data of a hunspell dictionary.

Most used methods

  • <init>
    Creates a new Dictionary containing the information read from the provided InputStreams to hunspell
  • affixFST
  • applyMappings
  • caseFold
    folds single character (according to LANG if present)
  • cleanInput
  • decodeFlags
  • encodeFlags
  • escapeDash
  • getAliasValue
  • getDictionaryEncoding
    Parses the encoding specified in the affix file readable through the provided InputStream
  • getFlagParsingStrategy
    Determines the appropriate FlagParsingStrategy based on the FLAG definition line taken from the affi
  • getJavaEncoding
    Retrieves the CharsetDecoder for the given encoding. Note, This isn't perfect as I think ISCII-DEVAN
  • getFlagParsingStrategy,
  • getJavaEncoding,
  • getStemException,
  • hasFlag,
  • indexOfSpaceOrTab,
  • lookup,
  • lookupWord,
  • morphBoundary,
  • parseAffix,
  • parseAlias

Popular in Java

  • Parsing JSON documents to java classes using gson
  • getSupportFragmentManager (FragmentActivity)
  • getContentResolver (Context)
  • addToBackStack (FragmentTransaction)
  • Container (java.awt)
    A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
  • FileInputStream (java.io)
    An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
  • GregorianCalendar (java.util)
    GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most
  • TreeMap (java.util)
    Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
  • Vector (java.util)
    Vector is an implementation of List, backed by an array and synchronized. All optional operations in
  • Annotation (javassist.bytecode.annotation)
    The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
  • Top 12 Jupyter Notebook extensions
Tabnine Logo
  • Products

    Search for Java code · Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us, Contact Us, Careers
  • Resources

    FAQ, Blog, Tabnine Academy, Terms of use, Privacy policy, Java Code Index, Javascript Code Index
Get Tabnine for your IDE now