/**
 * Opens a Reader for the given resource using a {@link CharsetDecoder}.
 * Unlike Java's defaults this reader will throw an exception if it detects
 * the read charset doesn't match the expected {@link Charset}.
 * <p>
 * Decoding readers are useful to load configuration files, stopword lists or synonym files
 * to detect character set problems. However, it's not recommended to use as a common purpose
 * reader.
 *
 * @param clazz the class used to locate the resource
 * @param resource the resource name to load
 * @param charSet the expected charset
 * @return a reader to read the given file
 * @throws IOException if the resource cannot be opened or decoded
 */
public static Reader getDecodingReader(Class<?> clazz, String resource, Charset charSet) throws IOException {
  InputStream stream = null;
  boolean success = false;
  try {
    stream = clazz.getResourceAsStream(resource);
    final Reader reader = getDecodingReader(stream, charSet);
    success = true;
    return reader;
  } finally {
    // Only close the stream if wrapping it in a reader failed; on success the
    // caller owns the returned reader (closing it closes the stream).
    if (!success) {
      IOUtils.close(stream);
    }
  }
}
boolean success = false; try { input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
Reader reader = null; try { reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8); return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase)); } finally {
/**
 * Creates a dictionary based on an inputstream.
 * <p>
 * NOTE: content is treated as UTF-8
 *
 * @param dictFile stream containing the dictionary words; ownership passes to
 *                 this dictionary (read via the buffered decoding reader)
 */
public PlainTextDictionary(InputStream dictFile) {
  in = new BufferedReader(IOUtils.getDecodingReader(dictFile, StandardCharsets.UTF_8));
}
/**
 * Creates a dictionary based on an inputstream.
 * Using <code>fieldDelimiter</code> to separate out the
 * fields in a line.
 * <p>
 * NOTE: content is treated as UTF-8
 *
 * @param dictFile stream containing the dictionary entries
 * @param fieldDelimiter delimiter used to split each line into fields
 */
public FileDictionary(InputStream dictFile, String fieldDelimiter) {
  in = new BufferedReader(IOUtils.getDecodingReader(dictFile, StandardCharsets.UTF_8));
  this.fieldDelimiter = fieldDelimiter;
}
/**
 * Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT}
 *
 * @param matchVersion Lucene compatibility version
 * @param stopwords a UTF-8 encoded file with one stopword per line
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
 */
@Deprecated
public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException {
  this(matchVersion, WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), STOPWORDS_COMMENT, matchVersion));
}
/**
 * Builds an analyzer with the given stop words. Lines can be commented out
 * using {@link #STOPWORDS_COMMENT}
 *
 * @param matchVersion Lucene compatibility version
 * @param stopwords a UTF-8 encoded file with one stopword per line
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
 */
@Deprecated
public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException {
  this(matchVersion, WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), STOPWORDS_COMMENT, matchVersion));
}
/**
 * Builds an analyzer with the given stop words.
 *
 * @param matchVersion Lucene compatibility version
 * @param stopwords a UTF-8 encoded file with one stopword per line
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
 */
@Deprecated
public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException {
  this(matchVersion, WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
}
/**
 * Builds an analyzer with the given stop words.
 *
 * @param matchVersion Lucene compatibility version
 * @param stopwords a UTF-8 encoded file with one stopword per line
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
 */
@Deprecated
public BrazilianAnalyzer(Version matchVersion, File stopwords) throws IOException {
  this(matchVersion, WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
}
/**
 * Builds an analyzer with the given stop words.
 *
 * @param matchVersion Lucene compatibility version
 * @param stopwords a UTF-8 encoded file with one stopword per line
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #GermanAnalyzer(Version, Set)}
 */
@Deprecated
public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException {
  this(matchVersion, WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
}
/**
 * Builds an analyzer with the given stop words.
 *
 * @param matchVersion Lucene version to match See
 *        <a href="#version">above</a>
 * @param stopwords a file containing stopwords (UTF-8, one per line)
 * @throws IOException if the stopword file cannot be read
 * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
 */
@Deprecated
public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
  this(matchVersion, (Set<?>)WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
}
/**
 * Reads break-iterator rules from the named resource (UTF-8), skipping lines
 * that start with {@code #}, and compiles them into a
 * {@code RuleBasedBreakIterator}.
 *
 * @param filename name of the rules resource
 * @param loader resource loader used to open the rules stream
 * @return a break iterator built from the concatenated rule lines
 * @throws IOException if the resource cannot be opened or read
 */
private BreakIterator parseRules(String filename, ResourceLoader loader) throws IOException {
  StringBuilder rules = new StringBuilder();
  InputStream rulesStream = loader.openResource(filename);
  // try-with-resources: the original called reader.close() after the loop,
  // leaking the stream whenever readLine() threw mid-read.
  try (BufferedReader reader = new BufferedReader(
      IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
      if (!line.startsWith("#")) {
        rules.append(line);
      }
      // a newline is appended even for comment lines, matching the original
      rules.append('\n');
    }
  }
  return new RuleBasedBreakIterator(rules.toString());
}
/**
 * Builds an exclusionlist from the words contained in the given file.
 *
 * @param exclusionlist a UTF-8 encoded file with one exclusion word per line
 * @throws IOException if the file cannot be read
 * @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
 */
@Deprecated
public void setStemExclusionTable( File exclusionlist ) throws IOException {
  excltable = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(exclusionlist, IOUtils.CHARSET_UTF_8), matchVersion);
  setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
 * Builds an exclusionlist from the words contained in the given file.
 *
 * @param exclusionlist a UTF-8 encoded file with one exclusion word per line
 * @throws IOException if the file cannot be read
 * @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
 */
@Deprecated
public void setStemExclusionTable(File exclusionlist) throws IOException {
  exclusionSet = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(exclusionlist, IOUtils.CHARSET_UTF_8), matchVersion);
  setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
 * Builds an exclusionlist from the words contained in the given file.
 *
 * @param exclusionlist a UTF-8 encoded file with one exclusion word per line
 * @throws IOException if the file cannot be read
 * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
 */
@Deprecated
public void setStemExclusionTable(File exclusionlist) throws IOException {
  excltable = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(exclusionlist, IOUtils.CHARSET_UTF_8), matchVersion);
  setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
 * Builds an exclusionlist from the words contained in the given file.
 *
 * @param exclusionlist a UTF-8 encoded file with one exclusion word per line
 * @throws RuntimeException wrapping any {@link IOException} while reading the
 *         file (the deprecated signature declares no checked exception)
 * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
 */
@Deprecated
public void setStemExclusionTable(File exclusionlist) {
  try {
    excltable = WordlistLoader.getWordSet(
        IOUtils.getDecodingReader(exclusionlist, IOUtils.CHARSET_UTF_8), matchVersion);
    setPreviousTokenStream(null); // force a new stemmer to be created
  } catch (IOException e) {
    // TODO: throw IOException — wrapped unchecked to preserve the signature
    throw new RuntimeException(e);
  }
}
/**
 * Loads the default stopword set from {@code DEFAULT_STOPWORD_FILE} (UTF-8,
 * comment lines marked by {@code STOPWORD_FILE_COMMENT}).
 *
 * @return an unmodifiable stopword set
 * @throws IOException if the bundled resource cannot be read
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  // make sure it is unmodifiable as we expose it in the outer class
  // NOTE(review): the decoding reader is not closed here — presumably
  // WordlistLoader.getWordSet closes it; confirm, else it leaks on error.
  return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
      .getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
      STOPWORD_FILE_COMMENT));
}
}
/**
 * Loads the default stopword set from {@code DEFAULT_STOPWORD_FILE} (UTF-8,
 * comment lines marked by {@code STOPWORD_FILE_COMMENT}).
 *
 * @return an unmodifiable stopword set
 * @throws IOException if the bundled resource cannot be read
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  // make sure it is unmodifiable as we expose it in the outer class
  // NOTE(review): the decoding reader is not closed here — presumably
  // WordlistLoader.getWordSet closes it; confirm, else it leaks on error.
  return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(JiebaAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
      STOPWORD_FILE_COMMENT));
}
}
/**
 * Creates the factory, loading the suffix-word set from the optional
 * {@code suffix_words_path} setting (UTF-8 file resolved against the config
 * directory). When the setting is absent an empty set is used.
 *
 * @throws IllegalArgumentException if the configured word file cannot be read
 */
public NumberConcatenationFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings) {
  super(indexSettings, name, settings);
  final String path = settings.get("suffix_words_path");
  if (path == null) {
    // no file configured: fall back to an empty, case-sensitive set
    suffixWords = new CharArraySet(0, false);
  } else {
    final File wordFile = environment.configFile().resolve(path).toFile();
    try (Reader decoded = IOUtils.getDecodingReader(new FileInputStream(wordFile), StandardCharsets.UTF_8)) {
      suffixWords = WordlistLoader.getWordSet(decoded);
    } catch (final IOException e) {
      throw new IllegalArgumentException("Could not load " + wordFile.getAbsolutePath(), e);
    }
  }
}
/**
 * Rebuilds {@code SPECIAL_TOKENIZATION_CASES} from the given UTF-8 Snowball
 * word list; a {@code null} stream leaves the current radix untouched.
 *
 * @param input word-list stream, or {@code null} to keep the existing cases
 * @return the (possibly replaced) tokenization-cases radix
 * @throws IOException if the stream cannot be read
 */
public DictRadix<Byte> setCustomTokenizationCases(InputStream input) throws IOException {
  if (input != null) {
    final CharArraySet words = WordlistLoader.getSnowballWordSet(
        IOUtils.getDecodingReader(input, StandardCharsets.UTF_8));
    final DictRadix<Byte> radix = new DictRadix<>(false);
    // CharArraySet iterates char[] entries; each becomes one radix node
    for (Object word : words) {
      radix.addNode((char[]) word, dummyData);
    }
    SPECIAL_TOKENIZATION_CASES = radix;
  }
  return SPECIAL_TOKENIZATION_CASES;
}