/** Builds a fresh standard {@link IndexWriterConfig} pinned to the JVM-wide default codec. */
@Override
public IndexWriterConfig newInstance()
{
    // Start from the project's standard writer configuration…
    final IndexWriterConfig config = IndexWriterConfigs.standard();
    // …and set the codec explicitly rather than relying on Lucene's implicit default.
    config.setCodec( Codec.getDefault() );
    return config;
} } // closes the enclosing (anonymous) class — its header is outside this chunk
// NOTE(review): fragment of a larger method — the enclosing definition is not visible in this chunk.
// Selects best-compression stored fields for the Lucene 7.0 codec, then opens the writer over it.
iwc.setCodec(new Lucene70Codec( Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION)); writer = new IndexWriter(indexDirectory, iwc);
/**
 * Builds the project's standard {@link IndexWriterConfig}: snapshot-capable deletion
 * policy, compound files, tuned buffer limits, a per-field postings codec, and a
 * byte-size based log merge policy.
 *
 * @param analyzer analyzer used for all indexed fields
 * @return a fully configured writer config
 */
public static IndexWriterConfig standard( Analyzer analyzer )
{
    IndexWriterConfig config = new IndexWriterConfig( analyzer );
    config.setMaxBufferedDocs( MAX_BUFFERED_DOCS );
    config.setMaxBufferedDeleteTerms( MAX_BUFFERED_DELETE_TERMS );
    // The snapshot wrapper lets callers pin a commit (e.g. for backups) while
    // otherwise retaining only the most recent commit.
    config.setIndexDeletionPolicy( new SnapshotDeletionPolicy( new KeepOnlyLastCommitDeletionPolicy() ) );
    config.setUseCompoundFile( true );
    config.setRAMBufferSizeMB( STANDARD_RAM_BUFFER_SIZE_MB );
    config.setCodec( new Lucene54Codec()
    {
        @Override
        public PostingsFormat getPostingsFormatForField( String field )
        {
            // Feature toggle: optionally swap in the block-tree-ords postings format
            // for every field; otherwise keep the codec's own choice.
            PostingsFormat defaultFormat = super.getPostingsFormatForField( field );
            return CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT ? blockTreeOrdsPostingsFormat : defaultFormat;
        }
    } );
    if ( CUSTOM_MERGE_SCHEDULER )
    {
        config.setMergeScheduler( new PooledConcurrentMergeScheduler() );
    }
    LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
    policy.setNoCFSRatio( MERGE_POLICY_NO_CFS_RATIO );
    policy.setMinMergeMB( MERGE_POLICY_MIN_MERGE_MB );
    policy.setMergeFactor( MERGE_POLICY_MERGE_FACTOR );
    config.setMergePolicy( policy );
    return config;
}
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
// NOTE(review): fragment of a larger method — `config` and `directory` are declared in the
// enclosing definition, which is not visible in this chunk.
config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config);
// NOTE(review): fragment of a larger method — `config` and `directory` are declared in the
// enclosing definition, which is not visible in this chunk.
config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config);
//You could also simply pass in Version.LUCENE_40 here, and not worry about the Codec //(though that will likely affect other things as well) IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer); config.setCodec(new Lucene40Codec()); IndexWriter writer = new IndexWriter(directory, config);
/**
 * Builds a writer config whose codec keeps the "id" field's postings in the
 * in-memory postings format while every other field uses the Lucene50 default.
 */
private IndexWriterConfig newConfig(final Analyzer analyzer) {
    final Codec perFieldCodec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            // "id" is looked up constantly, so hold its terms fully in memory.
            return field.equals("id")
                    ? PostingsFormat.forName("Memory")
                    : PostingsFormat.forName("Lucene50");
        }
    };
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setCodec(perFieldCodec);
    return config;
}
/**
 * Builds a Lucene 4.0 writer config whose codec keeps the "id" field's postings
 * in the in-memory postings format while every other field uses the Lucene40 default.
 */
private IndexWriterConfig newConfig( final Analyzer analyzer )
{
    final Codec perFieldCodec = new Lucene40Codec()
    {
        @Override
        public PostingsFormat getPostingsFormatForField( String field )
        {
            // "id" is looked up constantly, so hold its terms fully in memory.
            return field.equals( "id" )
                    ? PostingsFormat.forName( "Memory" )
                    : PostingsFormat.forName( "Lucene40" );
        }
    };
    final IndexWriterConfig config = new IndexWriterConfig( LUCENE_40, analyzer );
    config.setCodec( perFieldCodec );
    return config;
}
// NOTE(review): fragment of a larger method — `iwc`, `engineConfig`, and `config()` belong to
// the enclosing definition, which is not visible in this chunk (the trailing `if` is cut off).
iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh if (config().getIndexSort() != null) {
private IndexWriter initializeIndexWriter() throws IOException { final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, this.initializeAnalyzer()); // Register the SIREn codec config.setCodec(new Siren10Codec()); return new IndexWriter(dir, config); }
private IndexWriter initializeIndexWriter() throws IOException { final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, this.initializeAnalyzer()); // Register the SIREn codec config.setCodec(new Siren10Codec()); return new IndexWriter(dir, config); }
public void run() { // do it ... try { // IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION)); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setCodec(new LireCustomCodec()); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config); for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) { File inputFile = iterator.next(); if (verbose) System.out.println("Processing " + inputFile.getPath() + "."); readFile(indexWriter, inputFile); if (verbose) System.out.println("Indexing finished."); } indexWriter.commit(); indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } }
public IndexWriter openLuceneIndex(File aIndexDir) throws IOException { Directory directory = FSDirectory.open(aIndexDir.toPath()); // Create parser configuration as a JSON object JSONObject jsonParserConfiguration = new JSONObject(); // Add the project id to the configuration jsonParserConfiguration.put("projectId", project.getId()); // Tokenizer parameters Map<String, String> paramsTokenizer = new HashMap<String, String>(); paramsTokenizer.put(MtasTokenizerFactory.ARGUMENT_PARSER, MTAS_PARSER); paramsTokenizer.put(MtasTokenizerFactory.ARGUMENT_PARSER_ARGS, jsonParserConfiguration.toString()); // Build analyzer Analyzer mtasAnalyzer = CustomAnalyzer.builder() .withTokenizer(MTAS_TOKENIZER, paramsTokenizer).build(); Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), analyzerPerField); // Build IndexWriter IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setUseCompoundFile(false); config.setCodec(Codec.forName(MtasCodec.MTAS_CODEC_NAME)); return new IndexWriter(directory, config); }
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION); // The config IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
/** * Creates an IndexWriter for given index path, with given analyzer. * * @param directory the path to the index directory * @param create set to true if you want to create a new index * @param analyzer gives the analyzer used for the Indexwriter. * @return an IndexWriter * @throws IOException */ public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION); // LetterTokenizer with LowerCaseFilter else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION); // WhitespaceTokenizer else if (analyzer == AnalyzerType.KeywordAnalyzer) tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token. // The config IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
IndexWriter createWriter(boolean create) throws IOException { try { final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); iwc.setIndexDeletionPolicy(deletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; try { verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); } catch (Exception ignore) { } iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); iwc.setMergePolicy(mergePolicy); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh return new IndexWriter(store.directory(), iwc); } catch (LockObtainFailedException ex) { logger.warn("could not lock IndexWriter", ex); throw ex; } }
/**
 * Convenience overload: builds a {@link RandomIndexWriter} whose config uses the
 * given codec, a log merge policy, and the classic default similarity.
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir, final Analyzer analyzer,
        final Codec codec) throws IOException {
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    conf.setCodec(codec);
    conf.setMergePolicy(newLogMergePolicy());
    conf.setSimilarity(new DefaultSimilarity());
    return newRandomIndexWriter(dir, analyzer, codec, conf);
}
/**
 * Convenience overload: builds a {@link RandomIndexWriter} whose config uses the
 * given codec, a log merge policy, and the classic default similarity.
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir, final Analyzer analyzer,
        final Codec codec) throws IOException {
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    conf.setCodec(codec);
    conf.setMergePolicy(newLogMergePolicy());
    conf.setSimilarity(new DefaultSimilarity());
    return newRandomIndexWriter(dir, analyzer, codec, conf);
}