private void addPatterns(String id, Map<Integer, Set<E>> p, boolean commit) { try{ setIndexWriter(); Document doc = new Document(); doc.add(new StringField("sentid", id, Field.Store.YES)); doc.add(new Field("patterns", getBytes(p), LuceneFieldType.NOT_INDEXED)); indexWriter.addDocument(doc); if(commit){ indexWriter.commit(); //closeIndexWriter(); } }catch(IOException e){ throw new RuntimeException(e); } }
// NOTE(review): this appears to be a collapsed and corrupted copy of Lucene's
// Field#tokenStream — do not trust it as-is. Problems visible in this view:
//  * Each "if (!(reuse instanceof StringTokenStream))" guard is immediately
//    followed by a cast of `reuse` to that very type, which would throw
//    ClassCastException whenever the guard passes. Upstream Lucene allocates a
//    new StringTokenStream/BinaryTokenStream inside the negated guard and
//    performs the cast AFTER the guard — confirm against the original source.
//  * The brace sequence "} else { } else if" is not valid Java; at least one
//    brace/branch was lost in transcription.
//  * The method body is truncated here (no closing braces, no terminal
//    throw/return), so the full control flow cannot be verified from this view.
@Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { if (fieldType().indexOptions() == IndexOptions.NONE) { if (!fieldType().tokenized()) { if (stringValue() != null) { if (!(reuse instanceof StringTokenStream)) { ((StringTokenStream) reuse).setValue(stringValue()); return reuse; } else if (binaryValue() != null) { if (!(reuse instanceof BinaryTokenStream)) { ((BinaryTokenStream) reuse).setValue(binaryValue()); return reuse; } else { } else if (readerValue() != null) { return analyzer.tokenStream(name(), readerValue()); } else if (stringValue() != null) { return analyzer.tokenStream(name(), stringValue());
// NOTE(review): incomplete fragment — looks like the per-field validation loop
// of IndexWriter#updateDocValues, collapsed onto one line. Structural problems
// visible here (reconcile against the full file before editing):
//  * Two consecutive `throw` statements: the IllegalArgumentException directly
//    after the NullPointerException is unreachable as written; a conditional
//    or an intervening `case` label was probably lost.
//  * `case BINARY:` and `default:` labels appear without their enclosing
//    `switch (dvType)` header, and a bare `break;` precedes the first visible
//    case — the NUMERIC branch header is missing from this view.
//  * Brace balance cannot be verified from this fragment.
for (int i = 0; i < updates.length; i++) { final Field f = updates[i]; final DocValuesType dvType = f.fieldType().docValuesType(); if (dvType == null) { throw new NullPointerException("DocValuesType must not be null (field: \"" + f.name() + "\")"); throw new IllegalArgumentException("can only update NUMERIC or BINARY fields! field=" + f.name()); if (globalFieldNumberMap.contains(f.name(), dvType) == false) { globalFieldNumberMap.addOrGet(f.name(), -1, dvType, 0, 0, 0, f.name().equals(config.softDeletesField)); assert globalFieldNumberMap.contains(f.name(), dvType); if (config.getIndexSortFields().contains(f.name())) { throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort()); Long value = (Long)f.numericValue(); dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), value); break; case BINARY: dvUpdates[i] = new BinaryDocValuesUpdate(term, f.name(), f.binaryValue()); break; default: throw new IllegalArgumentException("can only update NUMERIC or BINARY fields: field=" + f.name() + ", type=" + dvType);
private IndexOp remove(final ArtifactContext ac) throws IOException { if (ac != null) { final String uinfo = ac.getArtifactInfo().getUinfo(); // add artifact deletion marker final Document doc = new Document(); doc.add(new Field(ArtifactInfo.DELETED, uinfo, Field.Store.YES, Field.Index.NO)); doc.add(new Field(ArtifactInfo.LAST_MODIFIED, // Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NO)); IndexWriter w = context.getIndexWriter(); w.addDocument(doc); w.deleteDocuments(new Term(ArtifactInfo.UINFO, uinfo)); return IndexOp.DELETED; } return IndexOp.NOOP; }
/**
 * Demo: indexes one document with two values for field "f" (positions and
 * offsets enabled) and builds a FieldTermStack for it from the term vectors.
 */
public static void main(String[] args) throws Exception {
  // Query hits the default field "f" and an explicit field "x".
  Analyzer analyzer = new WhitespaceAnalyzer();
  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer);
  Query query = parser.parse("a x:b");
  FieldQuery fieldQuery = new FieldQuery(query, true, false);

  // Index a single document; term vectors with positions and offsets are
  // required so the term stack can be reconstructed later.
  Directory directory = new RAMDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
  Document document = new Document();
  document.add(new Field("f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
  document.add(new Field("f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
  indexWriter.addDocument(document);
  indexWriter.close();

  // Materialize the term stack for document 0, then release the reader.
  IndexReader indexReader = IndexReader.open(directory, true);
  FieldTermStack ftl = new FieldTermStack(indexReader, 0, "f", fieldQuery);
  indexReader.close();
}
/**
 * Builds an in-memory Lucene index from the person records in {@code namesFile},
 * one document per input line.
 *
 * A single Document and Field array are reused across all rows: the field
 * values are overwritten via {@code setFieldData} before each addDocument call,
 * avoiding per-row allocation.
 *
 * @return the populated RAMDirectory
 * @throws IOException on read or index-write failure
 */
public RAMDirectory buildIndex() throws IOException {
  RAMDirectory ramDirectory = new RAMDirectory();
  Document doc = new Document();
  Field[] fields = new Field[]{
      new Field("firstName", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("lastName", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("address", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("email", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)};
  addFieldsToDocument(doc, fields);
  IndexWriter indexWriter = new IndexWriter(ramDirectory,
      new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
  try {
    // Fix: the original leaked this BufferedReader on every code path.
    BufferedReader reader = new BufferedReader(new FileReader(namesFile));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] personData = getPersonData(line);
        setFieldData(personData, fields);
        indexWriter.addDocument(doc);
      }
    } finally {
      reader.close();
    }
  } finally {
    // Fix: also close the writer when reading/indexing fails.
    indexWriter.close();
  }
  return ramDirectory;
}
// NOTE(review): incomplete fragment — several statements from what looks like a
// reference-object selection/indexing routine, with their enclosing loops and
// declarations missing (`i`, `count`, `hits`, `analyzerPerField`,
// `numReferenceObjectsUsed` are all undefined in this view). The sequence
// "iw.close(); ... iw = new IndexWriter(...)" suggests the writer is reopened
// with a different analyzer between phases — confirm against the full file.
// The `sb.delete(0, sb.length())` directly after construction is a no-op here;
// it probably lives at the top of an outer per-document loop in the original.
int numDocs = reader.numDocs(); progress.setCurrentState(State.RoSelection); boolean hasDeletions = reader.hasDeletions(); Document document = reader.document(i); document.add(new Field("ro-id", count + "", StringField.TYPE_STORED)); iw.addDocument(document); iw.commit(); iw.close(); PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField); iw = new IndexWriter(FSDirectory.open(new File(indexPath)), new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper).setOpenMode(IndexWriterConfig.OpenMode.CREATE)); StringBuilder sb = new StringBuilder(256); sb.delete(0, sb.length()); for (int j = 0; j < numReferenceObjectsUsed; j++) { sb.append(hits.doc(j).getValues("ro-id")[0]); sb.append(' '); document.add(new TextField("ro-order", sb.toString(), Field.Store.YES)); iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
// NOTE(review): statement fragment (no enclosing method visible). Builds a
// document carrying the file's full path, runs the analyzer (which populates
// the document and writes the cross-reference to xrefOut), then deserializes
// the Definitions from the binary TAGS field.
// NOTE(review): doc.getField(QueryBuilder.TAGS) will NPE if the analyzer did
// not add a TAGS field (e.g. a file type with no definitions) — verify the
// caller guarantees its presence, or guard before dereferencing.
Document doc = new Document(); doc.add(new Field(QueryBuilder.FULLPATH, path, string_ft_nstored_nanalyzed_norms)); StringWriter xrefOut = new StringWriter(); analyzer.analyze(doc, getStreamSource(path), xrefOut); Definitions definitions = Definitions.deserialize(doc.getField(QueryBuilder.TAGS).binaryValue().bytes);
// NOTE(review): incomplete fragment — the loop that assigns `status` and
// increments `cnt` is missing from this view, as is the creation of `dir`.
// NOTE(review): suspected bug to verify against StatusField's definition:
// the FRIENDS_COUNT field is populated from status.getFollowersCount() —
// friends and followers are different metrics on Twitter; one of the two
// sides is almost certainly wrong.
// The trailing forceMerge(1)/LOG lines appear to belong after the loop in the
// original; brace placement cannot be checked here.
String indexPath = cmdline.getOptionValue(INDEX_OPTION); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); writer.forceMerge(1); LOG.info("Done!");
// NOTE(review): this method looks brace-mangled by the one-line collapse;
// reconcile against the original before changing logic. Suspected problems:
//  * indexWriter.addDocuments(Arrays.asList(docs)) sits INSIDE the per-doc
//    loop, so the whole batch is re-added once per document (duplicates,
//    O(n^2) work). It presumably belongs after the loop.
//  * The inner for over doc.getFields() has no visible closing brace before
//    doc.add("all", ...) — the "all" field should be added once per document,
//    after concatenating all field values, not once per field.
//  * indexWriter is never closed in this view — resource leak unless it is
//    closed elsewhere; the reopened reader would also not see uncommitted data.
//  * The catch logs and swallows the exception, then reopening proceeds as if
//    the update succeeded — confirm this best-effort behavior is intended.
public synchronized void updateIndex(Document... docs) { try { IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)); IndexWriter indexWriter = new IndexWriter(index, cfg); for (Document doc : docs) { LOGGER.fine("Updating index for document: " + doc.getFieldable(AWF__ID)); indexWriter.deleteDocuments(new Term(AWF__ID, doc.getFieldable(AWF__ID).stringValue())); StringBuilder all = new StringBuilder(); for (Fieldable f : doc.getFields()) { all.append(f.stringValue()); all.append(' '); LOGGER.fine("Updated field all for "+ doc.getFieldable(AWF__ID) + " with value: " + all); doc.add(new Field("all", all.toString(), Field.Store.NO, Field.Index.ANALYZED)); indexWriter.addDocuments(Arrays.asList(docs)); LOGGER.fine("reindexing Lucene..."); indexWriter.commit(); } } catch (Exception e) { LOGGER.log(Level.SEVERE, e.getMessage(), e); } try { if (indexReader != null) { indexReader.close(); } } catch (Exception e) { LOGGER.log(Level.SEVERE, e.getMessage(), e); } indexReader = IndexReader.open(index); indexSearcher = new IndexSearcher(indexReader); LOGGER.fine("reopened Lucene index handles");
// NOTE(review): incomplete fragment — `text`, `doc`, `triple`, `uri`,
// `searchField`, and `i` are declared elsewhere; the enclosing loop over
// triples is not visible here.
// NOTE(review): suspected bug — `writer.close()` appears inside the periodic
// `if (i++ % 10000 == 0)` checkpoint branch alongside commit(); closing the
// writer there would make every subsequent addDocument throw
// AlreadyClosedException. Likely a misplaced brace: close() belongs after the
// loop, with only commit() in the checkpoint. Verify against the full file.
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setRAMBufferSizeMB(1024.0); indexWriterConfig.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); FieldType stringType = new FieldType(StringField.TYPE_STORED); stringType.setStoreTermVectors(false); FieldType textType = new FieldType(TextField.TYPE_STORED); textType.setStoreTermVectors(false); text = triple.getObject().getLiteralLexicalForm(); doc = new Document(); doc.add(new Field("uri", uri, stringType)); doc.add(new Field(searchField, text, textType)); writer.addDocument(doc); if(i++ % 10000 == 0){ writer.commit(); writer.close();
public static Document Document(File f) throws java.io.FileNotFoundException { Document doc = new Document(); doc.add(new StoredField("path", f.getPath())); doc.add(new StoredField("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE))); //create new FieldType to store term positions (TextField is not sufficiently configurable) FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); ft.setTokenized(true); ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); Field contentsField = new Field("contents", new FileReader(f), ft); doc.add(contentsField); return doc; }
// NOTE(review): incomplete fragment — `analyzer`, `directory`, and `terms`
// come from outside this view, and the closing braces are missing.
// NOTE(review): suspected bug — no closing brace is visible between
// writer.addDocument(doc) and writer.close(), which would put close() INSIDE
// the for loop: the second iteration's addDocument would then throw
// AlreadyClosedException. close() almost certainly belongs after the loop;
// verify brace placement against the full file.
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer); IndexWriter writer = new IndexWriter(directory, conf); for (String term : terms) { Document doc = new Document(); doc.add(new Field("chars", term, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), terms.length); for (int i = 0; i < topDocs.scoreDocs.length; i++){ System.out.println("Id: " + topDocs.scoreDocs[i].doc + " Val: " + searcher.doc(topDocs.scoreDocs[i].doc).get("chars"));
@Test public void storesPositionCorrectly() throws Exception { indexDirectory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); //use of Version, need to look at this. config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(indexDirectory, config); String str = "קשת רשת דבשת מיץ יבשת יבלת גחלת גדר אינציקלופדיה חבר"; Document doc = new Document(); doc.add(new Field("Text", str, fieldType)); writer.addDocument(doc); writer.close(); searcher = new IndexSearcher(DirectoryReader.open(indexDirectory)); runQuery("\"קשת\"", 0); runQuery("\"אינציקלופדיה\"", 8); runQuery("\"חבר\"", 9); indexDirectory.close(); }
// NOTE(review): incomplete fragment — the try block has no visible catch or
// finally, iWriter is never closed in this view, and the method signature,
// the origin of id/className/title/subTitles/content/tagId/weight, and the
// *IndexType FieldType constants are all outside this fragment.
// NOTE(review): Field.setBoost is the pre-Lucene-6 index-time boost API
// (removed in later versions) — if this code is migrated, boosts must move to
// query time or a separate boost field. Confirm the Lucene version in use.
try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(this.analyzer); IndexWriter iWriter = new IndexWriter(directory, indexWriterConfig); Document doc = new Document(); doc.add(new Field("id", id, simpleIndexType)); doc.add(new Field("className", className, simpleIndexType)); field = new Field("title", title, indexedTextType); field.setBoost(10f * weight); doc.add(field); field = new Field("subTitles", subTitles, indexedTextType); field.setBoost(6f * weight); doc.add(field); field = new Field("content", content, indexedTextType); field.setBoost(weight); doc.add(field); field = new Field("tag", tagId, simpleIndexType); field.setBoost(weight); doc.add(field); field = new Field("iconFileIdentifier", iconFileIdentifier, TextField.TYPE_STORED); doc.add(field);
// NOTE(review): incomplete fragment — resembles a Lucene suggester index
// build. Issues visible in this view (verify against the full file):
//  * A bare `break;` appears with no enclosing loop or switch, and
//    `surfaceForm`, `count`, and `success` are undefined here — the iteration
//    over input surface forms was lost in transcription.
//  * `writer.rollback();` immediately followed by `success = true;` would
//    discard everything just indexed and then report success. The usual
//    upstream pattern is commit-on-success with rollback only in a
//    `finally { if (!success) ... }` guard — the collapse likely merged the
//    success path and the failure cleanup.
IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(ramBufferSizeMB); IndexWriter writer = new IndexWriter(dir, iwc); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); ft.freeze(); Document doc = new Document(); Field field = new Field("body", "", ft); doc.add(field); break; field.setStringValue(surfaceForm.utf8ToString()); writer.addDocument(doc); count++; writer.rollback(); success = true; } finally {
/**
 * Indexes one JSON document under the given identifier.
 *
 * The id is stored (retrievable) and indexed verbatim; the JSON body is
 * tokenized and indexed with norms omitted, but neither stored nor given
 * term vectors.
 *
 * @param id   unique document identifier
 * @param json the JSON payload to index
 * @throws IOException on index write failure
 */
public void addDocument(final String id, final String json) throws IOException {
  // Custom FieldType: indexed + tokenized, no norms, not stored, no vectors.
  final FieldType jsonFieldType = new FieldType();
  jsonFieldType.setIndexed(true);
  jsonFieldType.setTokenized(true);
  jsonFieldType.setOmitNorms(true);
  jsonFieldType.setStored(false);
  jsonFieldType.setStoreTermVectors(false);

  final Document document = new Document();
  document.add(new StringField(DEFAULT_ID_FIELD, id, Store.YES));
  document.add(new Field(DEFAULT_SIREN_FIELD, json, jsonFieldType));
  writer.addDocument(document);
}
@Override public void store(URI type, Record record) throws IOException, IllegalStateException { writingOperations.get(type).incrementAndGet(); IndexWriter indexWriter = writers.get(type); String uri; try { uri = record.getID().toString(); } catch (NullPointerException e) { throw new IOException(e); } logger.debug(String.format("Inserting %s", uri)); Document doc = new Document(); doc.add(new Field(KEY_NAME, uri, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(VALUE_NAME, serializeRecord(record, serializer), Field.Store.YES)); // use "update" instead of "add" to avoid duplicates indexWriter.updateDocument(new Term(KEY_NAME, uri), doc); }
/**
 * Atomically applies a batch of key/value changes to the Lucene index.
 * A null value is a tombstone that deletes the key's document; any other
 * value replaces (or inserts) the document for that key. After committing,
 * the shared reader is reopened so subsequent searches see this batch.
 *
 * @param entries keys mapped to new values, or to null for deletion
 * @throws IOException wrapping any failure during the update
 */
private void indexPut(final Map<String, String> entries) throws IOException {
  try {
    int deleted = 0;
    int updated = 0;
    synchronized (this.luceneWriter) {
      for (final Map.Entry<String, String> entry : entries.entrySet()) {
        final String key = entry.getKey();
        final String value = entry.getValue();
        if (value == null) {
          // Tombstone: remove the document stored under this key.
          this.luceneWriter.deleteDocuments(new Term(KEY_FIELD, key));
          ++deleted;
          continue;
        }
        final Document doc = new Document();
        doc.add(new Field(KEY_FIELD, key, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(VALUE_FIELD, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
        LOGGER.debug("Document added: {}", doc.toString());
        // Keyed update (not add) so re-put keys do not create duplicates.
        this.luceneWriter.updateDocument(new Term(KEY_FIELD, key), doc);
        ++updated;
      }
      this.luceneWriter.commit();
      // Swap in a fresh reader that reflects the just-committed changes.
      this.luceneReader.close();
      this.luceneReader = this.luceneWriter.getReader();
    }
    LOGGER.debug("Updated Lucene index: {} documents updated, {} documents deleted", updated, deleted);
  } catch (final Throwable ex) {
    // Intentionally broad (matches original): every failure mode surfaces as IOException.
    throw new IOException("Failed to update Lucene index with entries " + entries, ex);
  }
}
// NOTE(review): incomplete fragment — the opening `try {` matching the visible
// `} catch` is outside this view, as are the declarations of iwriter,
// directory, analyzer, fieldName, and text, and the catch block's closing
// brace. e.printStackTrace() is an anti-pattern (stderr, no logger, error
// effectively swallowed) — route through the project's logger or rethrow;
// confirm intent against the full file before changing.
iwriter = new IndexWriter(directory, analyzer, true , IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); directory.close(); } catch (IOException e) { e.printStackTrace();