private void addPatterns(String id, Map<Integer, Set<E>> p, boolean commit) { try{ setIndexWriter(); Document doc = new Document(); doc.add(new StringField("sentid", id, Field.Store.YES)); doc.add(new Field("patterns", getBytes(p), LuceneFieldType.NOT_INDEXED)); indexWriter.addDocument(doc); if(commit){ indexWriter.commit(); //closeIndexWriter(); } }catch(IOException e){ throw new RuntimeException(e); } }
// NOTE(review): this appears to be a collapsed and corrupted copy of Lucene's
// Field#tokenStream — do not trust it as-is. Problems visible in this view:
//  * Each "if (!(reuse instanceof StringTokenStream))" guard is immediately
//    followed by a cast of `reuse` to that very type, which would throw
//    ClassCastException whenever the guard passes. Upstream Lucene allocates a
//    new StringTokenStream/BinaryTokenStream inside the negated guard and
//    performs the cast AFTER the guard — confirm against the original source.
//  * The brace sequence "} else { } else if" is not valid Java; at least one
//    brace/branch was lost in transcription.
//  * The method body is truncated here (no closing braces, no terminal
//    throw/return), so the full control flow cannot be verified from this view.
@Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { if (fieldType().indexOptions() == IndexOptions.NONE) { if (!fieldType().tokenized()) { if (stringValue() != null) { if (!(reuse instanceof StringTokenStream)) { ((StringTokenStream) reuse).setValue(stringValue()); return reuse; } else if (binaryValue() != null) { if (!(reuse instanceof BinaryTokenStream)) { ((BinaryTokenStream) reuse).setValue(binaryValue()); return reuse; } else { } else if (readerValue() != null) { return analyzer.tokenStream(name(), readerValue()); } else if (stringValue() != null) { return analyzer.tokenStream(name(), stringValue());
// NOTE(review): incomplete fragment — looks like the per-field validation loop
// of IndexWriter#updateDocValues, collapsed onto one line. Structural problems
// visible here (reconcile against the full file before editing):
//  * Two consecutive `throw` statements: the IllegalArgumentException directly
//    after the NullPointerException is unreachable as written; a conditional
//    or an intervening `case` label was probably lost.
//  * `case BINARY:` and `default:` labels appear without their enclosing
//    `switch (dvType)` header, and a bare `break;` precedes the first visible
//    case — the NUMERIC branch header is missing from this view.
//  * Brace balance cannot be verified from this fragment.
for (int i = 0; i < updates.length; i++) { final Field f = updates[i]; final DocValuesType dvType = f.fieldType().docValuesType(); if (dvType == null) { throw new NullPointerException("DocValuesType must not be null (field: \"" + f.name() + "\")"); throw new IllegalArgumentException("can only update NUMERIC or BINARY fields! field=" + f.name()); if (globalFieldNumberMap.contains(f.name(), dvType) == false) { globalFieldNumberMap.addOrGet(f.name(), -1, dvType, 0, 0, 0, f.name().equals(config.softDeletesField)); assert globalFieldNumberMap.contains(f.name(), dvType); if (config.getIndexSortFields().contains(f.name())) { throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort()); Long value = (Long)f.numericValue(); dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), value); break; case BINARY: dvUpdates[i] = new BinaryDocValuesUpdate(term, f.name(), f.binaryValue()); break; default: throw new IllegalArgumentException("can only update NUMERIC or BINARY fields: field=" + f.name() + ", type=" + dvType);
private IndexOp remove(final ArtifactContext ac) throws IOException { if (ac != null) { final String uinfo = ac.getArtifactInfo().getUinfo(); // add artifact deletion marker final Document doc = new Document(); doc.add(new Field(ArtifactInfo.DELETED, uinfo, Field.Store.YES, Field.Index.NO)); doc.add(new Field(ArtifactInfo.LAST_MODIFIED, // Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NO)); IndexWriter w = context.getIndexWriter(); w.addDocument(doc); w.deleteDocuments(new Term(ArtifactInfo.UINFO, uinfo)); return IndexOp.DELETED; } return IndexOp.NOOP; }
/**
 * Demo: indexes one document with two values for field "f" (positions and
 * offsets enabled) and builds a FieldTermStack for it from the term vectors.
 */
public static void main(String[] args) throws Exception {
  // Query hits the default field "f" and an explicit field "x".
  Analyzer analyzer = new WhitespaceAnalyzer();
  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer);
  Query query = parser.parse("a x:b");
  FieldQuery fieldQuery = new FieldQuery(query, true, false);

  // Index a single document; term vectors with positions and offsets are
  // required so the term stack can be reconstructed later.
  Directory directory = new RAMDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
  Document document = new Document();
  document.add(new Field("f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
  document.add(new Field("f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
  indexWriter.addDocument(document);
  indexWriter.close();

  // Materialize the term stack for document 0, then release the reader.
  IndexReader indexReader = IndexReader.open(directory, true);
  FieldTermStack ftl = new FieldTermStack(indexReader, 0, "f", fieldQuery);
  indexReader.close();
}
/**
 * Builds an in-memory Lucene index from the person records in {@code namesFile},
 * one document per input line.
 *
 * A single Document and Field array are reused across all rows: the field
 * values are overwritten via {@code setFieldData} before each addDocument call,
 * avoiding per-row allocation.
 *
 * @return the populated RAMDirectory
 * @throws IOException on read or index-write failure
 */
public RAMDirectory buildIndex() throws IOException {
  RAMDirectory ramDirectory = new RAMDirectory();
  Document doc = new Document();
  Field[] fields = new Field[]{
      new Field("firstName", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("lastName", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("address", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("email", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS),
      new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)};
  addFieldsToDocument(doc, fields);
  IndexWriter indexWriter = new IndexWriter(ramDirectory,
      new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
  try {
    // Fix: the original leaked this BufferedReader on every code path.
    BufferedReader reader = new BufferedReader(new FileReader(namesFile));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] personData = getPersonData(line);
        setFieldData(personData, fields);
        indexWriter.addDocument(doc);
      }
    } finally {
      reader.close();
    }
  } finally {
    // Fix: also close the writer when reading/indexing fails.
    indexWriter.close();
  }
  return ramDirectory;
}
// NOTE(review): incomplete fragment — several statements from what looks like a
// reference-object selection/indexing routine, with their enclosing loops and
// declarations missing (`i`, `count`, `hits`, `analyzerPerField`,
// `numReferenceObjectsUsed` are all undefined in this view). The sequence
// "iw.close(); ... iw = new IndexWriter(...)" suggests the writer is reopened
// with a different analyzer between phases — confirm against the full file.
// The `sb.delete(0, sb.length())` directly after construction is a no-op here;
// it probably lives at the top of an outer per-document loop in the original.
int numDocs = reader.numDocs(); progress.setCurrentState(State.RoSelection); boolean hasDeletions = reader.hasDeletions(); Document document = reader.document(i); document.add(new Field("ro-id", count + "", StringField.TYPE_STORED)); iw.addDocument(document); iw.commit(); iw.close(); PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField); iw = new IndexWriter(FSDirectory.open(new File(indexPath)), new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper).setOpenMode(IndexWriterConfig.OpenMode.CREATE)); StringBuilder sb = new StringBuilder(256); sb.delete(0, sb.length()); for (int j = 0; j < numReferenceObjectsUsed; j++) { sb.append(hits.doc(j).getValues("ro-id")[0]); sb.append(' '); document.add(new TextField("ro-order", sb.toString(), Field.Store.YES)); iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
// NOTE(review): statement fragment (no enclosing method visible). Builds a
// document carrying the file's full path, runs the analyzer (which populates
// the document and writes the cross-reference to xrefOut), then deserializes
// the Definitions from the binary TAGS field.
// NOTE(review): doc.getField(QueryBuilder.TAGS) will NPE if the analyzer did
// not add a TAGS field (e.g. a file type with no definitions) — verify the
// caller guarantees its presence, or guard before dereferencing.
Document doc = new Document(); doc.add(new Field(QueryBuilder.FULLPATH, path, string_ft_nstored_nanalyzed_norms)); StringWriter xrefOut = new StringWriter(); analyzer.analyze(doc, getStreamSource(path), xrefOut); Definitions definitions = Definitions.deserialize(doc.getField(QueryBuilder.TAGS).binaryValue().bytes);
// NOTE(review): incomplete fragment — the loop that assigns `status` and
// increments `cnt` is missing from this view, as is the creation of `dir`.
// NOTE(review): suspected bug to verify against StatusField's definition:
// the FRIENDS_COUNT field is populated from status.getFollowersCount() —
// friends and followers are different metrics on Twitter; one of the two
// sides is almost certainly wrong.
// The trailing forceMerge(1)/LOG lines appear to belong after the loop in the
// original; brace placement cannot be checked here.
String indexPath = cmdline.getOptionValue(INDEX_OPTION); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); writer.forceMerge(1); LOG.info("Done!");
// NOTE(review): this method looks brace-mangled by the one-line collapse;
// reconcile against the original before changing logic. Suspected problems:
//  * indexWriter.addDocuments(Arrays.asList(docs)) sits INSIDE the per-doc
//    loop, so the whole batch is re-added once per document (duplicates,
//    O(n^2) work). It presumably belongs after the loop.
//  * The inner for over doc.getFields() has no visible closing brace before
//    doc.add("all", ...) — the "all" field should be added once per document,
//    after concatenating all field values, not once per field.
//  * indexWriter is never closed in this view — resource leak unless it is
//    closed elsewhere; the reopened reader would also not see uncommitted data.
//  * The catch logs and swallows the exception, then reopening proceeds as if
//    the update succeeded — confirm this best-effort behavior is intended.
public synchronized void updateIndex(Document... docs) { try { IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)); IndexWriter indexWriter = new IndexWriter(index, cfg); for (Document doc : docs) { LOGGER.fine("Updating index for document: " + doc.getFieldable(AWF__ID)); indexWriter.deleteDocuments(new Term(AWF__ID, doc.getFieldable(AWF__ID).stringValue())); StringBuilder all = new StringBuilder(); for (Fieldable f : doc.getFields()) { all.append(f.stringValue()); all.append(' '); LOGGER.fine("Updated field all for "+ doc.getFieldable(AWF__ID) + " with value: " + all); doc.add(new Field("all", all.toString(), Field.Store.NO, Field.Index.ANALYZED)); indexWriter.addDocuments(Arrays.asList(docs)); LOGGER.fine("reindexing Lucene..."); indexWriter.commit(); } } catch (Exception e) { LOGGER.log(Level.SEVERE, e.getMessage(), e); } try { if (indexReader != null) { indexReader.close(); } } catch (Exception e) { LOGGER.log(Level.SEVERE, e.getMessage(), e); } indexReader = IndexReader.open(index); indexSearcher = new IndexSearcher(indexReader); LOGGER.fine("reopened Lucene index handles");
// NOTE(review): incomplete fragment — `text`, `doc`, `triple`, `uri`,
// `searchField`, and `i` are declared elsewhere; the enclosing loop over
// triples is not visible here.
// NOTE(review): suspected bug — `writer.close()` appears inside the periodic
// `if (i++ % 10000 == 0)` checkpoint branch alongside commit(); closing the
// writer there would make every subsequent addDocument throw
// AlreadyClosedException. Likely a misplaced brace: close() belongs after the
// loop, with only commit() in the checkpoint. Verify against the full file.
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setRAMBufferSizeMB(1024.0); indexWriterConfig.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); FieldType stringType = new FieldType(StringField.TYPE_STORED); stringType.setStoreTermVectors(false); FieldType textType = new FieldType(TextField.TYPE_STORED); textType.setStoreTermVectors(false); text = triple.getObject().getLiteralLexicalForm(); doc = new Document(); doc.add(new Field("uri", uri, stringType)); doc.add(new Field(searchField, text, textType)); writer.addDocument(doc); if(i++ % 10000 == 0){ writer.commit(); writer.close();
public static Document Document(File f) throws java.io.FileNotFoundException { Document doc = new Document(); doc.add(new StoredField("path", f.getPath())); doc.add(new StoredField("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE))); //create new FieldType to store term positions (TextField is not sufficiently configurable) FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); ft.setTokenized(true); ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); Field contentsField = new Field("contents", new FileReader(f), ft); doc.add(contentsField); return doc; }
// NOTE(review): incomplete fragment — `analyzer`, `directory`, and `terms`
// come from outside this view, and the closing braces are missing.
// NOTE(review): suspected bug — no closing brace is visible between
// writer.addDocument(doc) and writer.close(), which would put close() INSIDE
// the for loop: the second iteration's addDocument would then throw
// AlreadyClosedException. close() almost certainly belongs after the loop;
// verify brace placement against the full file.
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer); IndexWriter writer = new IndexWriter(directory, conf); for (String term : terms) { Document doc = new Document(); doc.add(new Field("chars", term, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), terms.length); for (int i = 0; i < topDocs.scoreDocs.length; i++){ System.out.println("Id: " + topDocs.scoreDocs[i].doc + " Val: " + searcher.doc(topDocs.scoreDocs[i].doc).get("chars"));
@Test public void storesPositionCorrectly() throws Exception { indexDirectory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); //use of Version, need to look at this. config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(indexDirectory, config); String str = "קשת רשת דבשת מיץ יבשת יבלת גחלת גדר אינציקלופדיה חבר"; Document doc = new Document(); doc.add(new Field("Text", str, fieldType)); writer.addDocument(doc); writer.close(); searcher = new IndexSearcher(DirectoryReader.open(indexDirectory)); runQuery("\"קשת\"", 0); runQuery("\"אינציקלופדיה\"", 8); runQuery("\"חבר\"", 9); indexDirectory.close(); }
// NOTE(review): incomplete fragment — the try block has no visible catch or
// finally, iWriter is never closed in this view, and the method signature,
// the origin of id/className/title/subTitles/content/tagId/weight, and the
// *IndexType FieldType constants are all outside this fragment.
// NOTE(review): Field.setBoost is the pre-Lucene-6 index-time boost API
// (removed in later versions) — if this code is migrated, boosts must move to
// query time or a separate boost field. Confirm the Lucene version in use.
try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(this.analyzer); IndexWriter iWriter = new IndexWriter(directory, indexWriterConfig); Document doc = new Document(); doc.add(new Field("id", id, simpleIndexType)); doc.add(new Field("className", className, simpleIndexType)); field = new Field("title", title, indexedTextType); field.setBoost(10f * weight); doc.add(field); field = new Field("subTitles", subTitles, indexedTextType); field.setBoost(6f * weight); doc.add(field); field = new Field("content", content, indexedTextType); field.setBoost(weight); doc.add(field); field = new Field("tag", tagId, simpleIndexType); field.setBoost(weight); doc.add(field); field = new Field("iconFileIdentifier", iconFileIdentifier, TextField.TYPE_STORED); doc.add(field);
// NOTE(review): incomplete fragment — resembles a Lucene suggester index
// build. Issues visible in this view (verify against the full file):
//  * A bare `break;` appears with no enclosing loop or switch, and
//    `surfaceForm`, `count`, and `success` are undefined here — the iteration
//    over input surface forms was lost in transcription.
//  * `writer.rollback();` immediately followed by `success = true;` would
//    discard everything just indexed and then report success. The usual
//    upstream pattern is commit-on-success with rollback only in a
//    `finally { if (!success) ... }` guard — the collapse likely merged the
//    success path and the failure cleanup.
IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(ramBufferSizeMB); IndexWriter writer = new IndexWriter(dir, iwc); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); ft.freeze(); Document doc = new Document(); Field field = new Field("body", "", ft); doc.add(field); break; field.setStringValue(surfaceForm.utf8ToString()); writer.addDocument(doc); count++; writer.rollback(); success = true; } finally {
/**
 * Indexes one JSON document under the given identifier.
 *
 * The id is stored (retrievable) and indexed verbatim; the JSON body is
 * tokenized and indexed with norms omitted, but neither stored nor given
 * term vectors.
 *
 * @param id   unique document identifier
 * @param json the JSON payload to index
 * @throws IOException on index write failure
 */
public void addDocument(final String id, final String json) throws IOException {
  // Custom FieldType: indexed + tokenized, no norms, not stored, no vectors.
  final FieldType jsonFieldType = new FieldType();
  jsonFieldType.setIndexed(true);
  jsonFieldType.setTokenized(true);
  jsonFieldType.setOmitNorms(true);
  jsonFieldType.setStored(false);
  jsonFieldType.setStoreTermVectors(false);

  final Document document = new Document();
  document.add(new StringField(DEFAULT_ID_FIELD, id, Store.YES));
  document.add(new Field(DEFAULT_SIREN_FIELD, json, jsonFieldType));
  writer.addDocument(document);
}
@Override public void store(URI type, Record record) throws IOException, IllegalStateException { writingOperations.get(type).incrementAndGet(); IndexWriter indexWriter = writers.get(type); String uri; try { uri = record.getID().toString(); } catch (NullPointerException e) { throw new IOException(e); } logger.debug(String.format("Inserting %s", uri)); Document doc = new Document(); doc.add(new Field(KEY_NAME, uri, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(VALUE_NAME, serializeRecord(record, serializer), Field.Store.YES)); // use "update" instead of "add" to avoid duplicates indexWriter.updateDocument(new Term(KEY_NAME, uri), doc); }
/**
 * Atomically applies a batch of key/value changes to the Lucene index.
 * A null value is a tombstone that deletes the key's document; any other
 * value replaces (or inserts) the document for that key. After committing,
 * the shared reader is reopened so subsequent searches see this batch.
 *
 * @param entries keys mapped to new values, or to null for deletion
 * @throws IOException wrapping any failure during the update
 */
private void indexPut(final Map<String, String> entries) throws IOException {
  try {
    int deleted = 0;
    int updated = 0;
    synchronized (this.luceneWriter) {
      for (final Map.Entry<String, String> entry : entries.entrySet()) {
        final String key = entry.getKey();
        final String value = entry.getValue();
        if (value == null) {
          // Tombstone: remove the document stored under this key.
          this.luceneWriter.deleteDocuments(new Term(KEY_FIELD, key));
          ++deleted;
          continue;
        }
        final Document doc = new Document();
        doc.add(new Field(KEY_FIELD, key, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(VALUE_FIELD, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
        LOGGER.debug("Document added: {}", doc.toString());
        // Keyed update (not add) so re-put keys do not create duplicates.
        this.luceneWriter.updateDocument(new Term(KEY_FIELD, key), doc);
        ++updated;
      }
      this.luceneWriter.commit();
      // Swap in a fresh reader that reflects the just-committed changes.
      this.luceneReader.close();
      this.luceneReader = this.luceneWriter.getReader();
    }
    LOGGER.debug("Updated Lucene index: {} documents updated, {} documents deleted", updated, deleted);
  } catch (final Throwable ex) {
    // Intentionally broad (matches original): every failure mode surfaces as IOException.
    throw new IOException("Failed to update Lucene index with entries " + entries, ex);
  }
}
// NOTE(review): incomplete fragment — the opening `try {` matching the visible
// `} catch` is outside this view, as are the declarations of iwriter,
// directory, analyzer, fieldName, and text, and the catch block's closing
// brace. e.printStackTrace() is an anti-pattern (stderr, no logger, error
// effectively swallowed) — route through the project's logger or rethrow;
// confirm intent against the full file before changing.
iwriter = new IndexWriter(directory, analyzer, true , IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); directory.close(); } catch (IOException e) { e.printStackTrace();