public void add(String field, String content) throws IOException {
    memoryIndex.addField(field, content, generalAnalyzer);
    //memoryIndex.addField(field + ALPHA_IDEOGRAPH_SUFFIX,
    //    content, alphaIdeographAnalyzer);
    count(field);
    //count(field + ALPHA_IDEOGRAPH_SUFFIX);
}
/**
 * Equivalent to <code>addField(fieldName, stream, 1.0f)</code>.
 *
 * @param fieldName
 *            a name to be associated with the text
 * @param stream
 *            the token stream to retrieve tokens from
 */
public void addField(String fieldName, TokenStream stream) {
    addField(fieldName, stream, 1.0f);
}
/**
 * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
 * @param field the field to add
 * @param analyzer the analyzer to use for term analysis
 * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
 *                                  structures are not supported by MemoryIndex
 */
public void addField(IndexableField field, Analyzer analyzer) {
    addField(field, analyzer, 1.0f);
}
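// A minimal usage sketch for the addField(IndexableField, Analyzer) overload above.
// The field name, text, analyzer, and query are invented for illustration and are
// not part of the snippets in this section.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class AddFieldExample {
    public static void main(String[] args) {
        MemoryIndex mi = new MemoryIndex();
        // TextField is tokenized and indexed, so addField accepts it;
        // a DocValues or Point field would throw IllegalArgumentException
        mi.addField(new TextField("body", "quick brown fox", Field.Store.NO),
                new StandardAnalyzer());
        float score = mi.search(new TermQuery(new Term("body", "fox")));
        System.out.println(score > 0.0f); // true
    }
}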
/**
 * Iterates over the given token stream and adds the resulting terms to the index;
 * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
 * Lucene {@link org.apache.lucene.document.Field}.
 * Finally closes the token stream. Note that untokenized keywords can be added with this method via
 * {@link #keywordTokenStream(Collection)}, the Lucene <code>KeywordTokenizer</code> or similar utilities.
 *
 * @param fieldName
 *            a name to be associated with the text
 * @param stream
 *            the token stream to retrieve tokens from.
 */
public void addField(String fieldName, TokenStream stream) {
    addField(fieldName, stream, 0);
}
/**
 * Iterates over the given token stream and adds the resulting terms to the index;
 * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
 * Lucene {@link org.apache.lucene.document.Field}.
 * Finally closes the token stream. Note that untokenized keywords can be added with this method via
 * {@link #keywordTokenStream(Collection)}, the Lucene <code>KeywordTokenizer</code> or similar utilities.
 *
 * @param fieldName
 *            a name to be associated with the text
 * @param stream
 *            the token stream to retrieve tokens from.
 * @param positionIncrementGap
 *            the position increment gap if fields with the same name are added more than once
 */
public void addField(String fieldName, TokenStream stream, int positionIncrementGap) {
    addField(fieldName, stream, positionIncrementGap, 1);
}
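// A hedged sketch of what the position increment gap buys you, assuming a Lucene
// version that has the addField(String, TokenStream, int) overload above. The field
// name "f" and the gap of 100 are made up for the example: adding two values for the
// same field with a large gap keeps phrase queries from matching across the boundary.
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.PhraseQuery;

public class PositionGapExample {
    public static void main(String[] args) {
        MemoryIndex mi = new MemoryIndex();
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
        // two values for the same field, separated by a 100-position gap
        mi.addField("f", analyzer.tokenStream("f", "foo bar"), 100);
        mi.addField("f", analyzer.tokenStream("f", "baz qux"), 100);
        // "bar baz" spans the value boundary, so the phrase cannot match
        System.out.println(mi.search(new PhraseQuery("f", "bar", "baz")) == 0.0f); // true
        // "foo bar" lies within a single value and matches
        System.out.println(mi.search(new PhraseQuery("f", "foo", "bar")) > 0.0f);  // true
    }
}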
/**
 * Iterates over the given token stream and adds the resulting terms to the index;
 * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
 * Lucene {@link org.apache.lucene.document.Field}.
 * Finally closes the token stream. Note that untokenized keywords can be added with this method via
 * {@link #keywordTokenStream(Collection)}, the Lucene <code>KeywordTokenizer</code> or similar utilities.
 *
 * @param fieldName
 *            a name to be associated with the text
 * @param stream
 *            the token stream to retrieve tokens from.
 * @param boost
 *            the boost factor for hits for this field
 *
 * @see org.apache.lucene.document.Field#setBoost(float)
 */
public void addField(String fieldName, TokenStream stream, float boost) {
    addField(fieldName, stream, boost, 0);
}
/**
 * Builds a MemoryIndex from a lucene {@link Document} using an analyzer
 * @param document the document to index
 * @param analyzer the analyzer to use
 * @param storeOffsets <code>true</code> if offsets should be stored
 * @param storePayloads <code>true</code> if payloads should be stored
 * @param maxReusedBytes the number of bytes that should remain in the internal memory pools
 *                       after {@link #reset()} is called
 * @return a MemoryIndex
 */
public static MemoryIndex fromDocument(Document document, Analyzer analyzer, boolean storeOffsets,
                                       boolean storePayloads, long maxReusedBytes) {
    MemoryIndex mi = new MemoryIndex(storeOffsets, storePayloads, maxReusedBytes);
    for (IndexableField field : document) {
        mi.addField(field, analyzer);
    }
    return mi;
}
/**
 * Builds a MemoryIndex from a lucene {@link Document} using an analyzer
 * @param document the document to index
 * @param analyzer the analyzer to use
 * @param storeOffsets <code>true</code> if offsets should be stored
 * @param storePayloads <code>true</code> if payloads should be stored
 * @param maxReusedBytes the number of bytes that should remain in the internal memory pools
 *                       after {@link #reset()} is called
 * @return a MemoryIndex
 */
public static MemoryIndex fromDocument(Iterable<? extends IndexableField> document, Analyzer analyzer,
                                       boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
    MemoryIndex mi = new MemoryIndex(storeOffsets, storePayloads, maxReusedBytes);
    for (IndexableField field : document) {
        mi.addField(field, analyzer);
    }
    return mi;
}
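// A brief usage sketch for fromDocument. The document content, analyzer, query, and
// the 1 MB pool size are invented for illustration; since Document implements
// Iterable<IndexableField>, either overload above applies.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class FromDocumentExample {
    public static void main(String[] args) {
        Document doc = new Document();
        doc.add(new TextField("title", "memory index demo", Field.Store.NO));
        // store offsets but not payloads; keep up to 1 MB pooled across reset() calls
        MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer(), true, false, 1024 * 1024);
        float score = mi.search(new TermQuery(new Term("title", "demo")));
        System.out.println(score > 0.0f); // true
    }
}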
/**
 * Convenience method; Tokenizes the given field text and adds the resulting
 * terms to the index; Equivalent to adding an indexed non-keyword Lucene
 * {@link org.apache.lucene.document.Field} that is tokenized, not stored,
 * termVectorStored with positions (or termVectorStored with positions and offsets).
 *
 * @param fieldName
 *            a name to be associated with the text
 * @param text
 *            the text to tokenize and index.
 * @param analyzer
 *            the analyzer to use for tokenization
 */
public void addField(String fieldName, String text, Analyzer analyzer) {
    if (fieldName == null)
        throw new IllegalArgumentException("fieldName must not be null");
    if (text == null)
        throw new IllegalArgumentException("text must not be null");
    if (analyzer == null)
        throw new IllegalArgumentException("analyzer must not be null");

    TokenStream stream = analyzer.tokenStream(fieldName, text);
    addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName),
            analyzer.getOffsetGap(fieldName));
}
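// This String-based overload is the most common entry point. A minimal end-to-end
// sketch showing the typical one-index-per-document pattern with reset() to reuse
// internal buffers; the texts and query are invented for illustration.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class ReuseExample {
    public static void main(String[] args) {
        MemoryIndex mi = new MemoryIndex();
        StandardAnalyzer analyzer = new StandardAnalyzer();
        TermQuery query = new TermQuery(new Term("content", "fox"));
        for (String text : new String[] {"quick brown fox", "lazy dog"}) {
            mi.reset(); // clear the previous document but keep pooled buffers
            mi.addField("content", text, analyzer);
            System.out.println(text + " -> " + (mi.search(query) > 0.0f));
        }
    }
}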
public class MemoryIndexTest {

    private static final String DATE_FIELD = "date";

    MemoryIndex index = new MemoryIndex();
    NumericTokenStream numericTokenStream = new NumericTokenStream();

    @Before
    public void init() {
        index.addField(DATE_FIELD, numericTokenStream.setIntValue(20141116));
    }

    @Test
    public void testRange() {
        Query query = NumericRangeQuery.newIntRange(DATE_FIELD, 20141115, 20141118, true, true);
        assertTrue(index.search(query) > 0);
    }
}
private SingletonDocumentBatch(Collection<InputDocument> documents, Similarity similarity) {
    super(documents, similarity);
    assert documents.size() == 1;
    memoryindex.setSimilarity(similarity);
    for (InputDocument doc : documents) {
        for (IndexableField field : doc.getDocument()) {
            memoryindex.addField(field, doc.getAnalyzers());
        }
    }
    memoryindex.freeze();
    reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
private static HashSet<String> performSearch(Analyzer a) throws IOException {
    HashSet<String> results = new HashSet<>();
    for (File file : getTestFiles()) {
        MemoryIndex memoryIndex = new MemoryIndex(true);
        final List<String> lines = Files.readAllLines(file.toPath(), Charset.forName("UTF-8"));
        memoryIndex.addField("title", lines.get(0), a);
        StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            sb.append(line);
        }
        memoryIndex.addField("content", sb.toString(), a);
        IndexSearcher searcher = memoryIndex.createSearcher();
        ExistsCollector collector = new ExistsCollector();
        searcher.search(new TermQuery(new Term("content", "אני")), collector);
        if (collector.exists()) {
            results.add(file.getName());
        }
    }
    return results;
}
private boolean matchField(Object iLeft, Object iRight, OLuceneFullTextIndex index, MemoryIndex memoryIndex)
        throws IOException, ParseException {
    for (IndexableField field : index.buildDocument(iLeft).getFields()) {
        memoryIndex.addField(field, index.indexAnalyzer());
    }
    return memoryIndex.search(index.buildQuery(iRight)) > 0.0f;
}
/**
 * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
 * @param field the field to add
 * @param analyzer the analyzer to use for term analysis
 * @param boost a field boost
 * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
 *                                  structures are not supported by MemoryIndex
 */
public void addField(IndexableField field, Analyzer analyzer, float boost) {
    if (field.fieldType().docValuesType() != DocValuesType.NONE)
        throw new IllegalArgumentException("MemoryIndex does not support DocValues fields");
    // also reject Point fields, per the contract documented above
    if (field.fieldType().pointDimensionCount() != 0)
        throw new IllegalArgumentException("MemoryIndex does not support Points");
    if (analyzer == null) {
        addField(field.name(), field.tokenStream(null, null), boost);
    } else {
        addField(field.name(), field.tokenStream(analyzer, null), boost,
                analyzer.getPositionIncrementGap(field.name()), analyzer.getOffsetGap(field.name()));
    }
}
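// A small sketch of the documented rejection behavior: a pure DocValues field
// carries no inverted-index terms, so addField refuses it. The field name and
// value are invented for illustration.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.memory.MemoryIndex;

public class DocValuesRejectionExample {
    public static void main(String[] args) {
        MemoryIndex mi = new MemoryIndex();
        try {
            mi.addField(new NumericDocValuesField("rank", 42L), new StandardAnalyzer());
        } catch (IllegalArgumentException expected) {
            System.out.println(expected.getMessage()); // MemoryIndex does not support DocValues fields
        }
    }
}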
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets,
                                   @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
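// For readers unfamiliar with the Fields view that the method above returns, a hedged
// sketch of consuming it with the standard TermsEnum iteration; the helper name and
// field name are assumptions, not part of the original code.
import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermVectorDump {
    // prints every term of one field from a Fields instance such as the
    // one returned by generateTermVectors above
    static void dumpTerms(Fields fields, String fieldName) throws IOException {
        Terms terms = fields.terms(fieldName);
        if (terms == null) {
            return; // field not present
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            System.out.println(term.utf8ToString() + " freq=" + termsEnum.totalTermFreq());
        }
    }
}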
public boolean isDeleted(Document document, Object key, OIdentifiable value) {
    boolean match = false;
    List<String> strings = deleted.get(value.getIdentity().toString());
    if (strings != null) {
        MemoryIndex memoryIndex = new MemoryIndex();
        for (String string : strings) {
            Query q = engine.deleteQuery(string, value);
            memoryIndex.reset();
            for (IndexableField field : document.getFields()) {
                memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer());
            }
            match = match || (memoryIndex.search(q) > 0.0f);
        }
        return match;
    }
    return match;
}
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
@Override
public Object execute(Object iThis, OIdentifiable iCurrentRecord, Object iCurrentResult,
                      Object[] params, OCommandContext ctx) {
    OElement element = iThis instanceof OElement ? (OElement) iThis : ((OResult) iThis).toElement();
    String indexName = (String) params[0];
    OLuceneFullTextIndex index = searchForIndex(ctx, indexName);
    if (index == null)
        return false;

    String query = (String) params[1];
    MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx);
    List<Object> key = index.getDefinition().getFields().stream()
            .map(s -> element.getProperty(s))
            .collect(Collectors.toList());
    try {
        for (IndexableField field : index.buildDocument(key).getFields()) {
            memoryIndex.addField(field, index.indexAnalyzer());
        }
        ODocument metadata = getMetadata(params);
        OLuceneKeyAndMetadata keyAndMetadata = new OLuceneKeyAndMetadata(
                new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata);
        return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f;
    } catch (ParseException e) {
        OLogManager.instance().error(this, "error occurred while building query", e);
    }
    return null;
}
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}