org.apache.lucene.index.MultiFields.getFields java code examples

// Merged view over the reader's fields; the getFields Javadoc quoted on this page says it
// may be null when the reader has no postings.
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
// Terms of the field named by QueryBuilder.U; `terms` is declared outside this excerpt.
terms = uFields.terms(QueryBuilder.U);

/**
 * Logs, at WARNING level, every term of the {@code QueryBuilder.DEFS} field that occurs in
 * more than 16 documents and whose text is longer than {@code freq} characters.
 *
 * <p>Nothing is logged when the index holds no documents, has no postings, or lacks the field.
 *
 * @param freq length threshold: only terms whose string form is strictly longer are logged
 *             (despite its name, the value is compared against the term length)
 * @throws IOException if the index cannot be opened or its terms cannot be read
 */
public void listTokens(int freq) throws IOException {
  IndexReader ireader = null;
  try {
    ireader = DirectoryReader.open(indexDirectory);
    if (ireader.numDocs() > 0) {
      Fields uFields = MultiFields.getFields(ireader);
      // getFields() may return null (no postings) and terms() may return null (field absent).
      Terms terms = uFields == null ? null : uFields.terms(QueryBuilder.DEFS);
      if (terms != null) {
        TermsEnum iter = terms.iterator();
        // A fresh TermsEnum is unpositioned, so advance with next() before reading the term.
        for (BytesRef term = iter.next(); term != null; term = iter.next()) {
          if (iter.docFreq() > 16) {
            String token = term.utf8ToString();
            if (token.length() > freq) {
              LOGGER.warning(token);
            }
          }
        }
      }
    }
  } finally {
    if (ireader != null) {
      try {
        ireader.close();
      } catch (IOException e) {
        // Closing is best-effort: report the failure without masking the primary outcome.
        LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
      }
    }
  }
}

int numDocs = reader.numDocs();
// Terms are only looked up when the reader reports at least one document.
if (numDocs > 0) {
  // Merged view over the reader's fields; the getFields Javadoc quoted on this page says it
  // may be null when the reader has no postings.
  Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
  // `terms` is declared outside this excerpt; the excerpt ends before the closing brace.
  terms = uFields.terms(QueryBuilder.U);

// Returns the merged Fields view of the reader behind a searcher created from `index`.
return MultiFields.getFields(index.createSearcher().getIndexReader());

/**
 * Collects the terms of the Lucene field named {@code "string"} from the index reader
 * obtained for the given label and key.
 *
 * @param label passed to {@code getLuceneIndexReader} to select the index
 * @param key   passed to {@code getLuceneIndexReader} to select the index
 * @return the term strings in enumeration order; empty when the reader has no postings
 *         or the field is absent
 */
private List<Object> distinctTerms(@Name("label") String label, @Name("key") String key) throws SchemaRuleNotFoundException, IndexNotFoundKernelException, IOException, DuplicateSchemaRuleException {
  SimpleIndexReader reader = getLuceneIndexReader(label,key);
  SortedIndexReader sortedIndexReader = new SortedIndexReader(reader, 0, Sort.INDEXORDER);
  Set<Object> values = new LinkedHashSet<>(100);
  Fields fields = MultiFields.getFields(sortedIndexReader.getIndexSearcher().getIndexReader());
  // getFields() may return null when the reader has no postings; treat that like a missing field.
  Terms terms = fields == null ? null : fields.terms("string");
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator();
    while ((termsEnum.next()) != null) {
      values.add(termsEnum.term().utf8ToString());
    }
  }
  return new ArrayList<>(values);
}

/**
 * Maps every term of the Lucene field named {@code "string"} to its document frequency,
 * using the sorted index reader obtained for the given label and key.
 *
 * @param label passed to {@code getSortedIndexReader} to select the index
 * @param key   passed to {@code getSortedIndexReader} to select the index
 * @return term string to document frequency; empty when the reader has no postings
 *         or the field is absent
 * @throws RuntimeException wrapping any failure raised while reading the index
 */
private Map<String, Integer> distinctTermsCount(@Name("label") String label, @Name("key") String key) {
  try {
    SortedIndexReader sortedIndexReader = getSortedIndexReader(label, key, 0, Sort.INDEXORDER);
    Fields fields = MultiFields.getFields(sortedIndexReader.getIndexSearcher().getIndexReader());
    Map<String, Integer> values = new HashMap<>();
    // getFields() may return null when the reader has no postings; an empty index is not an
    // error, so report an empty result instead of letting the NPE be wrapped below.
    Terms terms = fields == null ? null : fields.terms("string");
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      while ((termsEnum.next()) != null) {
        values.put(termsEnum.term().utf8ToString(), termsEnum.docFreq());
      }
    }
    return values;
  } catch (Exception e) {
    throw new RuntimeException("Error collecting distinct terms of label: " + label + " and key: " + key, e);
  }
}

/**
 * Wraps a composite reader, caching its merged {@link Fields} and registering this wrapper
 * as a parent of the wrapped reader.
 *
 * <p>The metadata is derived from the leaves: with no leaves it uses {@code Version.LATEST};
 * otherwise it takes the created-major version of the first leaf and the oldest minimum
 * version among the leaves, or {@code null} as soon as a leaf reports none.
 *
 * @param reader the composite reader to wrap
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader) throws IOException {
  super();
  in = reader;
  fields = MultiFields.getFields(in);
  in.registerParentReader(this);
  if (!reader.leaves().isEmpty()) {
    Version oldest = Version.LATEST;
    for (LeafReaderContext ctx : reader.leaves()) {
      Version candidate = ctx.reader().getMetaData().getMinVersion();
      if (candidate == null) {
        // One leaf without a minimum version makes the overall minimum unknown.
        oldest = null;
        break;
      }
      if (oldest.onOrAfter(candidate)) {
        oldest = candidate;
      }
    }
    int createdMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor();
    metaData = new LeafMetaData(createdMajor, oldest, null);
  } else {
    metaData = new LeafMetaData(Version.LATEST.major, Version.LATEST, null);
  }
}

/**
 * Wraps a composite reader, caching its merged {@link Fields} and registering this wrapper
 * as a parent of the wrapped reader.
 *
 * @param reader  the composite reader to wrap
 * @param merging stored on the wrapper as-is; its use is not visible in this excerpt
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
 super();
 this.in = reader;
 this.fields = MultiFields.getFields(reader);
 // Register only after the wrapped reader and its merged fields are in place.
 reader.registerParentReader(this);
 this.merging = merging;
}

/**
 * Returns the merged terms of {@code field} for the given reader.
 *
 * <p>This method may return null if the field does not exist, or if the reader has no
 * postings at all (in which case {@code getFields} itself yields null).
 *
 * @param r     the reader to inspect
 * @param field the field whose terms are requested
 * @throws IOException if the underlying fields cannot be read
 */
public static Terms getTerms(IndexReader r, String field) throws IOException {
 final Fields fields = getFields(r);
 // Guard the lookup: dereferencing a null Fields would turn "no postings" into an NPE.
 return fields == null ? null : fields.terms(field);
}

/**
 * Returns the merged terms of {@code field} for the given reader.
 *
 * <p>This method may return null if the field does not exist, or if the reader has no
 * postings at all (in which case {@code getFields} itself yields null).
 *
 * @param r     the reader to inspect
 * @param field the field whose terms are requested
 * @throws IOException if the underlying fields cannot be read
 */
public static Terms getTerms(IndexReader r, String field) throws IOException {
 final Fields fields = getFields(r);
 // Guard the lookup: dereferencing a null Fields would turn "no postings" into an NPE.
 return fields == null ? null : fields.terms(field);
}

/**
 * Wraps a composite reader, caching its merged {@link Fields} and registering this wrapper
 * as a parent of the wrapped reader.
 *
 * @param reader  the composite reader to wrap
 * @param merging stored on the wrapper as-is; its use is not visible in this excerpt
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
 super();
 this.in = reader;
 this.fields = MultiFields.getFields(reader);
 // Register only after the wrapped reader and its merged fields are in place.
 reader.registerParentReader(this);
 this.merging = merging;
}

/**
 * Builds one line per field of the searcher's index, formatted as
 * {@code "<path> <field> <docCount>"}.
 *
 * @param path     label prepended to every line
 * @param searcher searcher whose underlying reader is inspected
 * @return the formatted lines; empty when the reader exposes no fields
 * @throws IOException if the index cannot be read
 */
private static ArrayList<String> getFieldInfo(String path, IndexSearcher searcher) throws IOException {
  ArrayList<String> lines = new ArrayList<String>();
  IndexReader indexReader = searcher.getIndexReader();
  Fields indexedFields = MultiFields.getFields(indexReader);
  if (indexedFields == null) {
    // No postings in this reader, hence nothing to report.
    return lines;
  }
  for (String fieldName : indexedFields) {
    int docCount = indexReader.getDocCount(fieldName);
    lines.add(path + " " + fieldName + " " + docCount);
  }
  return lines;
}

/**
 * Builds one line per field of the searcher's index, formatted as
 * {@code "<path> <field> <docCount>"}.
 *
 * @param path     label prepended to every line
 * @param searcher searcher whose underlying reader is inspected
 * @return the formatted lines; empty when the reader exposes no fields
 * @throws IOException if the index cannot be read
 */
private static ArrayList<String> getFieldInfo(String path, IndexSearcher searcher) throws IOException {
  ArrayList<String> lines = new ArrayList<String>();
  IndexReader indexReader = searcher.getIndexReader();
  Fields indexedFields = MultiFields.getFields(indexReader);
  if (indexedFields == null) {
    // No postings in this reader, hence nothing to report.
    return lines;
  }
  for (String fieldName : indexedFields) {
    int docCount = indexReader.getDocCount(fieldName);
    lines.add(path + " " + fieldName + " " + docCount);
  }
  return lines;
}

/**
 * Prints summary statistics for the index behind {@code reader} to standard output:
 * document counts and term counts for {@code LuceneDocumentGenerator.FIELD_BODY},
 * followed by one line per field with its index options and term-vector flag.
 *
 * <p>An index without postings, or without the body field, is reported with zero unique
 * terms instead of failing.
 *
 * @throws IOException if the index cannot be read
 */
void printIndexStats() throws IOException {
 Fields fields = MultiFields.getFields(reader);
 // getFields() may return null (no postings) and terms() may return null (field absent).
 Terms terms = fields == null ? null : fields.terms(LuceneDocumentGenerator.FIELD_BODY);
 System.out.println("Index statistics");
 System.out.println("----------------");
 System.out.println("documents:             " + reader.numDocs());
 System.out.println("documents (non-empty): " + reader.getDocCount(LuceneDocumentGenerator.FIELD_BODY));
 System.out.println("unique terms:          " + (terms == null ? 0 : terms.size()));
 System.out.println("total terms:           " + reader.getSumTotalTermFreq(LuceneDocumentGenerator.FIELD_BODY));
 System.out.println("stored fields:");
 FieldInfos fieldInfos = MultiFields.getMergedFieldInfos(reader);
 if (fields != null) {
  // The listing iterates the field names exposed by the merged Fields view.
  for (String fd : fields) {
   FieldInfo fi = fieldInfos.fieldInfo(fd);
   System.out.println("  " + fd + " (" + "indexOption: " + fi.getIndexOptions() +
     ", hasVectors: " + fi.hasVectors() + ")");
  }
 }
}

/**
 * Asserts that two index readers are equivalent by running a fixed sequence of
 * per-aspect comparisons: statistics, fields, norms, stored fields, term vectors,
 * doc values, deleted docs, field infos and points.
 *
 * @param info        passed through to every comparison helper
 * @param leftReader  first reader to compare
 * @param rightReader second reader to compare
 * @throws IOException declared because the comparisons read from the indexes
 */
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
 assertReaderStatisticsEquals(info, leftReader, rightReader);
 // Fields are compared through the merged (top-level) view of each reader.
 assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
 assertNormsEquals(info, leftReader, rightReader);
 assertStoredFieldsEquals(info, leftReader, rightReader);
 assertTermVectorsEquals(info, leftReader, rightReader);
 assertDocValuesEquals(info, leftReader, rightReader);
 assertDeletedDocsEquals(info, leftReader, rightReader);
 assertFieldInfosEquals(info, leftReader, rightReader);
 assertPointsEquals(info, leftReader, rightReader);
}

/**
 * Create a QueryTermFilter for an IndexReader.
 *
 * <p>For every field of the reader, all of its terms are collected into a
 * {@link BytesRefHash} that is stored in {@code termsHash} under the field name. A reader
 * without postings contributes no entries.
 *
 * @param reader the {@link IndexReader}
 * @throws IOException on error
 */
public QueryTermFilter(IndexReader reader) throws IOException {
  Fields mf = MultiFields.getFields(reader);
  if (mf == null) {
    // getFields() may return null when the reader has no postings; nothing to collect then.
    return;
  }
  for (String field : mf) {
    BytesRefHash terms = new BytesRefHash();
    Terms t = mf.terms(field);
    if (t != null) {
      TermsEnum te = t.iterator();
      BytesRef term;
      while ((term = te.next()) != null) {
        terms.add(term);
      }
    }
    // A field is recorded even when it has no terms, with an empty hash.
    termsHash.put(field, terms);
  }
}

// Use the reader of the searcher attached to `get` when there is one, otherwise the
// reader of the surrounding `searcher`.
Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();

/**
 * Builds term vectors on the fly by indexing the selected field values into a
 * {@link MemoryIndex} and returning the merged {@link Fields} of that index.
 *
 * @param getFields        candidate fields and their values
 * @param withOffsets      passed to the {@link MemoryIndex} constructor
 * @param perFieldAnalyzer optional per-field analyzer overrides, handed to {@code getAnalyzerAtField}
 * @param fields           names of the fields to index; all other candidates are skipped
 * @return the merged fields of the in-memory index
 * @throws IOException if the in-memory index cannot be read
 */
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
    throws IOException {
  // Stage the requested values in a throw-away in-memory index.
  MemoryIndex memoryIndex = new MemoryIndex(withOffsets);
  for (GetField candidate : getFields) {
    String name = candidate.getName();
    // Some fields are returned even when not asked for (e.g. _timestamp); only index requested ones.
    if (fields.contains(name)) {
      Analyzer fieldAnalyzer = getAnalyzerAtField(name, perFieldAnalyzer);
      for (Object value : candidate.getValues()) {
        memoryIndex.addField(name, value.toString(), fieldAnalyzer);
      }
    }
  }
  // Read the vectors back through the merged top-level view of that index.
  return MultiFields.getFields(memoryIndex.createSearcher().getIndexReader());
}

/**
 * Computes statistics for one field while holding a read semaphore.
 *
 * @param fieldName the field to inspect
 * @return stats built from the field's merged terms, or an empty {@code FieldStats} when
 *         the field has no terms or the index has no postings
 * @throws IOException if the index cannot be read
 */
FieldStats getFieldStats(String fieldName) throws IOException {
  try (final ReadWriteSemaphores.Lock lock = readWriteSemaphores.acquireReadSemaphore()) {
    return writerAndSearcher.search((indexSearcher, taxonomyReader) -> {
      // MultiFields.getTerms() copes with a reader that has no postings, whereas chaining
      // getFields(...).terms(...) dereferences a possibly-null Fields.
      final Terms terms = MultiFields.getTerms(indexSearcher.getIndexReader(), fieldName);
      return terms == null ? new FieldStats() : new FieldStats(terms, fieldMap.getFieldType(null, fieldName));
    });
  }
}

/**
 * Appends to {@code values} the terms of the term's field that start with the term's text.
 *
 * <p>Does nothing when the reader exposes no fields or the field has no terms.
 *
 * @param reader  Index reader to search.
 * @param values  Receives the matching terms, decoded as UTF-8 strings.
 * @param term    Supplies the field name and the prefix bytes.
 *
 * @throws IOException If the prefix term enumeration fails.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
 Fields indexFields = MultiFields.getFields(reader);
 if (indexFields == null) {
  return;
 }
 org.apache.lucene.index.Terms fieldTerms = indexFields.terms(term.field());
 if (fieldTerms == null) {
  return;
 }
 CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
 // NOTE(review): Terms.intersect documents startTerm as exclusive, so a term identical to
 // the prefix itself appears to be skipped - confirm this is intended.
 TermsEnum matches = fieldTerms.intersect(prefixAutomaton, term.bytes());
 for (BytesRef match = matches.next(); match != null; match = matches.next()) {
  values.add(match.utf8ToString());
 }
}

Javadoc

Returns a single Fields instance for this reader, merging fields/terms/docs/positions on the fly. This method will return null if the reader has no postings.

NOTE: this is a slow way to access postings. It's better to get the sub-readers and iterate through them yourself.

Popular methods of MultiFields

getTerms
This method may return null if the field does not exist or if it has no terms.
getLiveDocs
Returns a single Bits instance for this reader, merging live Documents on the fly. This method will
getIndexedFields
Call this to get the (merged) FieldInfos representing the set of indexed fields only for a composit
getMergedFieldInfos
Call this to get the (merged) FieldInfos for a composite reader. NOTE: the returned field numbers wi
getTermDocsEnum
Returns PostingsEnum for the specified field and term, with control over whether freqs are required.
getTermPositionsEnum
Returns PostingsEnum for the specified field and term, with control over whether offsets and payload
<init>
Expert: construct a new MultiFields instance directly.

Popular in Java

Running tasks concurrently on multiple threads
findViewById (Activity)
compareTo (BigDecimal)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
UnknownHostException (java.net)
Thrown when a hostname can not be resolved.
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Github Copilot alternatives

How to use the getFields method in org.apache.lucene.index.MultiFields

Best Java code snippets using org.apache.lucene.index.MultiFields.getFields (Showing top 20 results out of 315)

How to use the getFields method in org.apache.lucene.index.MultiFields