Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U);
public void listTokens(int freq) throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); iter = terms.iterator(); // init uid iterator } while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { LOGGER.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next==null) {iter=null;} } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e); } } } }
int numDocs = reader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U);
return MultiFields.getFields(index.createSearcher().getIndexReader());
/**
 * Collects the distinct terms of the {@code "string"} field from the Lucene index
 * resolved for the given label and key, in the order the terms enumeration yields them.
 *
 * @param label label used to look up the index reader
 * @param key   key used to look up the index reader
 * @return a new list of term strings; empty when the field has no terms
 */
private List<Object> distinctTerms(@Name("label") String label, @Name("key") String key) throws SchemaRuleNotFoundException, IndexNotFoundKernelException, IOException, DuplicateSchemaRuleException {
    SimpleIndexReader luceneReader = getLuceneIndexReader(label, key);
    SortedIndexReader sorted = new SortedIndexReader(luceneReader, 0, Sort.INDEXORDER);

    Set<Object> distinct = new LinkedHashSet<>(100);
    Fields indexFields = MultiFields.getFields(sorted.getIndexSearcher().getIndexReader());
    Terms stringTerms = indexFields.terms("string");
    if (stringTerms == null) {
        // Field absent: nothing to enumerate.
        return new ArrayList<>(distinct);
    }

    TermsEnum cursor = stringTerms.iterator();
    while (cursor.next() != null) {
        distinct.add(cursor.term().utf8ToString());
    }
    return new ArrayList<>(distinct);
}
/**
 * Maps every term of the {@code "string"} field, from the index resolved for the
 * given label and key, to the document frequency reported by the terms enumeration.
 *
 * @param label label used to look up the index reader
 * @param key   key used to look up the index reader
 * @return a new map from term text to document frequency; empty when the field has no terms
 * @throws RuntimeException wrapping any exception raised while reading the index
 */
private Map<String, Integer> distinctTermsCount(@Name("label") String label, @Name("key") String key) {
    try {
        SortedIndexReader sorted = getSortedIndexReader(label, key, 0, Sort.INDEXORDER);
        Fields indexFields = MultiFields.getFields(sorted.getIndexSearcher().getIndexReader());

        Map<String, Integer> counts = new HashMap<>();
        Terms stringTerms = indexFields.terms("string");
        if (stringTerms == null) {
            return counts;
        }

        TermsEnum cursor = stringTerms.iterator();
        while (cursor.next() != null) {
            String text = cursor.term().utf8ToString();
            counts.put(text, cursor.docFreq());
        }
        return counts;
    } catch (Exception e) {
        throw new RuntimeException("Error collecting distinct terms of label: " + label + " and key: " + key, e);
    }
}
/**
 * Wraps the given composite reader, exposing its merged fields and registering this
 * wrapper as a parent reader of it.
 *
 * <p>The leaf metadata is derived from the wrapped reader's leaves: with no leaves it
 * uses {@code Version.LATEST}; otherwise it takes the created-major version of the
 * first leaf and the smallest minimum version over all leaves, or {@code null} as soon
 * as any leaf reports no minimum version.
 *
 * @param reader the composite reader to wrap
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader) throws IOException {
    super();
    in = reader;
    fields = MultiFields.getFields(reader);
    reader.registerParentReader(this);

    if (!reader.leaves().isEmpty()) {
        Version oldest = Version.LATEST;
        for (LeafReaderContext ctx : reader.leaves()) {
            Version candidate = ctx.reader().getMetaData().getMinVersion();
            if (candidate == null) {
                // One leaf without a minimum version makes the overall minimum unknown.
                oldest = null;
                break;
            }
            if (oldest.onOrAfter(candidate)) {
                oldest = candidate;
            }
        }
        int createdMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor();
        metaData = new LeafMetaData(createdMajor, oldest, null);
    } else {
        metaData = new LeafMetaData(Version.LATEST.major, Version.LATEST, null);
    }
}
/**
 * Wraps the given composite reader, caching its merged fields and registering this
 * wrapper as a parent reader of it.
 *
 * @param reader  the composite reader to wrap
 * @param merging value stored in the {@code merging} field
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
    super();
    this.merging = merging;
    this.in = reader;
    this.fields = MultiFields.getFields(reader);
    reader.registerParentReader(this);
}
/**
 * Looks up the terms of a single field through the fields view of the given reader.
 *
 * @param r     the reader whose fields are consulted
 * @param field the field name to look up
 * @return the terms of {@code field}; may be {@code null} if the field does not exist
 * @throws IOException if the fields cannot be read
 */
public static Terms getTerms(IndexReader r, String field) throws IOException {
    final Fields allFields = getFields(r);
    return allFields.terms(field);
}
/**
 * Looks up the terms of a single field through the fields view of the given reader.
 *
 * @param r     the reader whose fields are consulted
 * @param field the field name to look up
 * @return the terms of {@code field}; may be {@code null} if the field does not exist
 * @throws IOException if the fields cannot be read
 */
public static Terms getTerms(IndexReader r, String field) throws IOException {
    final Fields allFields = getFields(r);
    return allFields.terms(field);
}
/**
 * Wraps the given composite reader, caching its merged fields and registering this
 * wrapper as a parent reader of it.
 *
 * @param reader  the composite reader to wrap
 * @param merging value stored in the {@code merging} field
 * @throws IOException if the merged fields cannot be obtained
 */
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
    super();
    this.merging = merging;
    this.in = reader;
    this.fields = MultiFields.getFields(reader);
    reader.registerParentReader(this);
}
/**
 * Builds one line per indexed field of the searcher's reader, formatted as
 * {@code "<path> <field> <docCount>"}.
 *
 * @param path     prefix written at the start of every line
 * @param searcher searcher whose index reader is inspected
 * @return the formatted lines; empty when the reader exposes no fields
 * @throws IOException if the index cannot be read
 */
private static ArrayList<String> getFieldInfo(String path, IndexSearcher searcher) throws IOException {
    final ArrayList<String> lines = new ArrayList<String>();
    final IndexReader indexReader = searcher.getIndexReader();
    final Fields indexedFields = MultiFields.getFields(indexReader);
    if (indexedFields == null) {
        return lines;
    }
    for (String fieldName : indexedFields) {
        int docCount = indexReader.getDocCount(fieldName);
        lines.add(path + " " + fieldName + " " + docCount);
    }
    return lines;
}
/**
 * Builds one line per indexed field of the searcher's reader, formatted as
 * {@code "<path> <field> <docCount>"}.
 *
 * @param path     prefix written at the start of every line
 * @param searcher searcher whose index reader is inspected
 * @return the formatted lines; empty when the reader exposes no fields
 * @throws IOException if the index cannot be read
 */
private static ArrayList<String> getFieldInfo(String path, IndexSearcher searcher) throws IOException {
    final ArrayList<String> lines = new ArrayList<String>();
    final IndexReader indexReader = searcher.getIndexReader();
    final Fields indexedFields = MultiFields.getFields(indexReader);
    if (indexedFields == null) {
        return lines;
    }
    for (String fieldName : indexedFields) {
        int docCount = indexReader.getDocCount(fieldName);
        lines.add(path + " " + fieldName + " " + docCount);
    }
    return lines;
}
/**
 * Prints summary statistics of the index behind {@code reader} to standard output:
 * document counts, unique and total term counts for
 * {@code LuceneDocumentGenerator.FIELD_BODY}, and, for every field with postings,
 * its index options and whether it has term vectors.
 *
 * <p>When the body field has no terms the unique-term count is printed as 0, and
 * when the reader exposes no fields the per-field listing is left empty, instead of
 * failing with a {@code NullPointerException}.
 *
 * @throws IOException if the index cannot be read
 */
void printIndexStats() throws IOException {
  Fields fields = MultiFields.getFields(reader);
  // Either the Fields instance or the body field's Terms may be absent.
  Terms terms = fields == null ? null : fields.terms(LuceneDocumentGenerator.FIELD_BODY);

  System.out.println("Index statistics");
  System.out.println("----------------");
  System.out.println("documents:             " + reader.numDocs());
  System.out.println("documents (non-empty): " + reader.getDocCount(LuceneDocumentGenerator.FIELD_BODY));
  System.out.println("unique terms:          " + (terms == null ? 0 : terms.size()));
  System.out.println("total terms:           " + reader.getSumTotalTermFreq(LuceneDocumentGenerator.FIELD_BODY));

  System.out.println("stored fields:");
  if (fields == null) {
    return;
  }

  FieldInfos fieldInfos = MultiFields.getMergedFieldInfos(reader);
  for (String fd : fields) {
    FieldInfo fi = fieldInfos.fieldInfo(fd);
    System.out.println("  " + fd + " (" + "indexOption: " + fi.getIndexOptions() +
        ", hasVectors: " + fi.hasVectors() + ")");
  }
}
/**
 * Asserts that two index readers are equivalent by running, in order, the checks for
 * reader statistics, fields, norms, stored fields, term vectors, doc values, deleted
 * docs, field infos and points.
 *
 * @param info        message passed through to every individual assertion
 * @param leftReader  first reader to compare
 * @param rightReader second reader to compare
 * @throws IOException if either reader cannot be read
 */
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  assertReaderStatisticsEquals(info, leftReader, rightReader);

  // Compare the merged field views of both readers (deep comparison enabled).
  final Fields leftFields = MultiFields.getFields(leftReader);
  final Fields rightFields = MultiFields.getFields(rightReader);
  assertFieldsEquals(info, leftReader, leftFields, rightFields, true);

  assertNormsEquals(info, leftReader, rightReader);
  assertStoredFieldsEquals(info, leftReader, rightReader);
  assertTermVectorsEquals(info, leftReader, rightReader);
  assertDocValuesEquals(info, leftReader, rightReader);
  assertDeletedDocsEquals(info, leftReader, rightReader);
  assertFieldInfosEquals(info, leftReader, rightReader);
  assertPointsEquals(info, leftReader, rightReader);
}
/**
 * Create a QueryTermFilter for an IndexReader.
 *
 * <p>For every field of the reader, all of its terms are collected into a
 * {@link BytesRefHash} that is stored in {@code termsHash} under the field name.
 * A field without terms is still registered, with an empty hash.
 *
 * @param reader the {@link IndexReader}
 * @throws IOException on error
 */
public QueryTermFilter(IndexReader reader) throws IOException {
  Fields indexFields = MultiFields.getFields(reader);
  for (String fieldName : indexFields) {
    BytesRefHash collected = new BytesRefHash();
    Terms fieldTerms = indexFields.terms(fieldName);
    if (fieldTerms != null) {
      TermsEnum cursor = fieldTerms.iterator();
      for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
        collected.add(current);
      }
    }
    termsHash.put(fieldName, collected);
  }
}
Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader()); DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException { /* store document in memory index */ MemoryIndex index = new MemoryIndex(withOffsets); for (GetField getField : getFields) { String field = getField.getName(); if (fields.contains(field) == false) { // some fields are returned even when not asked for, eg. _timestamp continue; } Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer); for (Object text : getField.getValues()) { index.addField(field, text.toString(), analyzer); } } /* and read vectors from it */ return MultiFields.getFields(index.createSearcher().getIndexReader()); }
/**
 * Computes statistics for one field while holding a read semaphore.
 *
 * @param fieldName name of the field to inspect
 * @return stats built from the field's terms and its field type, or a default
 *         {@code FieldStats} when the field has no terms
 * @throws IOException if the index cannot be read
 */
FieldStats getFieldStats(String fieldName) throws IOException {
    try (final ReadWriteSemaphores.Lock readLock = readWriteSemaphores.acquireReadSemaphore()) {
        return writerAndSearcher.search((indexSearcher, taxonomyReader) -> {
            final Terms fieldTerms = MultiFields.getFields(indexSearcher.getIndexReader()).terms(fieldName);
            if (fieldTerms == null) {
                return new FieldStats();
            }
            return new FieldStats(fieldTerms, fieldMap.getFieldType(null, fieldName));
        });
    }
}
/**
 * Loads the terms that start with the given term's text into the list of values,
 * using the reader's terms for the term's field.
 *
 * <p>Nothing is added when the reader exposes no fields or the field has no terms.
 *
 * @param reader Index reader to use.
 * @param values The list the matching term strings are appended to.
 * @param term   The term providing both the field and the prefix bytes.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
  Fields fields = MultiFields.getFields(reader);
  if (fields == null) {
    return;
  }
  org.apache.lucene.index.Terms terms = fields.terms(term.field());
  if (terms == null) {
    return;
  }
  // NOTE(review): the prefix bytes are also passed as the start term; Lucene documents
  // intersect() as returning only terms greater than the start term, so a term equal
  // to the prefix itself is presumably skipped - confirm this is intended.
  CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
  TermsEnum prefixes = terms.intersect(prefixAutomaton, term.bytes());
  for (BytesRef val = prefixes.next(); val != null; val = prefixes.next()) {
    values.add(val.utf8ToString());
  }
}