Refine search
/**
 * Builds a mocked {@code Terms} whose iterator yields exactly one term with the
 * given text and document frequency, then signals exhaustion by returning null.
 *
 * @param value     the single term the mocked enum produces
 * @param frequency the docFreq reported for that term
 * @return a Mockito mock of {@code Terms} backed by a single-term {@code TermsEnum}
 * @throws IOException declared to match the mocked Lucene API; never thrown here
 */
private static Terms getTerms(String value, int frequency) throws IOException {
    TermsEnum termsEnum = mock(TermsEnum.class);
    Terms terms = mock(Terms.class);
    when(terms.iterator()).thenReturn(termsEnum);
    // BytesRef(CharSequence) encodes the string as UTF-8; the original used
    // value.getBytes(), whose result depends on the platform default charset
    // and can diverge from Lucene's UTF-8 term bytes for non-ASCII input.
    when(termsEnum.next()).thenReturn(new BytesRef(value)).thenReturn(null);
    when(termsEnum.docFreq()).thenReturn(frequency);
    return terms;
}
public void listTokens(int freq) throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); iter = terms.iterator(); // init uid iterator } while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { LOGGER.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next==null) {iter=null;} } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e); } } } }
@Override public void write(Fields fields) throws IOException { //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment); String lastField = null; for(String field : fields) { assert lastField == null || lastField.compareTo(field) < 0; lastField = field; //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field); Terms terms = fields.terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field)); while (true) { BytesRef term = termsEnum.next(); //if (DEBUG) System.out.println("BTTW: next term " + term); if (term == null) { break; } //if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term)); termsWriter.write(term, termsEnum); } termsWriter.finish(); //if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field); } }
// NOTE(review): incomplete fragment — this span is not compilable as-is
// (unconditional `throw` mid-sequence, unterminated `while`). It appears to be
// cut from a larger term-vector-vs-postings verification method; restore the
// surrounding control flow before use.
TermsEnum termsEnum = terms.iterator();
// Whether postings record term frequencies (index options >= DOCS_AND_FREQS).
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
final boolean postingsHasPayload = fieldInfo.hasPayloads();
// presumably guarded by a null check on postingsTerms in the original — TODO confirm
throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j);
TermsEnum postingsTermsEnum = postingsTerms.iterator();
// Loop body is missing from this excerpt.
while ((term = termsEnum.next()) != null) {
/**
 * Feeds to {@code consumer} the id of every document whose id term is NOT
 * accepted by {@code includeInShard} — i.e. the documents that must move out
 * of this shard.
 *
 * @param idField        name of the indexed id field
 * @param includeInShard predicate deciding whether an id term stays in this shard
 * @param leafReader     the leaf reader to scan
 * @param consumer       receives each excluded document's (leaf-local) doc id
 * @throws IOException on index access failure
 */
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard,
                                  LeafReader leafReader, IntConsumer consumer) throws IOException {
    Terms terms = leafReader.terms(idField);
    // LeafReader.terms() returns null when the field does not exist in this
    // leaf; the original dereferenced it unconditionally and would NPE.
    if (terms == null) {
        return;
    }
    TermsEnum iterator = terms.iterator();
    BytesRef idTerm;
    PostingsEnum postingsEnum = null; // reused across terms to avoid reallocation
    while ((idTerm = iterator.next()) != null) {
        if (includeInShard.test(idTerm) == false) {
            postingsEnum = iterator.postings(postingsEnum);
            int doc;
            while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                consumer.accept(doc);
            }
        }
    }
}
/**
 * Collects the distinct terms of the "string" field from the Lucene index
 * backing the given label/key schema index, in index (term) order.
 *
 * @param label the schema label of the index
 * @param key   the property key of the index
 * @return the distinct term strings, in first-seen (index) order
 * @throws IOException                     on index access failure
 * @throws SchemaRuleNotFoundException     if no matching schema rule exists
 * @throws IndexNotFoundKernelException    if the index is not available
 * @throws DuplicateSchemaRuleException    if the schema rule is ambiguous
 */
private List<Object> distinctTerms(@Name("label") String label, @Name("key") String key)
        throws SchemaRuleNotFoundException, IndexNotFoundKernelException, IOException, DuplicateSchemaRuleException {
    SimpleIndexReader reader = getLuceneIndexReader(label, key);
    SortedIndexReader sortedIndexReader = new SortedIndexReader(reader, 0, Sort.INDEXORDER);
    Fields fields = MultiFields.getFields(sortedIndexReader.getIndexSearcher().getIndexReader());
    Terms terms = fields.terms("string");

    // LinkedHashSet keeps insertion (= index) order while deduplicating.
    Set<Object> distinct = new LinkedHashSet<>(100);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            distinct.add(termsEnum.term().utf8ToString());
        }
    }
    return new ArrayList<>(distinct);
}
/**
 * Maps each distinct "string"-field term of the index backing the given
 * label/key to its document frequency.
 *
 * @param label the schema label of the index
 * @param key   the property key of the index
 * @return term text → docFreq; empty when the field has no terms
 * @throws RuntimeException wrapping any failure, with label/key context
 */
private Map<String, Integer> distinctTermsCount(@Name("label") String label, @Name("key") String key) {
    try {
        SortedIndexReader sortedIndexReader = getSortedIndexReader(label, key, 0, Sort.INDEXORDER);
        Fields fields = MultiFields.getFields(sortedIndexReader.getIndexSearcher().getIndexReader());
        Terms terms = fields.terms("string");

        Map<String, Integer> counts = new HashMap<>();
        if (terms != null) {
            TermsEnum termsEnum = terms.iterator();
            while (termsEnum.next() != null) {
                counts.put(termsEnum.term().utf8ToString(), termsEnum.docFreq());
            }
        }
        return counts;
    } catch (Exception e) {
        // Boundary catch: surface the failure with its cause and the
        // identifying label/key so callers can diagnose which index failed.
        throw new RuntimeException("Error collecting distinct terms of label: " + label + " and key: " + key, e);
    }
}