Refine search
/**
 * Builds the set of documents that have no term at all in the entry's field.
 *
 * <p>Every document that appears in the postings of any term of the field is
 * marked in a bit set, and the bit set is then inverted over
 * {@code [0, reader.maxDoc())}.
 *
 * @param reader   index reader whose terms and postings are scanned
 * @param entryKey cache entry; only its {@code field} is read here
 * @return {@link DocIdSet#EMPTY_DOCIDSET} when {@code reader.docFreq(new Term(field))}
 *         equals {@code reader.maxDoc()}, otherwise the inverted bit set
 * @throws IOException if reading terms or postings fails
 */
@Override
protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
  final String field = entryKey.field;
  if (reader.maxDoc() == reader.docFreq(new Term(field))) {
    return DocIdSet.EMPTY_DOCIDSET;
  }
  final OpenBitSet res = new OpenBitSet(reader.maxDoc());
  final TermDocs termDocs = reader.termDocs();
  try {
    // Opened inside the outer try so termDocs is released even if this call throws.
    final TermEnum termEnum = reader.terms(new Term(field));
    try {
      do {
        final Term term = termEnum.term();
        // Stop at the end of the enumeration or once the terms belong to another field.
        if (term == null || !field.equals(term.field())) {
          break;
        }
        termDocs.seek(termEnum);
        while (termDocs.next()) {
          res.fastSet(termDocs.doc());
        }
      } while (termEnum.next());
    } finally {
      termEnum.close();
    }
  } finally {
    termDocs.close();
  }
  // Bits currently mark documents WITH a value; invert to get documents without one.
  res.flip(0, reader.maxDoc());
  return res;
}
}
NavigableMap<String, SkippableIterable<DocTermMatch>> result = new TreeMap<String, SkippableIterable<DocTermMatch>>(); try { terms = reader.terms(new Term(field, termFrom)); Term rightBoundary = new Term(field, termTo); int numberOfTerms = 0; if (terms.term() != null) { do { Term term = terms.term(); if (term.compareTo(rightBoundary) >= 0) { break; } while (terms.next());
/**
 * Prints every term of {@code field}, together with its document frequency,
 * to standard error (one {@code "text freq"} pair per line).
 *
 * @param file    location of the index directory to open
 * @param field   name of the field whose terms are listed
 * @param maxDocs not used by this method; kept so existing callers still compile
 * @throws IOException if the index cannot be opened or read
 */
public static void dumpTags(File file, String field, long maxDocs) throws IOException {
  final Directory dir = FSDirectory.open(file);
  try {
    final IndexReader reader = IndexReader.open(dir, true);
    try {
      final TermEnum te = reader.terms(new Term(field, ""));
      try {
        do {
          final Term term = te.term();
          // Stop at the end of the enumeration or when the next field starts.
          if (term == null || !term.field().equals(field)) {
            break;
          }
          System.err.printf("%s %d\n", term.text(), te.docFreq());
        } while (te.next());
      } finally {
        te.close();
      }
    } finally {
      reader.close();
    }
  } finally {
    // A reader opened on a caller-supplied Directory does not close it for us.
    dir.close();
  }
}
private static void indexDocs(File file, File index, boolean create) throws Exception { if (!create) { // incrementally update reader = IndexReader.open(FSDirectory.open(index), false); // open existing index uidIter = reader.terms(new Term("uid", "")); // init uid iterator indexDocs(file); if (deleting) { // delete rest of stale docs while (uidIter.term() != null && uidIter.term().field() == "uid") { System.out.println("deleting " + HTMLDocument.uid2url(uidIter.term().text())); reader.deleteDocuments(uidIter.term()); uidIter.next(); } deleting = false; } uidIter.close(); // close uid iterator reader.close(); // close existing index } else // don't have exisiting indexDocs(file); }
IndexReader reader = IndexReader.open(dir, true); TermEnum te = reader.terms(new Term(field, "")); StringBuilder buf = new StringBuilder(); do { Term term = te.term(); if (term == null || term.field().equals(field) == false) { break; if (te.docFreq() > 30) { File f = new File(output, term.text() + ".txt"); PrintWriter pw = new PrintWriter(new FileWriter(f)); System.err.printf("%s %d\n", term.text(), te.docFreq()); TermDocs td = reader.termDocs(term); while (td.next()) { int doc = td.doc(); buf.setLength(0); appendVectorTerms(buf, reader.getTermFreqVector(doc, "description-clustering")); } while (te.next()); te.close();
/**
 * Creates the iterator: opens a term enumeration on the enclosing
 * instance's reader, starting at the enclosing instance's field name, and
 * records the enumeration's current term as the first value when
 * {@code isField} accepts it.
 *
 * @throws IOException if IO errors occur
 */
private TermsForFieldIterator() throws IOException {
  this.terms =
      TermsForField.this.indexReader.terms(new Term(TermsForField.this.fieldName));
  if (this.terms != null && isField(this.terms.term())) {
    this.nextValue = this.terms.term();
  } else {
    // No enumeration, or its current term is rejected by isField: nothing to iterate.
    this.nextValue = null;
  }
}
/**
 * Factory for an iterator over the terms of {@code field} and their postings.
 *
 * <p>The term enumeration is opened first. If opening the postings
 * enumeration then fails with an {@link IOException}, the term enumeration is
 * closed (a failure of that close is only logged) and the original exception
 * is rethrown.
 *
 * @param r     reader supplying the term and postings enumerations
 * @param field field whose terms are enumerated, starting at the empty term text
 * @return a new iterator wrapping both enumerations
 * @throws IOException if either enumeration cannot be opened
 */
static LuceneUnsortedIntTermDocIterator create(final IndexReader r, final String field)
    throws IOException {
  final TermEnum fieldTerms = r.terms(new Term(field, ""));
  final TermDocs postings;
  try {
    postings = r.termDocs();
  } catch (IOException openFailure) {
    try {
      fieldTerms.close();
    } catch (IOException closeFailure) {
      log.error("error closing TermEnum", closeFailure);
    }
    throw openFailure;
  }
  return new LuceneUnsortedIntTermDocIterator(field, fieldTerms, postings);
}
/**
 * Returns the term the underlying enumeration is currently positioned on and
 * then advances the enumeration by one.
 *
 * <p>The term's document frequency is stored in {@code freq} and its text is
 * copied into the shared {@code spare} instance, which is also the object
 * returned; callers see the same instance on every call.
 *
 * @return {@code spare} holding the current term text, or {@code null} when
 *         there is no enumeration, it is exhausted, or its current term
 *         belongs to a different field
 * @throws IOException if the underlying enumeration fails
 */
public BytesRef next() throws IOException {
  if (termsEnum == null) {
    return null;
  }
  // The original wrapped this in a do/while whose body always returned, so the
  // loop condition could never run; the straight-line form is equivalent.
  final Term actualTerm = termsEnum.term();
  // equals() rather than !=: do not depend on the field name being interned.
  if (actualTerm == null || !actualTerm.field().equals(field)) {
    return null;
  }
  freq = termsEnum.docFreq();
  spare.copyChars(actualTerm.text());
  termsEnum.next();
  return spare;
}
@SuppressWarnings({"StringEquality"}) private boolean innerNextTerm() throws IOException { // loop instead of calling itself recursively to avoid potential stack overflow while (true) { if (!first) { if (!terms.next()) return false; } else { first = false; } final Term term = terms.term(); if (term == null || term.field() != field) return false; try { currentTerm = Long.parseLong(term.text()); } catch (NumberFormatException e) { continue; } termDocs.seek(terms); return true; } }
/**
 * Calls {@code handleDoc} for every document found in the postings of each
 * term produced by a {@link WildcardTermEnum} over {@code wildcard}.
 *
 * @param reader index reader to enumerate terms and postings from
 * @throws IOException if enumerating terms or postings fails
 */
public void generate(IndexReader reader) throws IOException {
  final TermEnum enumerator = new WildcardTermEnum(reader, wildcard);
  try {
    // Opened inside the outer try so the enumerator is released even if this call throws.
    final TermDocs termDocs = reader.termDocs();
    try {
      do {
        final Term term = enumerator.term();
        if (term == null) {
          break;
        }
        termDocs.seek(term);
        while (termDocs.next()) {
          handleDoc(termDocs.doc());
        }
      } while (enumerator.next());
    } finally {
      termDocs.close();
    }
  } finally {
    enumerator.close();
  }
}
}
/**
 * Advances to the next term of {@code field}.
 *
 * <p>When no enumeration exists yet the call is delegated to
 * {@code initialize()}. Once this method has reported {@code false}, later
 * calls return {@code false} without touching the enumeration.
 *
 * @return {@code true} if the enumeration advanced and its current term is
 *         non-null and belongs to {@code field}
 */
@Override
public boolean next() {
  if (termEnum == null) {
    return initialize();
  }
  if (hasNext) {
    final boolean advanced;
    try {
      advanced = termEnum.next();
    } catch (IOException e) {
      throw LuceneUtils.ioRuntimeException(e);
    }
    if (advanced) {
      hasNext = termEnum.term() != null && field.equals(termEnum.term().field());
    } else {
      hasNext = false;
    }
  }
  return hasNext;
}
protected float calculateWeight(Term term, IndexReader reader) throws IOException { //if a term is not in the index, then it's weight is 0 TermEnum termEnum = reader.terms(term); if (termEnum != null && termEnum.term() != null && termEnum.term().equals(term)) { return 1.0f / termEnum.docFreq(); } else { log.warn("Couldn't find doc freq for term {}", term); return 0; } }
/**
 * Closes the enumeration to further activity, freeing resources.
 *
 * <p>Calling this on an enumeration that has no underlying enum (for example
 * one that was already closed) is a no-op instead of a
 * {@link NullPointerException}.
 *
 * @throws IOException if closing the underlying enumeration fails; the
 *         references are cleared even in that case
 */
public void close() throws IOException {
  try {
    if (actualEnum != null) {
      actualEnum.close();
    }
  } finally {
    currentTerm = null;
    actualEnum = null;
  }
}
}
/**
 * Reports the document frequency of the term the enumeration is currently on.
 * The value is not meaningful until next() has been called for the first time.
 *
 * @return the underlying enumeration's docFreq, or -1 when there is no
 *         underlying enumeration
 */
public int docFreq() {
  return (actualEnum != null) ? actualEnum.docFreq() : -1;
}
/**
 * Positions this instance on the term of the given enumeration.
 *
 * <p>A {@code LucandraTermEnum} is adopted as is; any other enumeration is
 * replaced by one obtained from {@code indexReader} for its current term.
 * The postings array is then taken from that enumeration and the document
 * cursor is reset to -1.
 *
 * @param termEnum enumeration whose current term is sought
 * @throws IOException if obtaining the enumeration from the reader fails
 */
public void seek(TermEnum termEnum) throws IOException {
  final LucandraTermEnum resolved =
      (termEnum instanceof LucandraTermEnum)
          ? (LucandraTermEnum) termEnum
          : (LucandraTermEnum) indexReader.terms(termEnum.term());
  this.termEnum = resolved;
  termDocs = resolved.getTermDocFreq();

  if (logger.isDebugEnabled()) {
    logger.debug("seeked out " + termDocs.length);
  }

  docPosition = -1;
}