// Fragment of the default TermVectorsWriter.merge(MergeState) loop: each
// surviving document's vectors are copied into the merged segment.
Fields vectors = sub.reader.get(sub.docID);
addAllDocVectors(vectors, mergeState);
docCount++;
// ... after the loop over all documents:
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
// Call sequence implemented by TermVectorsWriter.addAllDocVectors(...):
// a document with no vectors still gets an (empty) entry.
if (vectors == null) {
  startDocument(0);
  finishDocument();
  return;
}
startDocument(numFields);
// for each field with term vectors:
startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator();
// for each term:
startTerm(termsEnum.term(), freq);
// for each position (only when positions/offsets are stored):
addPosition(pos, startOffset, endOffset, payload);
finishTerm();
finishField();
finishDocument();
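The same lifecycle is visible from the public indexing API. Below is a minimal sketch, assuming Lucene 8.x class names (ByteBuffersDirectory; the getTermVectors accessor was later deprecated in favor of termVectors()), that stores term vectors with positions and offsets for one field and reads them back:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class TermVectorsDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      ft.setStoreTermVectors(true);         // routes the field through the writer lifecycle above
      ft.setStoreTermVectorPositions(true);
      ft.setStoreTermVectorOffsets(true);
      ft.freeze();
      Document doc = new Document();
      doc.add(new Field("body", "hello term vectors hello", ft));
      iw.addDocument(doc);
    }
    try (IndexReader reader = DirectoryReader.open(dir)) {
      Terms terms = reader.getTermVectors(0).terms("body");
      TermsEnum te = terms.iterator();
      for (BytesRef term = te.next(); term != null; term = te.next()) {
        System.out.println(term.utf8ToString() + " freq=" + te.totalTermFreq());
      }
    }
  }
}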
// TermVectorsConsumerPerField flushing one field's in-memory postings to the writer (tv):
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
// for each term of the field:
tv.startTerm(flushTerm, freq);
initReader(offReader, termID, 1);  // position the offsets reader on this term's data
tv.addProx(freq, posReader, offReader);
tv.finishTerm();
tv.finishField();
/** Fills in no-term-vectors for all docs we haven't seen
 *  since the last doc that had term vectors. */
void fill(int docID) throws IOException {
  while (lastDocID < docID) {
    writer.startDocument(0);
    writer.finishDocument();
    lastDocID++;
  }
}
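A document without vectors must still occupy a slot in the term vectors file, otherwise docIDs drift out of alignment with the rest of the segment. A hypothetical trace (the docIDs are illustrative, not taken from the source):

// Suppose docs 0..4 are indexed and only docs 1 and 4 carry term vectors:
fill(1);  // writes an empty entry for doc 0; lastDocID becomes 1
// ... writer.startDocument(...)/finishDocument() for doc 1, lastDocID becomes 2
fill(4);  // writes empty entries for docs 2 and 3; lastDocID becomes 4
// ... writer.startDocument(...)/finishDocument() for doc 4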
/**
 * Merge the TermVectors from each of the segments into the new one.
 * @throws IOException if there is a low-level IO error
 */
private int mergeVectors() throws IOException {
  try (TermVectorsWriter termVectorsWriter =
           codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context)) {
    return termVectorsWriter.merge(mergeState);
  }
}
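Note that the writer is always obtained through the codec's TermVectorsFormat, so the whole pipeline can be swapped at the codec level. A minimal sketch of intercepting that factory call, assuming FilterCodec and a hypothetical codec name "MyCodec" (exact class names depend on the Lucene version in use):

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.TermVectorsFormat;

// Hypothetical wrapper: delegates everything, but termVectorsFormat() is the
// hook where a custom format could be returned instead.
public class MyCodec extends FilterCodec {
  public MyCodec() {
    super("MyCodec", Codec.getDefault());
  }

  @Override
  public TermVectorsFormat termVectorsFormat() {
    return delegate.termVectorsFormat(); // replace to customize vectorsWriter()/vectorsReader()
  }
}

Setting the wrapper via IndexWriterConfig.setCodec(new MyCodec()) would route mergeVectors() through it; reading such an index back also requires registering the codec through SPI.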
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
           Sorter.DocMap sortMap) throws IOException {
  if (writer != null) {
    int numDocs = state.segmentInfo.maxDoc();
    assert numDocs > 0;  // At least one doc in this run had term vectors enabled
    try {
      fill(numDocs);
      assert state.segmentInfo != null;
      writer.finish(state.fieldInfos, numDocs);
    } finally {
      IOUtils.close(writer);
      writer = null;
      lastDocID = 0;
      hasVectors = false;
    }
  }
}
// Copying one position: remember the last end offset seen, then forward the
// position, offsets, and payload to the writer.
lastOffset = endOffset;
addPosition(position, startOffset, endOffset, thisPayload);
@Override
void finishDocument() throws IOException {
  if (!hasVectors) {
    return;
  }

  // Fields in term vectors are UTF16 sorted:
  ArrayUtil.introSort(perFields, 0, numVectorFields);

  initTermVectorsWriter();

  fill(docState.docID);

  // Append term vectors to the real outputs:
  writer.startDocument(numVectorFields);
  for (int i = 0; i < numVectorFields; i++) {
    perFields[i].finishDocument();
  }
  writer.finishDocument();

  assert lastDocID == docState.docID : "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;

  lastDocID++;

  super.reset();
  resetFields();
}
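The "UTF16 sorted" comment refers to Java's natural String ordering, which compares UTF-16 code units (the per-field objects compare by field name). A quick illustration with hypothetical field names:

String[] fieldNames = { "body", "Body", "title", "Título" };
java.util.Arrays.sort(fieldNames);
// UTF-16 code-unit order: [Body, Título, body, title]
// ('B' = 66 and 'T' = 84 sort before 'b' = 98, unlike a case-insensitive
// alphabetical ordering)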
// Fall back to the generic document-at-a-time merge in TermVectorsWriter
// (taken when an optimized bulk-copy path can't be used).
return super.merge(mergeState);
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
           Sorter.DocMap sortMap) throws IOException {
  super.flush(fieldsToFlush, state, sortMap);
  if (tmpDirectory != null) {
    if (sortMap == null) {
      // we're lucky the index is already sorted, just rename the temporary file and return
      for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
        tmpDirectory.rename(entry.getValue(), entry.getKey());
      }
      return;
    }
    TermVectorsReader reader = docWriter.codec.termVectorsFormat()
        .vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
    TermVectorsReader mergeReader = reader.getMergeInstance();
    TermVectorsWriter writer = docWriter.codec.termVectorsFormat()
        .vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
    try {
      reader.checkIntegrity();
      for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
        Fields vectors = mergeReader.get(sortMap.newToOld(docID));
        writeTermVectors(writer, vectors, state.fieldInfos);
      }
      writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
    } finally {
      IOUtils.close(reader, writer);
      IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
    }
  }
}
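The resort loop walks new (post-sort) docIDs in order and pulls each document's vectors from its pre-sort position via sortMap.newToOld(docID). A minimal sketch of that mapping contract, assuming code living alongside Sorter.DocMap (it is internal API in org.apache.lucene.index) and a hypothetical three-document reversal:

// Hypothetical DocMap that reverses a 3-doc segment.
Sorter.DocMap reverse = new Sorter.DocMap() {
  @Override public int oldToNew(int docID) { return 2 - docID; }  // pre-sort -> post-sort
  @Override public int newToOld(int docID) { return 2 - docID; }  // post-sort -> pre-sort
  @Override public int size() { return 3; }
};
// With this map, the flush loop above would copy old doc 2 first (new docID 0),
// then old doc 1, then old doc 0.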
// The static writeTermVectors(writer, vectors, fieldInfos) helper used by the
// sorting flush above follows the same lifecycle through an explicit writer:
if (vectors == null) {
  writer.startDocument(0);
  writer.finishDocument();
  return;
}
writer.startDocument(numFields);
// for each field:
writer.startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator();
// for each term:
writer.startTerm(termsEnum.term(), freq);
// for each position:
writer.addPosition(pos, startOffset, endOffset, payload);
writer.finishTerm();
writer.finishField();
writer.finishDocument();
// Variant of the merge loop (e.g. the non-bulk path of a compressing
// implementation), reading through a TermVectorsReader directly:
Fields vectors = termVectorsReader.get(docID);
addAllDocVectors(vectors, mergeState);
docCount++;
// ... after the loop:
finish(mergeState.mergeFieldInfos, docCount);
return docCount;