// Fragment of the default TermVectorsWriter.merge(MergeState) loop: each
// surviving document's vectors are copied into the merged segment.
Fields vectors = sub.reader.get(sub.docID);
addAllDocVectors(vectors, mergeState);
docCount++;
// ... after the loop over all documents:
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
// Call sequence implemented by TermVectorsWriter.addAllDocVectors(...):
// a document with no vectors still gets an (empty) entry.
if (vectors == null) {
  startDocument(0);
  finishDocument();
  return;
}
startDocument(numFields);
// for each field with term vectors:
startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator();
// for each term:
startTerm(termsEnum.term(), freq);
// for each position (only when positions/offsets are stored):
addPosition(pos, startOffset, endOffset, payload);
finishTerm();
finishField();
finishDocument();
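The same lifecycle is visible from the public indexing API. Below is a minimal sketch, assuming Lucene 8.x class names (ByteBuffersDirectory; the getTermVectors accessor was later deprecated in favor of termVectors()), that stores term vectors with positions and offsets for one field and reads them back:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class TermVectorsDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      ft.setStoreTermVectors(true);         // routes the field through the writer lifecycle above
      ft.setStoreTermVectorPositions(true);
      ft.setStoreTermVectorOffsets(true);
      ft.freeze();
      Document doc = new Document();
      doc.add(new Field("body", "hello term vectors hello", ft));
      iw.addDocument(doc);
    }
    try (IndexReader reader = DirectoryReader.open(dir)) {
      Terms terms = reader.getTermVectors(0).terms("body");
      TermsEnum te = terms.iterator();
      for (BytesRef term = te.next(); term != null; term = te.next()) {
        System.out.println(term.utf8ToString() + " freq=" + te.totalTermFreq());
      }
    }
  }
}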
// TermVectorsConsumerPerField flushing one field's in-memory postings to the writer (tv):
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
// for each term of the field:
tv.startTerm(flushTerm, freq);
initReader(offReader, termID, 1);  // position the offsets reader on this term's data
tv.addProx(freq, posReader, offReader);
tv.finishTerm();
tv.finishField();
/** Fills in no-term-vectors for all docs we haven't seen
 *  since the last doc that had term vectors. */
void fill(int docID) throws IOException {
  while (lastDocID < docID) {
    writer.startDocument(0);
    writer.finishDocument();
    lastDocID++;
  }
}
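A document without vectors must still occupy a slot in the term vectors file, otherwise docIDs drift out of alignment with the rest of the segment. A hypothetical trace (the docIDs are illustrative, not taken from the source):

// Suppose docs 0..4 are indexed and only docs 1 and 4 carry term vectors:
fill(1);  // writes an empty entry for doc 0; lastDocID becomes 1
// ... writer.startDocument(...)/finishDocument() for doc 1, lastDocID becomes 2
fill(4);  // writes empty entries for docs 2 and 3; lastDocID becomes 4
// ... writer.startDocument(...)/finishDocument() for doc 4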
/**
 * Merge the TermVectors from each of the segments into the new one.
 * @throws IOException if there is a low-level IO error
 */
private int mergeVectors() throws IOException {
  try (TermVectorsWriter termVectorsWriter =
           codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context)) {
    return termVectorsWriter.merge(mergeState);
  }
}
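Note that the writer is always obtained through the codec's TermVectorsFormat, so the whole pipeline can be swapped at the codec level. A minimal sketch of intercepting that factory call, assuming FilterCodec and a hypothetical codec name "MyCodec" (exact class names depend on the Lucene version in use):

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.TermVectorsFormat;

// Hypothetical wrapper: delegates everything, but termVectorsFormat() is the
// hook where a custom format could be returned instead.
public class MyCodec extends FilterCodec {
  public MyCodec() {
    super("MyCodec", Codec.getDefault());
  }

  @Override
  public TermVectorsFormat termVectorsFormat() {
    return delegate.termVectorsFormat(); // replace to customize vectorsWriter()/vectorsReader()
  }
}

Setting the wrapper via IndexWriterConfig.setCodec(new MyCodec()) would route mergeVectors() through it; reading such an index back also requires registering the codec through SPI.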
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
           Sorter.DocMap sortMap) throws IOException {
  if (writer != null) {
    int numDocs = state.segmentInfo.maxDoc();
    assert numDocs > 0;  // At least one doc in this run had term vectors enabled
    try {
      fill(numDocs);
      assert state.segmentInfo != null;
      writer.finish(state.fieldInfos, numDocs);
    } finally {
      IOUtils.close(writer);
      writer = null;
      lastDocID = 0;
      hasVectors = false;
    }
  }
}
// Copying one position: remember the last end offset seen, then forward the
// position, offsets, and payload to the writer.
lastOffset = endOffset;
addPosition(position, startOffset, endOffset, thisPayload);
@Override
void finishDocument() throws IOException {
  if (!hasVectors) {
    return;
  }

  // Fields in term vectors are UTF16 sorted:
  ArrayUtil.introSort(perFields, 0, numVectorFields);

  initTermVectorsWriter();

  fill(docState.docID);

  // Append term vectors to the real outputs:
  writer.startDocument(numVectorFields);
  for (int i = 0; i < numVectorFields; i++) {
    perFields[i].finishDocument();
  }
  writer.finishDocument();

  assert lastDocID == docState.docID : "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;

  lastDocID++;

  super.reset();
  resetFields();
}
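The "UTF16 sorted" comment refers to Java's natural String ordering, which compares UTF-16 code units (the per-field objects compare by field name). A quick illustration with hypothetical field names:

String[] fieldNames = { "body", "Body", "title", "Título" };
java.util.Arrays.sort(fieldNames);
// UTF-16 code-unit order: [Body, Título, body, title]
// ('B' = 66 and 'T' = 84 sort before 'b' = 98, unlike a case-insensitive
// alphabetical ordering)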
// Fall back to the generic document-at-a-time merge in TermVectorsWriter
// (taken when an optimized bulk-copy path can't be used).
return super.merge(mergeState);
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
           Sorter.DocMap sortMap) throws IOException {
  super.flush(fieldsToFlush, state, sortMap);
  if (tmpDirectory != null) {
    if (sortMap == null) {
      // we're lucky the index is already sorted, just rename the temporary file and return
      for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
        tmpDirectory.rename(entry.getValue(), entry.getKey());
      }
      return;
    }
    TermVectorsReader reader = docWriter.codec.termVectorsFormat()
        .vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
    TermVectorsReader mergeReader = reader.getMergeInstance();
    TermVectorsWriter writer = docWriter.codec.termVectorsFormat()
        .vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
    try {
      reader.checkIntegrity();
      for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
        Fields vectors = mergeReader.get(sortMap.newToOld(docID));
        writeTermVectors(writer, vectors, state.fieldInfos);
      }
      writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
    } finally {
      IOUtils.close(reader, writer);
      IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
    }
  }
}
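The resort loop walks new (post-sort) docIDs in order and pulls each document's vectors from its pre-sort position via sortMap.newToOld(docID). A minimal sketch of that mapping contract, assuming code living alongside Sorter.DocMap (it is internal API in org.apache.lucene.index) and a hypothetical three-document reversal:

// Hypothetical DocMap that reverses a 3-doc segment.
Sorter.DocMap reverse = new Sorter.DocMap() {
  @Override public int oldToNew(int docID) { return 2 - docID; }  // pre-sort -> post-sort
  @Override public int newToOld(int docID) { return 2 - docID; }  // post-sort -> pre-sort
  @Override public int size() { return 3; }
};
// With this map, the flush loop above would copy old doc 2 first (new docID 0),
// then old doc 1, then old doc 0.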
// The static writeTermVectors(writer, vectors, fieldInfos) helper used by the
// sorting flush above follows the same lifecycle through an explicit writer:
if (vectors == null) {
  writer.startDocument(0);
  writer.finishDocument();
  return;
}
writer.startDocument(numFields);
// for each field:
writer.startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator();
// for each term:
writer.startTerm(termsEnum.term(), freq);
// for each position:
writer.addPosition(pos, startOffset, endOffset, payload);
writer.finishTerm();
writer.finishField();
writer.finishDocument();
// Variant of the merge loop (e.g. the non-bulk path of a compressing
// implementation), reading through a TermVectorsReader directly:
Fields vectors = termVectorsReader.get(docID);
addAllDocVectors(vectors, mergeState);
docCount++;
// ... after the loop:
finish(mergeState.mergeFieldInfos, docCount);
return docCount;