/**
 * Collapses the hash table and sorts its entries in place.
 * The sorted term IDs are cached in {@code this.sortedTermIDs} and
 * also returned to the caller.
 *
 * @return the term IDs in sorted order
 */
public int[] sortPostings() {
  final int[] sorted = bytesHash.sort();
  sortedTermIDs = sorted;
  return sorted;
}
/**
 * Builds a doc comparator for index-time sorting on a STRING sort field,
 * freezing the buffered state (sorted values, per-doc ords, ord remap)
 * exactly once.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField.getType().equals(SortField.Type.STRING);
  assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;

  final int numValues = hash.size();

  // Freeze the pending state: sorted view of the hash, the per-document
  // ordinals, and a mapping from original ord -> sorted ord.
  finalSortedValues = hash.sort();
  finalOrds = pending.build();
  finalOrdMap = new int[numValues];
  for (int sortedOrd = 0; sortedOrd < numValues; sortedOrd++) {
    finalOrdMap[finalSortedValues[sortedOrd]] = sortedOrd;
  }

  final SortedDocValues docValues =
      new BufferedSortedDocValues(hash, numValues, finalOrds, finalSortedValues, finalOrdMap,
          docsWithField.iterator());
  return Sorter.getDocComparator(maxDoc, sortField, () -> docValues, () -> null);
}
/**
 * Builds a doc comparator for index-time sorting on a {@link SortedSetSortField},
 * materializing the buffered ords/counts/sorted-values state exactly once and
 * wrapping the multi-valued view with the field's selector.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField instanceof SortedSetSortField;
  assert finalOrds == null && finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;

  final int numValues = hash.size();

  // Freeze the buffered state exactly once.
  finalOrds = pending.build();
  finalOrdCounts = pendingCounts.build();
  finalSortedValues = hash.sort();
  finalOrdMap = new int[numValues];
  for (int i = 0; i < numValues; i++) {
    // Invert: original ord -> position in sorted order.
    finalOrdMap[finalSortedValues[i]] = i;
  }

  final SortedSetSortField selectorField = (SortedSetSortField) sortField;
  final SortedSetDocValues dvs =
      new BufferedSortedSetDocValues(finalSortedValues, finalOrdMap, hash, finalOrds,
          finalOrdCounts, maxCount, docsWithField.iterator());
  return Sorter.getDocComparator(
      maxDoc,
      selectorField,
      () -> SortedSetSelector.wrap(dvs, selectorField.getSelector()),
      () -> null);
}
/**
 * Rewrites the given {@link MultiTermQuery} by collecting all matching terms
 * and adding one clause per term (with its boost and cached term state) to a
 * top-level builder.
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B builder = getTopLevelBuilder();
  final ParallelArraysTermCollector collector = new ParallelArraysTermCollector();
  collectTerms(reader, query, collector);

  final int numTerms = collector.terms.size();
  if (numTerms > 0) {
    // Visit terms in sorted order; sortedIds[i] is the hash id at position i.
    final int[] sortedIds = collector.terms.sort();
    final float[] boosts = collector.array.boost;
    final TermContext[] states = collector.array.termState;
    for (int i = 0; i < numTerms; i++) {
      final int id = sortedIds[i];
      final Term term = new Term(query.getField(), collector.terms.get(id, new BytesRef()));
      // Sanity check: the collected doc freq must match the reader's.
      assert reader.docFreq(term) == states[id].docFreq();
      addClause(builder, term, states[id].docFreq(), boosts[id], states[id]);
    }
  }
  return build(builder);
}
final int[] ordMap; if (finalOrds == null) { sortedValues = hash.sort(); ords = pending.build(); ordMap = new int[valueCount];
ords = pending.build(); ordCounts = pendingCounts.build(); sortedValues = hash.sort(); ordMap = new int[valueCount]; for(int ord=0;ord<valueCount;ord++) {
/**
 * Sorts the hashed terms into ascending order, reusing memory along the way.
 * Sorting is deliberately lazy: it only happens the first time a sorted view
 * is requested (often never). When a sorted view is needed, hashing plus a
 * one-time sort plus binary search is still faster and smaller than a
 * TreeMap-based alternative (apart from more sophisticated tries / prefix
 * trees).
 */
void sortTerms() {
  if (sortedTerms != null) {
    return; // already sorted — nothing to do
  }
  sortedTerms = terms.sort();
}
/**
 * Collapses the hash table and sorts it in place. The resulting sorted term
 * IDs are stored in {@code this.sortedTermIDs} as well as returned.
 *
 * @return the sorted term IDs
 */
public int[] sortPostings() {
  final int[] ids = bytesHash.sort();
  this.sortedTermIDs = ids;
  return ids;
}
/**
 * Creates an iterator over the given hash; the hash is sorted up front so
 * terms can be walked in sorted order.
 */
public BytesRefHashIterator(BytesRefHash terms) {
  this.sortedTerms = terms.sort();
  this.terms = terms;
}
// Sorts the collected byte values and caches the sorted ids in bytesIds.
// Presumably must run before the sorted ids are consumed — confirm callers.
private void prepareForUsage() {
  bytesIds = dvBytesValuesSet.sort();
}
/**
 * Builds an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);

  // Entries are added in sorted order, as the FST builder requires.
  final int[] sortedIds = hash.sort();
  final int numEntries = hash.size();
  final IntsRefBuilder scratchInts = new IntsRefBuilder();
  final BytesRef scratchBytes = new BytesRef();
  for (int i = 0; i < numEntries; i++) {
    final int id = sortedIds[i];
    scratchInts.copyUTF8Bytes(hash.get(id, scratchBytes));
    fstBuilder.add(scratchInts.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
/**
 * Collapses the hash table and sorts it in place using UTF-8/Unicode order;
 * the result is cached in {@code this.sortedTermIDs} and returned.
 *
 * @return the sorted term IDs
 */
public int[] sortPostings() {
  final int[] sorted = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  sortedTermIDs = sorted;
  return sorted;
}
/**
 * Creates a query over the given pre-collected terms with per-term scores.
 * The terms hash is sorted up front (UTF-8/Unicode order) and the sorted
 * ids are kept in {@code ords} for later iteration.
 */
TermsIncludingScoreQuery(String field, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, Query originalQuery) {
  this.field = field;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  // NOTE(review): presumably scores[i] belongs to the term with hash id i —
  // confirm against the collector that produced these arrays.
  this.scores = scores;
  this.originalQuery = originalQuery;
  // Sort once; ords[k] is the hash id of the k-th smallest term.
  this.ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  this.unwrittenOriginalQuery = originalQuery;
}
/**
 * @param field The field that should contain terms that are specified in the previous parameter
 * @param fromQuery the query the terms were collected from — kept for equality/rewriting, presumably; confirm usage
 * @param terms The terms that matching documents should have. The terms must be sorted by natural order.
 */
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
  super(field);
  this.fromQuery = fromQuery;
  this.terms = terms;
  // Sort the hash once (UTF-8/Unicode order); ords maps sorted position -> hash id.
  ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Collapses the hash table and sorts it in place (UTF-8/Unicode order);
 * also records the result in {@code this.sortedTermIDs}.
 *
 * @return the term IDs in sorted order
 */
public int[] sortPostings() {
  final int[] ids = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  this.sortedTermIDs = ids;
  return ids;
}
/**
 * Sorts hashed terms into ascending UTF-8/Unicode order, reusing memory
 * along the way. Sorting is lazily delayed until required (often it's not
 * required at all). When a sorted view is needed, hashing + one-time sort +
 * binary search is still faster and smaller than a TreeMap-based alternative
 * (apart from more sophisticated tries / prefix trees).
 */
public void sortTerms() {
  if (sortedTerms != null) {
    return; // sorted view already computed
  }
  sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Wraps {@code tenum} so iteration is driven by the terms in {@code termsSet}.
 * The hash is sorted once (UTF-8/Unicode order); the last sorted term is cached
 * as a stopping bound and the seek target is seeded from position {@code upto}.
 *
 * <p>NOTE(review): assumes the terms set is non-empty — with zero terms,
 * {@code ords[lastElement]} would throw; confirm callers guarantee this.
 */
SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefTermsSet termsSet) {
  super(tenum);
  this.terms = termsSet.getBytesRefHash();
  this.ords = this.terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  // `upto` and `spare` are presumably initialized at their declarations
  // (upto == 0, spare reusable scratch) — verify in the enclosing class.
  seekTerm = terms.get(ords[upto], spare);
}
/**
 * Creates the comparator used for index-time sorting on a STRING field.
 * Freezes the buffered doc-values state (sorted values, per-doc ords and
 * the ord remapping) on first use.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField.getType().equals(SortField.Type.STRING);
  assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;

  final int count = hash.size();
  finalSortedValues = hash.sort();
  finalOrds = pending.build();

  // Invert the sorted-id array: original ord -> position in sorted order.
  final int[] ordToSorted = new int[count];
  for (int pos = 0; pos < count; pos++) {
    ordToSorted[finalSortedValues[pos]] = pos;
  }
  finalOrdMap = ordToSorted;

  final SortedDocValues sortedView = new BufferedSortedDocValues(
      hash, count, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
  return Sorter.getDocComparator(maxDoc, sortField, () -> sortedView, () -> null);
}
/**
 * Creates the comparator used for index-time sorting on a
 * {@link SortedSetSortField}. Freezes the buffered multi-valued state on
 * first use and applies the field's selector to collapse multiple values
 * per document.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField instanceof SortedSetSortField;
  assert finalOrds == null && finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;

  final int count = hash.size();
  finalOrds = pending.build();
  finalOrdCounts = pendingCounts.build();
  finalSortedValues = hash.sort();

  // Invert the sorted-id array: original ord -> position in sorted order.
  final int[] ordToSorted = new int[count];
  for (int pos = 0; pos < count; pos++) {
    ordToSorted[finalSortedValues[pos]] = pos;
  }
  finalOrdMap = ordToSorted;

  final SortedSetSortField ssField = (SortedSetSortField) sortField;
  final SortedSetDocValues bufferedValues = new BufferedSortedSetDocValues(
      finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount,
      docsWithField.iterator());
  return Sorter.getDocComparator(
      maxDoc,
      ssField,
      () -> SortedSetSelector.wrap(bufferedValues, ssField.getSelector()),
      () -> null);
}
/**
 * Rewrites the given {@link MultiTermQuery}: collects every matching term
 * (with boost and cached term state) and adds one clause per term, in
 * UTF-8/Unicode order, to a top-level builder.
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B builder = getTopLevelBuilder();
  final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
  // Gather matching terms plus parallel boost/termState arrays.
  collectTerms(reader, query, col);
  final int size = col.terms.size();
  if (size > 0) {
    // sort[i] is the hash id of the i-th smallest term.
    final int sort[] = col.terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    final float[] boost = col.array.boost;
    final TermContext[] termStates = col.array.termState;
    for (int i = 0; i < size; i++) {
      final int pos = sort[i];
      final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
      // Sanity check: collected docFreq must match the reader, unless the
      // context also holds synthetic (non-real) terms.
      assert termStates[pos].hasOnlyRealTerms() == false || reader.docFreq(term) == termStates[pos].docFreq();
      addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
    }
  }
  return build(builder);
}