org.apache.lucene.util.IntsRefBuilder java code examples

Refine search

 } else if (comp == 0) {
  path.input.append(path.arc.label);
  final int cmp = bottom.input.get().compareTo(path.input.get());
  path.input.setLength(path.input.length() - 1);
IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(path.input.get());
newInput.append(path.arc.label);

/**
 * Stores every UTF-16 code unit (char) of {@code s} as one int in
 * {@code scratch} and returns the builder's {@link IntsRef}.
 *
 * @param s the character sequence to copy
 * @param scratch builder that receives one int per char; its length is set to {@code s.length()}
 * @return the value of {@code scratch.get()} after the copy
 */
public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) {
  final int numChars = s.length();
  scratch.setLength(numChars);
  scratch.grow(numChars);
  int pos = 0;
  while (pos < numChars) {
    // char widens to int without sign extension, so the stored value is the raw code unit
    final int codeUnit = s.charAt(pos);
    scratch.setIntAt(pos, codeUnit);
    pos++;
  }
  return scratch.get();
}

/**
 * Appends each byte of {@code input}, read as an unsigned value (0..255),
 * to the cleared {@code scratch} builder and returns its {@link IntsRef}.
 *
 * @param input the bytes to convert; only the {@code offset}/{@code length} window is read
 * @param scratch builder that is cleared and then filled with one int per byte
 * @return the value of {@code scratch.get()} after the conversion
 */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  final int start = input.offset;
  final int end = start + input.length;
  for (int pos = start; pos < end; pos++) {
    // mask off the sign so negative bytes map to 128..255
    final int unsignedByte = input.bytes[pos] & 0xFF;
    scratch.append(unsignedByte);
  }
  return scratch.get();
}

/**
 * Builds an FST whose inputs are the map keys (converted with {@code Util.toUTF32})
 * and whose outputs are the associated integer lists packed into an {@link IntsRef}.
 *
 * @param affixes sorted map from key string to the list of ints to store for it
 * @return the result of {@code Builder.finish()}
 * @throws IOException if the FST builder fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs fstOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);
  final IntsRefBuilder keyInts = new IntsRefBuilder();
  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyInts);
    final List<Integer> ids = affix.getValue();
    // pack the boxed list into a primitive int[] sized exactly for this entry
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length++] = id;
    }
    fstBuilder.add(keyInts.get(), packed);
  }
  return fstBuilder.finish();
}

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps the second whitespace-separated field of each line to the third.
 *
 * @param reader source of the conversion lines; consumed one line per mapping
 * @param num number of lines to read
 * @return the result of {@code Builder.finish()}
 * @throws IOException if reading from {@code reader} or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not consist of exactly three whitespace-separated fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates keys in sorted order; presumably the order the FST builder expects.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; report it as a parse error instead of an NPE
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}

/**
 * Compiles the collected entries into a {@link StemmerOverrideMap} for use with the
 * {@link StemmerOverrideFilter}.
 *
 * @return a {@link StemmerOverrideMap} backed by the FST built from the entries in {@code hash}
 * @throws IOException if an {@link IOException} occurs while building the FST
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);
  // sort() is called before size(), as in the original ordering
  final int[] sortedIds = hash.sort();
  final IntsRefBuilder keyInts = new IntsRefBuilder();
  final int numEntries = hash.size();
  final BytesRef reusableKey = new BytesRef();
  int idx = 0;
  while (idx < numEntries) {
    final int id = sortedIds[idx];
    final BytesRef key = hash.get(id, reusableKey);
    keyInts.copyUTF8Bytes(key);
    fstBuilder.add(keyInts.get(), new BytesRef(outputValues.get(id)));
    idx++;
  }
  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}

/**
 * Rebuilds {@code fst} from the given weights: each key is added as a byte-level input
 * and its weight, truncated to {@code long}, as the output.
 *
 * @param weights sorted map from key to weight
 * @throws IOException if the FST builder fails
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> weightBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, weightOutputs);
  final BytesRefBuilder keyBytes = new BytesRefBuilder();
  final IntsRefBuilder keyInts = new IntsRefBuilder();
  for (Map.Entry<String, Double> weighted : weights.entrySet()) {
    keyBytes.copyChars(weighted.getKey());
    // longValue() drops the fractional part of the weight
    final long weight = weighted.getValue().longValue();
    weightBuilder.add(Util.toIntsRef(keyBytes.get(), keyInts), weight);
  }
  fst = weightBuilder.finish();
}

     new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions);
 for (IntsRef string; (string = finiteStrings.next()) != null; count++) {
  Util.toBytesRef(string, scratch);
Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
BytesRefBuilder analyzed = new BytesRefBuilder();
BytesRef surface = new BytesRef();
IntsRefBuilder scratchInts = new IntsRefBuilder();
ByteArrayDataInput input = new ByteArrayDataInput();
 analyzed.append((byte) dedup);
 Util.toIntsRef(analyzed.get(), scratchInts);
  builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
 } else {
  int payloadOffset = input.getPosition() + surface.length;
  System.arraycopy(bytes.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
  br.length = br.bytes.length;
  builder.add(scratchInts.get(), outputs.newPair(cost, br));

/**
 * Adds every surface form collected for the current analyzed term to the FST builder,
 * then resets the per-term state ({@code seenSurfaceForms} and {@code count}).
 *
 * @param defaultWeight weight to encode for candidates whose own weight is {@code -1}
 * @throws IOException if adding to the FST builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
  // Extend the analyzed form by two bytes; the last one is overwritten per candidate below.
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());
  final int dedupPos = analyzed.length() - 1;
  for (int ord = 0; ord < count; ord++) {
    // the candidate's position in sorted order doubles as its de-duplication byte
    analyzed.setByteAt(dedupPos, (byte) ord);
    Util.toIntsRef(analyzed.get(), scratchInts);
    final SurfaceFormAndPayload candidate = surfaceFormsAndPayload[ord];
    final long cost;
    if (candidate.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = candidate.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }
  seenSurfaceForms.clear();
  count = 0;
}

/**
 * Returns the single input accepted by the automaton, or {@code null} if it does not
 * accept exactly one input.
 *
 * @param a the automaton to inspect; must be deterministic
 * @return the accepted input, or {@code null}
 * @throws IllegalArgumentException if {@code a} is not deterministic
 */
public static IntsRef getSingleton(Automaton a) {
  if (!a.isDeterministic()) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }
  final IntsRefBuilder path = new IntsRefBuilder();
  final HashSet<Integer> seen = new HashSet<>();
  final Transition transition = new Transition();
  int state = 0;
  while (true) {
    seen.add(state);
    if (a.isAccept(state)) {
      // an accept state with outgoing transitions falls through to null below
      if (a.getNumTransitions(state) == 0) {
        return path.get();
      }
      return null;
    }
    if (a.getNumTransitions(state) != 1) {
      return null;
    }
    a.getTransition(state, 0, transition);
    final boolean singleLabel = transition.min == transition.max;
    if (!singleLabel || seen.contains(transition.dest)) {
      // a label range or a revisited state: automaton accepts more than one string
      return null;
    }
    path.append(transition.min);
    state = transition.dest;
  }
}

assert lastInput.length() == 0 || input.compareTo(lastInput.get()) >= 0: "inputs are added out of order lastInput=" + lastInput.get() + " vs input=" + input;
assert validOutput(output);
final int pos1Stop = Math.min(lastInput.length(), input.length);
while(true) {
 frontier[pos1].inputCount++;
 if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
  break;
freezeTail(prefixLenPlus1);
if (lastInput.length() != input.length || prefixLenPlus1 != input.length + 1) {
 lastNode.isFinal = true;
 lastNode.output = NO_OUTPUT;
 assert validOutput(lastOutput);
if (lastInput.length() == input.length && prefixLenPlus1 == 1+input.length) {
lastInput.copyInts(input);

/**
 * Constructor.
 *
 * @param a Automaton to create finite string from.
 * @param startState The starting state for each path.
 * @param endState The state where each path should stop or -1 if only accepted states should be final.
 */
public FiniteStringsIterator(Automaton a, int startState, int endState) {
 this.a = a;
 this.endState = endState;
 this.nodes = new PathNode[16];
 for (int i = 0, end = nodes.length; i < end; i++) {
  nodes[i] = new PathNode();
 }
 this.string = new IntsRefBuilder();
 this.pathStates = new BitSet(a.getNumStates());
 this.string.setLength(0);
 this.emitEmptyString = a.isAccept(0);
 // Start iteration with node startState.
 if (a.getNumTransitions(startState) > 0) {
  pathStates.set(startState);
  nodes[0].resetState(a, startState);
  string.append(startState);
 }
}

IntsRefBuilder scratchInts = new IntsRefBuilder();
 IntsRefBuilder currentOrds = new IntsRefBuilder();
    Util.toUTF32(currentEntry, scratchInts);
    words.add(scratchInts.get(), currentOrds.get());
    currentOrds = new IntsRefBuilder(); // must be this way
    currentOrds.append(ord);
    currentOrds.append(stemExceptionID);
   } else {
    currentOrds.append(ord);
 Util.toUTF32(currentEntry, scratchInts);
 words.add(scratchInts.get(), currentOrds.get());
 success2 = true;
} finally {

int state;
int pos = 0;
savedStates.grow(seekBytesRef.length()+1);
savedStates.setIntAt(0, 0);
 linear = false;
 for (state = savedStates.intAt(pos); pos < seekBytesRef.length(); pos++) {
  visited[state] = curGen;
  int nextState = runAutomaton.step(state, seekBytesRef.byteAt(pos) & 0xff);
  if (nextState == -1)
   break;
  savedStates.setIntAt(pos+1, nextState);
  if ((pos = backtrack(pos)) < 0) /* no more solutions at all */
   return false;
  final int newState = runAutomaton.step(savedStates.intAt(pos), seekBytesRef.byteAt(pos) & 0xff);
  if (newState >= 0 && runAutomaton.isAccept(newState))

/**
 * Completes construction and returns the final FST.
 *
 * <p>NOTE: this will return null if nothing is accepted by the FST.
 *
 * @return the finished FST, or {@code null}
 * @throws IOException if compiling nodes fails
 */
public FST<T> finish() throws IOException {
  final UnCompiledNode<T> root = frontier[0];

  // minimize nodes in the last word's suffix
  freezeTail(0);

  final boolean belowCount1 = root.inputCount < minSuffixCount1;
  final boolean belowCount2 = root.inputCount < minSuffixCount2;
  if (belowCount1 || belowCount2 || root.numArcs == 0) {
    // either there is no empty output, or the empty string got pruned
    if (fst.emptyOutput == null || minSuffixCount1 > 0 || minSuffixCount2 > 0) {
      return null;
    }
  } else if (minSuffixCount2 != 0) {
    compileAllTargets(root, lastInput.length());
  }

  fst.finish(compileNode(root, lastInput.length()).node);
  return fst;
}

private void freezeTail(int prefixLenPlus1) throws IOException {
 for(int idx=lastInput.length(); idx >= downTo; idx--) {
   parent.deleteLast(lastInput.intAt(idx-1), node);
  } else {
    compileAllTargets(node, lastInput.length()-idx);
    parent.replaceLast(lastInput.intAt(idx-1),
              compileNode(node, 1+lastInput.length()-idx),
              nextFinalOutput,
              isFinal);
    parent.replaceLast(lastInput.intAt(idx-1),
              node,
              nextFinalOutput,

for (int i = builder.length(); --i > 0;)
  builder.setCharAt(i, Character.toLowerCase(builder.charAt(i)));
this.utf32 = UTF16ToUTF32(builder, utf32Builder).get();
builder.setLength(0);
maxPathsBuilder.clear();
maxPathsBuilder.grow(utf32.length + 1);
Arrays.fill(maxPathsBuilder.ints(), 0, utf32.length + 1, Integer.MAX_VALUE);

/**
 * Starting from {@code fromNode}, finds the top N min cost completions to a final node.
 *
 * @return the result of {@code TopNSearcher.search()}
 * @throws IOException if the search fails
 */
public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
                        boolean allowEmptyString) throws IOException {
  // The search is seeded with a single start node, so an empty input path is fine here.
  final IntsRefBuilder emptyInput = new IntsRefBuilder();

  // All paths are kept, so topN can also serve as maxQueueDepth
  // and the pruning is admissible.
  final TopNSearcher<T> topNSearcher = new TopNSearcher<>(fst, topN, topN, comparator);
  topNSearcher.addStartPaths(fromNode, startOutput, allowEmptyString, emptyInput);
  return topNSearcher.search();
}

/**
 * Reverse lookup (lookup by output instead of by input), in the special case when
 * your FST's outputs are strictly ascending. This locates the input/output pair
 * whose output equals the target, and returns null if that output does not exist.
 *
 * <p>NOTE: this only works with {@code FST<Long>}, and only when the outputs are
 * ascending in order with the inputs. For example, simple ordinals (0, 1, 2, ...),
 * or file offsets (when appending to a file) fit this.
 */
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
  final BytesReader reader = fst.getBytesReader();

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<Long> firstArc = fst.getFirstArc(new FST.Arc<Long>());
  final FST.Arc<Long> spareArc = new FST.Arc<>();
  final IntsRefBuilder inputBuilder = new IntsRefBuilder();

  return getByOutput(fst, targetOutput, reader, firstArc, spareArc, inputBuilder);
}

 /** Renders the input, output, context, boost and payload fields as one space-separated line. */
 @Override
 public String toString() {
  final String inputPart = "input=" + input.get();
  final String outputPart = " output=" + output;
  final String contextPart = " context=" + context;
  final String boostPart = " boost=" + boost;
  final String payloadPart = " payload=" + payload;
  return inputPart + outputPart + contextPart + boostPart + payloadPart;
 }
}

Javadoc

A builder for IntsRef instances.

Most used methods

<init>
Sole constructor.
get
Return an IntsRef that points to the internal content of this builder. Any update to the content of this builder might invalidate the provided ref and vice-versa.
grow
Used to grow the reference array. In general this should not be used as it does not take the offset into account.
setIntAt
Set an int.
setLength
Set the length.
append
Append the provided int to this buffer.
clear
Empty this builder.
intAt
Return the int at the given offset.
length
Return the number of ints in this buffer.
copyInts
Copies the given array into this instance.
copyUTF8Bytes
Copy the given UTF-8 bytes into this builder. Works as if the bytes were first converted from UTF-8 to UTF-32 and then copied into this builder.
ints
Return a reference to the ints of this builder.

Popular in Java

Start an intent from android
getContentResolver (Context)
compareTo (BigDecimal)
notifyDataSetChanged (ArrayAdapter)
URLConnection (java.net)
A connection to a URL for reading or writing. For HTTP connections, see HttpURLConnection for docume
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
GridLayout (java.awt)
The GridLayout class is a layout manager that lays out a container's components in a rectangular gri
BoxLayout (javax.swing)
Table (org.hibernate.mapping)
A relational table
CodeWhisperer alternatives

How to use IntsRefBuilder in org.apache.lucene.util

Best Java code snippets using org.apache.lucene.util.IntsRefBuilder (Showing top 20 results out of 315)

Refine search

How to use
IntsRefBuilder
in
org.apache.lucene.util