private FST.Arc<BytesRef> getArc(int ord) { if (ord >= arcs.length) { @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] next = new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(arcs, 0, next, 0, arcs.length); for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) { next[arcOrd] = new FST.Arc<>(); } arcs = next; } return arcs[ord]; }
private FST.Arc<BytesRef> getArc(int ord) { if (ord >= arcs.length) { @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] next = new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(arcs, 0, next, 0, arcs.length); for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) { next[arcOrd] = new FST.Arc<>(); } arcs = next; } return arcs[ord]; }
private boolean assertRootCachedArc(int label, Arc<T> cachedArc) throws IOException { Arc<T> arc = new Arc<>(); getFirstArc(arc); BytesReader in = getBytesReader(); Arc<T> result = findTargetArc(label, arc, arc, in, false); if (result == null) { assert cachedArc == null; } else { assert cachedArc != null; assert cachedArc.arcIdx == result.arcIdx; assert cachedArc.bytesPerArc == result.bytesPerArc; assert cachedArc.flags == result.flags; assert cachedArc.label == result.label; assert cachedArc.nextArc == result.nextArc; assert cachedArc.nextFinalOutput.equals(result.nextFinalOutput); assert cachedArc.numArcs == result.numArcs; assert cachedArc.output.equals(result.output); assert cachedArc.posArcsStart == result.posArcsStart; assert cachedArc.target == result.target; } return true; }
private FST.Arc<Output> getArc(int ord) { if (ord >= arcs.length) { @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<Output>[] next = new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(arcs, 0, next, 0, arcs.length); for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) { next[arcOrd] = new FST.Arc<>(); } arcs = next; } return arcs[ord]; }
FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<>();
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException { final FST.BytesReader bytesReader = fst.getBytesReader(); final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>()); final CharsRef NO_OUTPUT = fst.outputs.getNoOutput(); final FST.Arc<CharsRef> arc = new FST.Arc<>(); int longestMatch; CharsRef longestOutput;
/** * @param input input tokenstream * @param synonyms synonym map * @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}. * Note, if you set this to true, it's your responsibility to lowercase * the input entries when you create the {@link SynonymMap} */ public SynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { super(input); this.synonyms = synonyms; this.ignoreCase = ignoreCase; this.fst = synonyms.fst; if (fst == null) { throw new IllegalArgumentException("fst must be non-null"); } this.fstReader = fst.getBytesReader(); // Must be 1+ so that when roll buffer is at full // lookahead we can distinguish this full buffer from // the empty buffer: rollBufferSize = 1+synonyms.maxHorizontalContext; futureInputs = new PendingInput[rollBufferSize]; futureOutputs = new PendingOutputs[rollBufferSize]; for(int pos=0;pos<rollBufferSize;pos++) { futureInputs[pos] = new PendingInput(); futureOutputs[pos] = new PendingOutputs(); } //System.out.println("FSTFilt maxH=" + synonyms.maxHorizontalContext); scratchArc = new FST.Arc<>(); }
final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
private NormalizeCharMap(FST<CharsRef> map) { this.map = map; if (map != null) { try { // Pre-cache root arcs: final FST.Arc<CharsRef> scratchArc = new FST.Arc<>(); final FST.BytesReader fstReader = map.getBytesReader(); map.getFirstArc(scratchArc); if (FST.targetHasArcs(scratchArc)) { map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader); while(true) { assert scratchArc.label != FST.END_LABEL; cachedRootArcs.put(Character.valueOf((char) scratchArc.label), new FST.Arc<CharsRef>().copyFrom(scratchArc)); if (scratchArc.isLast()) { break; } map.readNextRealArc(scratchArc, fstReader); } } //System.out.println("cached " + cachedRootArcs.size() + " root arcs"); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } } }
/** * Constructs a new Stemmer which will use the provided Dictionary to create its stems. * * @param dictionary Dictionary that will be used to create the stems */ public Stemmer(Dictionary dictionary) { this.dictionary = dictionary; this.affixReader = new ByteArrayDataInput(dictionary.affixData); for (int level = 0; level < 3; level++) { if (dictionary.prefixes != null) { prefixArcs[level] = new FST.Arc<>(); prefixReaders[level] = dictionary.prefixes.getBytesReader(); } if (dictionary.suffixes != null) { suffixArcs[level] = new FST.Arc<>(); suffixReaders[level] = dictionary.suffixes.getBytesReader(); } } formStep = dictionary.hasStemExceptions ? 2 : 1; }
/** * Apply previously built synonyms to incoming tokens. * @param input input tokenstream * @param synonyms synonym map * @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}. * Note, if you set this to true, it's your responsibility to lowercase * the input entries when you create the {@link SynonymMap} */ public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { super(input); this.synonyms = synonyms; this.fst = synonyms.fst; if (fst == null) { throw new IllegalArgumentException("fst must be non-null"); } this.fstReader = fst.getBytesReader(); scratchArc = new FST.Arc<>(); this.ignoreCase = ignoreCase; }
arcs[arcIdx] = new FST.Arc<>();
arcs[arcIdx] = new FST.Arc<>();
/** Reverse lookup (lookup by output instead of by input), * in the special case when your FSTs outputs are * strictly ascending. This locates the input/output * pair where the output is equal to the target, and will * return null if that output does not exist. * * <p>NOTE: this only works with {@code FST<Long>}, only * works when the outputs are ascending in order with * the inputs. * For example, simple ordinals (0, 1, * 2, ...), or file offets (when appending to a file) * fit this. */ public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException { final BytesReader in = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>()); FST.Arc<Long> scratchArc = new FST.Arc<>(); final IntsRefBuilder result = new IntsRefBuilder(); return getByOutput(fst, targetOutput, in, arc, scratchArc, result); }
/** Looks up the output for this input, or null if the * input is not accepted. */ public static<T> T get(FST<T> fst, IntsRef input) throws IOException { // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final BytesReader fstReader = fst.getBytesReader(); // Accumulate output as we go T output = fst.outputs.getNoOutput(); for(int i=0;i<input.length;i++) { if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) { return null; } output = fst.outputs.add(output, arc.output); } if (arc.isFinal()) { return fst.outputs.add(output, arc.nextFinalOutput); } else { return null; } }
/** Looks up the output for this input, or null if the * input is not accepted */ public static<T> T get(FST<T> fst, BytesRef input) throws IOException { assert fst.inputType == FST.INPUT_TYPE.BYTE1; final BytesReader fstReader = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); // Accumulate output as we go T output = fst.outputs.getNoOutput(); for(int i=0;i<input.length;i++) { if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) { return null; } output = fst.outputs.add(output, arc.output); } if (arc.isFinal()) { return fst.outputs.add(output, arc.nextFinalOutput); } else { return null; } }
private FST.Arc<T> getArc(int idx) { if (arcs[idx] == null) { arcs[idx] = new FST.Arc<>(); } return arcs[idx]; } }
public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) { this.arc = new FST.Arc<T>().copyFrom(arc); this.output = output; this.input = input; this.boost = boost; this.context = context; this.payload = payload; }