/**
 * Initializes the searcher's state from the supplied arguments.
 *
 * <p>Besides storing each argument in the field of the same name, this obtains a
 * bytes reader from {@code fst} and creates the path queue as a {@link TreeSet}
 * ordered by {@code pathComparator}.
 *
 * @param fst the FST to search; a bytes reader is taken from it immediately
 * @param topN stored as-is (presumably the number of results to collect; confirm against callers)
 * @param maxQueueDepth stored as-is (presumably an upper bound on the queue size; confirm against callers)
 * @param comparator comparator over output values, stored as-is
 * @param pathComparator comparator over paths; also defines the ordering of the queue
 */
public TopNSearcher(FST<T> fst, int topN, int maxQueueDepth, Comparator<T> comparator, Comparator<FSTPath<T>> pathComparator) {
  // Limits.
  this.topN = topN;
  this.maxQueueDepth = maxQueueDepth;

  // Orderings, plus the queue that is sorted by the path ordering.
  this.comparator = comparator;
  this.pathComparator = pathComparator;
  this.queue = new TreeSet<>(pathComparator);

  // The automaton itself and the reader used to traverse it.
  this.fst = fst;
  this.bytesReader = fst.getBytesReader();
}
/**
 * Consistency check for a cached root arc.
 *
 * <p>Resolves {@code label} from the first arc with {@code findTargetArc} and asserts
 * that {@code cachedArc} agrees with the result: both must be {@code null}, or both
 * non-null with matching fields.
 *
 * @param label the arc label to look up from the first arc
 * @param cachedArc the cached arc to validate; may be {@code null}
 * @return always {@code true} (presumably so the call can itself be wrapped in an
 *     {@code assert}; confirm against callers)
 * @throws IOException if reading the FST fails
 */
private boolean assertRootCachedArc(int label, Arc<T> cachedArc) throws IOException {
  final Arc<T> rootArc = new Arc<>();
  getFirstArc(rootArc);
  final BytesReader reader = getBytesReader();

  // NOTE(review): the trailing 'false' is passed through unchanged; it presumably
  // makes this lookup bypass the very cache being verified -- confirm.
  final Arc<T> expected = findTargetArc(label, rootArc, rootArc, reader, false);

  if (expected == null) {
    // No such arc from the root: nothing may be cached for it either.
    assert cachedArc == null;
    return true;
  }

  assert cachedArc != null;
  // Field-by-field comparison, in the same order as before.
  assert cachedArc.arcIdx == expected.arcIdx;
  assert cachedArc.bytesPerArc == expected.bytesPerArc;
  assert cachedArc.flags == expected.flags;
  assert cachedArc.label == expected.label;
  assert cachedArc.nextArc == expected.nextArc;
  assert cachedArc.nextFinalOutput.equals(expected.nextFinalOutput);
  assert cachedArc.numArcs == expected.numArcs;
  assert cachedArc.output.equals(expected.output);
  assert cachedArc.posArcsStart == expected.posArcsStart;
  assert cachedArc.target == expected.target;
  return true;
}
/**
 * doFloor controls the behavior of advance: if doFloor is true,
 * advance positions to the biggest term before target.
 *
 * <p>This constructor stores {@code fst}, obtains a bytes reader and the
 * no-output value from it, loads the FST's first arc into arc slot 0 and
 * sets output slot 0 to the no-output value.
 *
 * @param fst the FST to enumerate
 */
protected FSTEnum(FST<T> fst) {
  this.fst = fst;
  this.fstReader = fst.getBytesReader();
  this.NO_OUTPUT = fst.outputs.getNoOutput();

  // Seed position 0: first arc of the FST, with nothing accumulated yet.
  fst.getFirstArc(getArc(0));
  this.output[0] = NO_OUTPUT;
}
fstReader = null; } else { fstReader = fr.index.getBytesReader();
final BytesReader r = fst.getBytesReader();
/** Reverse lookup (lookup by output instead of by input), * in the special case when your FSTs outputs are * strictly ascending. This locates the input/output * pair where the output is equal to the target, and will * return null if that output does not exist. * * <p>NOTE: this only works with {@code FST<Long>}, only * works when the outputs are ascending in order with * the inputs. * For example, simple ordinals (0, 1, * 2, ...), or file offets (when appending to a file) * fit this. */ public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException { final BytesReader in = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>()); FST.Arc<Long> scratchArc = new FST.Arc<>(); final IntsRefBuilder result = new IntsRefBuilder(); return getByOutput(fst, targetOutput, in, arc, scratchArc, result); }
/** Looks up the output for this input, or null if the * input is not accepted */ public static<T> T get(FST<T> fst, BytesRef input) throws IOException { assert fst.inputType == FST.INPUT_TYPE.BYTE1; final BytesReader fstReader = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); // Accumulate output as we go T output = fst.outputs.getNoOutput(); for(int i=0;i<input.length;i++) { if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) { return null; } output = fst.outputs.add(output, arc.output); } if (arc.isFinal()) { return fst.outputs.add(output, arc.nextFinalOutput); } else { return null; } }
/** Looks up the output for this input, or null if the * input is not accepted. */ public static<T> T get(FST<T> fst, IntsRef input) throws IOException { // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final BytesReader fstReader = fst.getBytesReader(); // Accumulate output as we go T output = fst.outputs.getNoOutput(); for(int i=0;i<input.length;i++) { if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) { return null; } output = fst.outputs.add(output, arc.output); } if (arc.isFinal()) { return fst.outputs.add(output, arc.nextFinalOutput); } else { return null; } }
/**
 * Returns a {@link BytesReader} to pass to the {@link #get(char[], int, FST.Arc, FST.BytesReader)} method.
 *
 * @return a reader obtained from the underlying FST, or {@code null} when there is no FST
 */
public BytesReader getBytesReader() {
  return (fst == null) ? null : fst.getBytesReader();
}
final BytesReader fstReader = fst.getBytesReader(); final T NO_OUTPUT = fst.outputs.getNoOutput();
fstReader = fr.index.getBytesReader();
getFirstArc(arc); if (targetHasArcs(arc)) { final BytesReader in = getBytesReader(); Arc<T>[] arcs = (Arc<T>[]) new Arc[0x80]; readFirstRealTargetArc(arc.target, arc, in);
int targetLabel = getTargetLabel(); final FST.BytesReader fstReader = fst.getBytesReader();
/**
 * Default constructor that takes a {@link Reader}.
 *
 * <p>Resets the internal buffer onto {@code in} and copies the map and the cached
 * root arcs out of {@code normMap}. A bytes reader is obtained only when the map
 * is non-null; otherwise the reader field is left {@code null}.
 *
 * @param normMap source of the mapping FST and its cached root arcs
 * @param in the reader to filter
 */
public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
  super(in);
  buffer.reset(in);

  map = normMap.map;
  cachedRootArcs = normMap.cachedRootArcs;

  // No map means there is nothing to read from.
  fstReader = (map == null) ? null : map.getBytesReader();
}
/**
 * Apply previously built synonyms to incoming tokens.
 *
 * @param input input tokenstream
 * @param synonyms synonym map
 * @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}.
 *                   Note, if you set this to true, it's your responsibility to lowercase
 *                   the input entries when you create the {@link SynonymMap}
 * @throws IllegalArgumentException if {@code synonyms.fst} is {@code null}
 */
public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
  super(input);

  // Reject a synonym map without an FST before storing anything.
  if (synonyms.fst == null) {
    throw new IllegalArgumentException("fst must be non-null");
  }

  this.synonyms = synonyms;
  this.ignoreCase = ignoreCase;

  this.fst = synonyms.fst;
  this.fstReader = this.fst.getBytesReader();
  this.scratchArc = new FST.Arc<>();
}
/** * Constructs a new Stemmer which will use the provided Dictionary to create its stems. * * @param dictionary Dictionary that will be used to create the stems */ public Stemmer(Dictionary dictionary) { this.dictionary = dictionary; this.affixReader = new ByteArrayDataInput(dictionary.affixData); for (int level = 0; level < 3; level++) { if (dictionary.prefixes != null) { prefixArcs[level] = new FST.Arc<>(); prefixReaders[level] = dictionary.prefixes.getBytesReader(); } if (dictionary.suffixes != null) { suffixArcs[level] = new FST.Arc<>(); suffixReaders[level] = dictionary.suffixes.getBytesReader(); } } formStep = dictionary.hasStemExceptions ? 2 : 1; }
return null; final FST.BytesReader bytesReader = fst.getBytesReader(); final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
private NormalizeCharMap(FST<CharsRef> map) { this.map = map; if (map != null) { try { // Pre-cache root arcs: final FST.Arc<CharsRef> scratchArc = new FST.Arc<>(); final FST.BytesReader fstReader = map.getBytesReader(); map.getFirstArc(scratchArc); if (FST.targetHasArcs(scratchArc)) { map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader); while(true) { assert scratchArc.label != FST.END_LABEL; cachedRootArcs.put(Character.valueOf((char) scratchArc.label), new FST.Arc<CharsRef>().copyFrom(scratchArc)); if (scratchArc.isLast()) { break; } map.readNextRealArc(scratchArc, fstReader); } } //System.out.println("cached " + cachedRootArcs.size() + " root arcs"); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } } }
final FST.BytesReader in = fst.getBytesReader(); int low = arc.arcIdx; int high = arc.numArcs-1;
final FST.BytesReader in = fst.getBytesReader(); int low = arc.arcIdx; int high = arc.numArcs-1;