/**
 * Builds an AutomatonQuery around the given {@link Automaton}.
 *
 * @param term supplies the field and possibly some pattern structure; the term
 *        text itself is ignored.
 * @param automaton automaton to run; every term it accepts is considered a match.
 * @param maxDeterminizedStates maximum number of states in the resulting
 *        automata. If the automata would need more than this many states,
 *        TooComplexToDeterminizeException is thrown. Higher numbers require
 *        more space but can process more complex automata.
 * @param isBinary if true, this automaton is already binary and will not go
 *        through the UTF32ToUTF8 conversion.
 */
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
  super(term.field());
  // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
  this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
  this.automatonIsBinary = isBinary;
  this.automaton = automaton;
  this.term = term;
}
/**
 * Obtains the enum over {@code terms} from the compiled automaton.
 * The {@code atts} argument is not consulted by this implementation.
 */
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  final TermsEnum matchingTerms = compiled.getTermsEnum(terms);
  return matchingTerms;
}
return addTail(state, output, idx, label);
/** {@inheritDoc} */
@Override
public TermsEnum getTermsEnumForSuggestions(final Terms terms) {
  // Nothing to enumerate without terms or without a prefix.
  if (terms == null) {
    return TermsEnum.EMPTY;
  }
  final BytesRef prefix = getPrefix();
  if (prefix == null) {
    return TermsEnum.EMPTY;
  }

  final Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix);
  final Automaton finalAutomaton;
  if (suggestPosition != SuggestPosition.LOWER) {
    // Prefix matches minus the interval that ends at the lower term.
    final Automaton excluded =
        Automata.makeBinaryInterval(null, true, getLowerTerm(), !includesLower());
    finalAutomaton = Operations.minus(prefixAutomaton, excluded, Integer.MIN_VALUE);
  } else {
    // Prefix matches intersected with the interval between the lower and upper terms.
    final Automaton range =
        Automata.makeBinaryInterval(getLowerTerm(), includesLower(), getUpperTerm(), includesUpper());
    finalAutomaton = Operations.intersection(range, prefixAutomaton);
  }

  final CompiledAutomaton compiledAutomaton = new CompiledAutomaton(finalAutomaton);
  try {
    return compiledAutomaton.getTermsEnum(terms);
  } catch (IOException e) {
    // Best effort: log the failure and fall back to an empty enum.
    logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e);
    return TermsEnum.EMPTY;
  }
}
/**
 * Combines the superclass hash with the hash of the compiled automaton and of
 * the term, using 31 as the multiplier; a null term contributes 0.
 */
@Override
public int hashCode() {
  int hash = super.hashCode();
  hash = 31 * hash + compiled.hashCode();
  hash *= 31;
  if (term != null) {
    hash += term.hashCode();
  }
  return hash;
}
/** * return an automata-based enum for matching up to editDistance from * lastTerm, if possible */ private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException { assert editDistance < automata.length; final CompiledAutomaton compiled = automata[editDistance]; BytesRef initialSeekTerm; if (lastTerm == null) { // This is the first enum we are pulling: initialSeekTerm = null; } else { // We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2): initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder()); } return terms.intersect(compiled, initialSeekTerm); }
/**
 * Two queries are equal when the superclass considers them equal, they share the
 * same runtime class, and both their compiled automata and terms are equal
 * (two null terms count as equal).
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!super.equals(obj) || getClass() != obj.getClass()) {
    return false;
  }
  final AutomatonQuery that = (AutomatonQuery) obj;
  if (!compiled.equals(that.compiled)) {
    return false;
  }
  return (term == null) ? that.term == null : term.equals(that.term);
}
sinkState = findSinkState(this.automaton);
TermsEnum termsEnum; CompiledAutomaton compiledAutomation = new CompiledAutomaton(new RegExp(RcIrStringUtils.getQueryRegEx(queryText)).toAutomaton()); termsEnum = compiledAutomation.getTermsEnum(terms); termsEnum = compiledAutomation.getTermsEnum(terms);
/**
 * Hashes this query from the superclass hash, the compiled automaton and the
 * term, multiplying by 31 at each step; a null term adds 0.
 */
@Override
public int hashCode() {
  int hash = super.hashCode();
  hash = 31 * hash + compiled.hashCode();
  hash *= 31;
  if (term != null) {
    hash += term.hashCode();
  }
  return hash;
}
/** * return an automata-based enum for matching up to editDistance from * lastTerm, if possible */ private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException { assert editDistance < automata.length; final CompiledAutomaton compiled = automata[editDistance]; BytesRef initialSeekTerm; if (lastTerm == null) { // This is the first enum we are pulling: initialSeekTerm = null; } else { // We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2): initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder()); } return terms.intersect(compiled, initialSeekTerm); }
/**
 * Equality requires the superclass check to pass, identical runtime classes,
 * equal compiled automata, and equal terms (two null terms count as equal).
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!super.equals(obj) || getClass() != obj.getClass()) {
    return false;
  }
  final AutomatonQuery that = (AutomatonQuery) obj;
  if (!compiled.equals(that.compiled)) {
    return false;
  }
  return (term == null) ? that.term == null : term.equals(that.term);
}
sinkState = findSinkState(this.automaton);
/** Test Terms.intersect on this range, and validates that it returns the same doc ids as using non-intersect TermsEnum. Returns true if * any fake terms were seen. */ private static boolean checkSingleTermRange(String field, int maxDoc, Terms terms, BytesRef minTerm, BytesRef maxTerm, FixedBitSet normalDocs, FixedBitSet intersectDocs) throws IOException { //System.out.println(" check minTerm=" + minTerm.utf8ToString() + " maxTerm=" + maxTerm.utf8ToString()); assert minTerm.compareTo(maxTerm) <= 0; TermsEnum termsEnum = terms.iterator(); TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm); if (status != TermsEnum.SeekStatus.FOUND) { throw new RuntimeException("failed to seek to existing term field=" + field + " term=" + minTerm); } // Do "dumb" iteration to visit all terms in the range: long normalTermCount = getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false); // Now do the same operation using intersect: long intersectTermCount = getDocsFromTermRange(field, maxDoc, terms.intersect(new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, true, maxTerm, false), true, false, Integer.MAX_VALUE, true), null), intersectDocs, minTerm, maxTerm, true); if (intersectTermCount > normalTermCount) { throw new RuntimeException("intersect returned too many terms: field=" + field + " intersectTermCount=" + intersectTermCount + " normalTermCount=" + normalTermCount); } if (normalDocs.equals(intersectDocs) == false) { throw new RuntimeException("intersect visited different docs than straight terms enum: " + normalDocs.cardinality() + " for straight enum, vs " + intersectDocs.cardinality() + " for intersect, minTerm=" + minTerm + " maxTerm=" + maxTerm); } //System.out.println(" docs=" + normalTermCount); //System.out.println(" " + intersectTermCount + " vs " + normalTermCount); return intersectTermCount != normalTermCount; }
/** * Computes which global ordinals are accepted by this IncludeExclude instance. * */ @Override public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); TermsEnum globalTermsEnum; Terms globalTerms = new DocValuesTerms(globalOrdinals); // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits. globalTermsEnum = compiled.getTermsEnum(globalTerms); for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) { acceptedGlobalOrdinals.set(globalTermsEnum.ord()); } return acceptedGlobalOrdinals; }
/**
 * Derives the hash from the superclass hash, the compiled automaton and the term,
 * with 31 as the multiplier; a null term contributes 0.
 */
@Override
public int hashCode() {
  int hash = super.hashCode();
  hash = 31 * hash + compiled.hashCode();
  hash *= 31;
  if (term != null) {
    hash += term.hashCode();
  }
  return hash;
}
/** * return an automata-based enum for matching up to editDistance from * lastTerm, if possible */ protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException { final List<CompiledAutomaton> runAutomata = initAutomata(editDistance); if (editDistance < runAutomata.size()) { //System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString())); final CompiledAutomaton compiled = runAutomata.get(editDistance); return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())), runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1])); } else { return null; } }
/**
 * Compares this query with {@code obj}: the superclass check must pass, the runtime
 * classes must match, and the compiled automata and terms must be equal (two null
 * terms count as equal).
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!super.equals(obj) || getClass() != obj.getClass()) {
    return false;
  }
  final AutomatonQuery that = (AutomatonQuery) obj;
  if (!compiled.equals(that.compiled)) {
    return false;
  }
  return (term == null) ? that.term == null : term.equals(that.term);
}
return addTail(state, output, idx, label);
sinkState = findSinkState(this.automaton);