break; case REGEXP_CHAR: a = Automata.makeChar(c); break; case REGEXP_CHAR_RANGE: a = Automata.makeCharRange(from, to); break; case REGEXP_ANYCHAR: a = Automata.makeAnyChar(); break; case REGEXP_EMPTY: a = Automata.makeEmpty(); break; case REGEXP_STRING: a = Automata.makeString(s); break; case REGEXP_ANYSTRING: a = Automata.makeAnyString(); break; case REGEXP_AUTOMATON: break; case REGEXP_INTERVAL: a = Automata.makeDecimalInterval(min, max, digits); break;
return Automata.makeEmpty(); b = Automata.makeEmptyString(); } else if (min == 1) { b = new Automaton();
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) { if (lowerTerm == null) { // makeBinaryInterval is more picky than we are: includeLower = true; } if (upperTerm == null) { // makeBinaryInterval is more picky than we are: includeUpper = true; } return Automata.makeBinaryInterval(lowerTerm, includeLower, upperTerm, includeUpper); }
switch(c) { case WILDCARD_STRING: automata.add(Automata.makeAnyString()); break; case WILDCARD_CHAR: automata.add(Automata.makeAnyChar()); break; case WILDCARD_ESCAPE: final int nextChar = wildcardText.codePointAt(i + length); length += Character.charCount(nextChar); automata.add(Automata.makeChar(nextChar)); break; automata.add(Automata.makeChar(c));
/**
 * Builds the sub-automaton for decimal numbers whose value is at least
 * {@code x.substring(n)} and whose length equals {@code x.substring(n).length()}.
 *
 * @param builder  builder that receives the new states and transitions
 * @param x        digit string giving the lower bound
 * @param n        index in {@code x} at which this sub-automaton starts
 * @param initials collection that receives each non-accepting state created while
 *                 {@code zeros} is still {@code true}
 * @param zeros    {@code true} while every digit consumed so far on this path was {@code '0'}
 * @return the id of the state created for position {@code n}
 */
private static int atLeast(Automaton.Builder builder, String x, int n, Collection<Integer> initials, boolean zeros) {
  final int state = builder.createState();
  if (n == x.length()) {
    // All digits consumed: this state accepts.
    builder.setAccept(state, true);
    return state;
  }
  if (zeros) {
    initials.add(state);
  }
  final char digit = x.charAt(n);
  // Same digit as the bound: the remaining digits must still be at least the bound's remainder.
  final int sameDigitTarget = atLeast(builder, x, n + 1, initials, zeros && digit == '0');
  builder.addTransition(state, sameDigitTarget, digit);
  if (digit < '9') {
    // Any larger digit: the remaining digits are unconstrained apart from their count.
    final int largerDigitTarget = anyOfRightLength(builder, x, n + 1);
    builder.addTransition(state, largerDigitTarget, (char) (digit + 1), '9');
  }
  return state;
}
/**
 * Constructs the sub-automaton corresponding to decimal numbers of value at most
 * {@code x.substring(n)} and of length {@code x.substring(n).length()}.
 *
 * @param builder builder that receives the new states and transitions
 * @param x       digit string giving the upper bound
 * @param n       index in {@code x} at which this sub-automaton starts
 * @return the id of the state created for position {@code n}
 */
private static int atMost(Automaton.Builder builder, String x, int n) {
  int s = builder.createState();
  if (x.length() == n) {
    // All digits consumed: this state accepts.
    builder.setAccept(s, true);
  } else {
    char c = x.charAt(n);
    // Same digit as the bound: the remainder must still be at most the bound's remainder.
    // The index is advanced as a plain int; the former "(char) n + 1" parsed as
    // "((char) n) + 1" and would have truncated n once it reached 65536.
    builder.addTransition(s, atMost(builder, x, n + 1), c);
    if (c > '0') {
      // Any smaller digit: the remaining digits are unconstrained apart from their count.
      builder.addTransition(s, anyOfRightLength(builder, x, n + 1), '0', (char) (c - 1));
    }
  }
  return s;
}
for (IntsRef string; (string = finiteStrings.next()) != null;) { if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) { subs.add(Automata.makeString(string.ints, string.offset, string.length)); } else { int ints[] = new int[string.length-nonFuzzyPrefix]; return Automata.makeEmpty(); // matches nothing } else if (subs.size() == 1) {
/**
 * Return an {@link Automaton} that matches the given pattern.
 *
 * <p>The pattern is split at every {@code '*'}: each literal piece becomes a string
 * automaton, each {@code '*'} becomes an any-string automaton, and the pieces are
 * concatenated in order.
 *
 * @param pattern the simple pattern, where {@code '*'} is the only special character
 * @return the concatenation of the automata built for each piece of the pattern
 */
public static Automaton simpleMatchToAutomaton(String pattern) {
  final List<Automaton> parts = new ArrayList<>();
  int segmentStart = 0;
  int star = pattern.indexOf('*');
  while (star != -1) {
    // Literal text before the wildcard, then the wildcard itself.
    parts.add(Automata.makeString(pattern.substring(segmentStart, star)));
    parts.add(Automata.makeAnyString());
    segmentStart = star + 1;
    star = pattern.indexOf('*', star + 1);
  }
  // Trailing literal text after the last wildcard (possibly empty).
  parts.add(Automata.makeString(pattern.substring(segmentStart)));
  return Operations.concatenate(parts);
}
/** * Returns a function that filters a document map based on the given include and exclude rules. * @see #filter(Map, String[], String[]) for details */ public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) { CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString()); CharacterRunAutomaton include; if (includes == null || includes.length == 0) { include = matchAllAutomaton; } else { Automaton includeA = Regex.simpleMatchToAutomaton(includes); includeA = makeMatchDotsInFieldNames(includeA); include = new CharacterRunAutomaton(includeA); } Automaton excludeA; if (excludes == null || excludes.length == 0) { excludeA = Automata.makeEmpty(); } else { excludeA = Regex.simpleMatchToAutomaton(excludes); excludeA = makeMatchDotsInFieldNames(excludeA); } CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA); // NOTE: We cannot use Operations.minus because of the special case that // we want all sub properties to match as soon as an object matches return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton); }
/**
 * Make matches on objects also match dots in field names.
 *
 * <p>For instance, if the original simple regex is `foo`, the result accepts
 * `foo` OR `foo.*`.
 *
 * @param automaton the automaton to extend
 * @return the union of {@code automaton} and {@code automaton} followed by a dot and any string
 */
private static Automaton makeMatchDotsInFieldNames(Automaton automaton) {
  final Automaton withDottedSuffix =
      Operations.concatenate(Arrays.asList(automaton, Automata.makeChar('.'), Automata.makeAnyString()));
  return Operations.union(automaton, withDottedSuffix);
}
/**
 * Returns a new (deterministic and minimal) automaton that accepts the union
 * of the given collection of {@link BytesRef}s representing UTF-8 encoded
 * strings.
 *
 * <p>An empty collection yields the empty automaton; otherwise construction is
 * delegated to {@code DaciukMihovAutomatonBuilder.build}.
 *
 * @param utf8Strings
 *          The input strings, UTF-8 encoded. The collection must be in sorted
 *          order.
 *
 * @return An {@link Automaton} accepting all input strings. The resulting
 *         automaton is codepoint based (full unicode codepoints on
 *         transitions).
 */
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  if (utf8Strings.isEmpty()) {
    // Nothing to accept.
    return makeEmpty();
  }
  return DaciukMihovAutomatonBuilder.build(utf8Strings);
}
}
/**
 * Builds the automaton described by this object's include/exclude settings.
 *
 * <p>The accepted set starts from {@code include} if set, else from the union of
 * {@code includeValues} if set, else from "any string". Then {@code exclude} is
 * subtracted if set, else {@code excludeValues} if set.
 *
 * @return the resulting automaton
 */
private Automaton toAutomaton() {
  final Automaton accepted;
  if (include != null) {
    accepted = include.toAutomaton();
  } else if (includeValues != null) {
    accepted = Automata.makeStringUnion(includeValues);
  } else {
    accepted = Automata.makeAnyString();
  }

  if (exclude != null) {
    return Operations.minus(accepted, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  if (excludeValues != null) {
    return Operations.minus(accepted, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  return accepted;
}
assert prefix != null; if (n == 0) { return Automata.makeString(prefix + UnicodeUtil.newString(word, 0, word.length));
/** * Constructs sub-automaton corresponding to decimal numbers of value between * x.substring(n) and y.substring(n) and of length x.substring(n).length() * (which must be equal to y.substring(n).length()). */ private static int between(Automaton.Builder builder, String x, String y, int n, Collection<Integer> initials, boolean zeros) { int s = builder.createState(); if (x.length() == n) { builder.setAccept(s, true); } else { if (zeros) { initials.add(s); } char cx = x.charAt(n); char cy = y.charAt(n); if (cx == cy) { builder.addTransition(s, between(builder, x, y, n + 1, initials, zeros && cx == '0'), cx); } else { // cx<cy builder.addTransition(s, atLeast(builder, x, n + 1, initials, zeros && cx == '0'), cx); builder.addTransition(s, atMost(builder, y, n + 1), cy); if (cx + 1 < cy) { builder.addTransition(s, anyOfRightLength(builder, x, n+1), (char) (cx + 1), (char) (cy - 1)); } } } return s; }
cmp = -1; if (min.length == 0 && minInclusive) { return makeAnyBinary(); return makeEmpty(); } else { return makeBinary(min); return makeEmpty(); suffixIsZeros(max, min.length)) { return makeEmpty(); } else { return makeBinary(min);
Term prefix = pq.getPrefix(); if (prefix.field().equals(field)) { list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) { @Override public String toString() { list.add(new CharacterRunAutomaton(Automata.makeEmpty()) { @Override public boolean run(char[] s, int offset, int length) {
/**
 * Builds the automaton that matches the context part, including the trailing
 * context separator.
 *
 * <p>When {@code matchAllContexts} is set or {@code contexts} is empty, the result is
 * "anything, then the separator". Otherwise it is the union, over all entries, of the
 * context string (followed by "anything" when the entry is not exact) and the separator.
 *
 * @param contexts         context strings mapped to their metadata
 * @param matchAllContexts whether every context should match
 * @return the context automaton
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.isEmpty()) {
    return Operations.concatenate(matchAllAutomaton, sep);
  }

  Automaton union = null;
  for (Map.Entry<IntsRef, ContextMetaData> context : contexts.entrySet()) {
    final ContextMetaData metaData = context.getValue();
    final IntsRef value = context.getKey();
    Automaton single = Automata.makeString(value.ints, value.offset, value.length);
    if (!metaData.exact) {
      // Non-exact contexts also match anything after the given context string.
      single = Operations.concatenate(single, matchAllAutomaton);
    }
    single = Operations.concatenate(single, sep);
    union = (union == null) ? single : Operations.union(union, single);
  }
  return union;
}
/** * Create a automaton for a given context query this automaton will be used * to find the matching paths with the fst * * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query * @param queries list of {@link ContextQuery} defining the lookup context * * @return Automaton matching the given Query */ public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) { Automaton a = Automata.makeEmptyString(); Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR); if (preserveSep) { // if separators are preserved the fst contains a SEP_LABEL // behind each gap. To have a matching automaton, we need to // include the SEP_LABEL in the query as well gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL)); } for (ContextQuery query : queries) { a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a)); } // TODO: should we limit this? Do any of our ContextQuery impls really create exponential regexps? GeoQuery looks safe (union // of strings). return Operations.determinize(a, Integer.MAX_VALUE); }
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores)); // if separators are preserved the fst contains a SEP_LABEL // behind each gap. To have a matching automaton, we need to // include the SEP_LABEL in the query as well Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL)); Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton()); Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton); contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES); final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size()); final TreeSet<Integer> contextLengths = new TreeSet<>(); for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) { ContextMetaData contextMetaData = entry.getValue(); contextMap.put(entry.getKey(), contextMetaData.boost); contextLengths.add(entry.getKey().length); } int[] contextLengthArray = new int[contextLengths.size()]; final Iterator<Integer> iterator = contextLengths.descendingIterator(); for (int i = 0; iterator.hasNext(); i++) { contextLengthArray[i] = iterator.next(); } return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray); }
/**
 * Builds the sub-automaton for decimal numbers of length
 * {@code x.substring(n).length()}, with no constraint on the digits themselves.
 *
 * @param builder builder that receives the new states and transitions
 * @param x       digit string whose remaining length determines how many digits are accepted
 * @param n       index in {@code x} at which this sub-automaton starts
 * @return the id of the state created for position {@code n}
 */
private static int anyOfRightLength(Automaton.Builder builder, String x, int n) {
  final int state = builder.createState();
  if (x.length() != n) {
    // One more digit of any value, then the rest of the chain.
    final int next = anyOfRightLength(builder, x, n + 1);
    builder.addTransition(state, next, '0', '9');
    return state;
  }
  // All digits consumed: this state accepts.
  builder.setAccept(state, true);
  return state;
}