/** * Extract common prefix from OR clause. This is beneficial because it reduces the amount * that needs to be checked for each branch. For example: {@code "ab|ac" => "a(b|c)"}. */ static Matcher extractPrefixFromOr(Matcher matcher) { if (matcher instanceof OrMatcher) { // Get the prefix for the first condition List<Matcher> matchers = matcher.<OrMatcher>as().matchers(); if (matchers.isEmpty()) { return matcher; } Matcher prefix = PatternUtils.getPrefix(matchers.get(0)); if (prefix.alwaysMatches()) { return matcher; } List<Matcher> ms = new ArrayList<>(); ms.add(PatternUtils.getSuffix(matchers.get(0))); // Verify all OR conditions have the same prefix for (Matcher m : matchers.subList(1, matchers.size())) { Matcher p = PatternUtils.getPrefix(m); if (!prefix.equals(p)) { return matcher; } ms.add(PatternUtils.getSuffix(m)); } return SeqMatcher.create(prefix, OrMatcher.create(ms)); } return matcher; }
/** * Extract common prefix from OR clause. This is beneficial because it reduces the amount * that needs to be checked for each branch. For example: {@code "ab|ac" => "a(b|c)"}. */ static Matcher extractPrefixFromOr(Matcher matcher) { if (matcher instanceof OrMatcher) { // Get the prefix for the first condition List<Matcher> matchers = matcher.<OrMatcher>as().matchers(); if (matchers.isEmpty()) { return matcher; } Matcher prefix = PatternUtils.getPrefix(matchers.get(0)); if (prefix.alwaysMatches()) { return matcher; } List<Matcher> ms = new ArrayList<>(); ms.add(PatternUtils.getSuffix(matchers.get(0))); // Verify all OR conditions have the same prefix for (Matcher m : matchers.subList(1, matchers.size())) { Matcher p = PatternUtils.getPrefix(m); if (!prefix.equals(p)) { return matcher; } ms.add(PatternUtils.getSuffix(m)); } return SeqMatcher.create(prefix, OrMatcher.create(ms)); } return matcher; }
/**
 * Consolidate adjacent any matches: a repeated any match is dropped when the matcher
 * that follows it already starts with an index of search, e.g.,
 * ({@code ".*(.*foo)" => ".*foo"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The matcher following the repeated any match when that matcher's prefix is an
 *     {@code IndexOfMatcher}, otherwise the input matcher unchanged.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher zeroOrMore = matcher.as();
  if (!(zeroOrMore.repeated() instanceof AnyMatcher)) {
    return matcher;
  }

  // Only look at what follows once we know the repeated part is an any match
  Matcher following = zeroOrMore.next();
  boolean startsWithIndexOf = PatternUtils.getPrefix(following) instanceof IndexOfMatcher;
  return startsWithIndexOf ? following : matcher;
}
/**
 * Consolidate adjacent any matches: a repeated any match is dropped when the matcher
 * that follows it already starts with an index of search, e.g.,
 * ({@code ".*(.*foo)" => ".*foo"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The matcher following the repeated any match when that matcher's prefix is an
 *     {@code IndexOfMatcher}, otherwise the input matcher unchanged.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher zeroOrMore = matcher.as();
  if (!(zeroOrMore.repeated() instanceof AnyMatcher)) {
    return matcher;
  }

  // Only look at what follows once we know the repeated part is an any match
  Matcher following = zeroOrMore.next();
  boolean startsWithIndexOf = PatternUtils.getPrefix(following) instanceof IndexOfMatcher;
  return startsWithIndexOf ? following : matcher;
}
/**
 * Replace a repeated any match that is followed by a char sequence with an
 * IndexOfMatcher. The index of operation seems to be optimized by the JDK and is
 * much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     An {@code IndexOfMatcher} built from the char sequence pattern and the suffix of
 *     the following matcher, or the input matcher unchanged if it is not a repeated any
 *     match followed by a char sequence.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher zeroOrMore = matcher.as();
  Matcher following = zeroOrMore.next();
  Matcher head = PatternUtils.getPrefix(following);

  boolean anyThenCharSeq = zeroOrMore.repeated() instanceof AnyMatcher
      && head instanceof CharSeqMatcher;
  if (!anyThenCharSeq) {
    return matcher;
  }

  // The char sequence becomes the search text, whatever came after it is kept as-is
  CharSeqMatcher charSeq = head.as();
  String text = charSeq.pattern();
  Matcher tail = PatternUtils.getSuffix(following);
  return new IndexOfMatcher(text, tail);
}
/**
 * Replace a repeated any match that is followed by a char sequence with an
 * IndexOfMatcher. The index of operation seems to be optimized by the JDK and is
 * much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     An {@code IndexOfMatcher} built from the char sequence pattern and the suffix of
 *     the following matcher, or the input matcher unchanged if it is not a repeated any
 *     match followed by a char sequence.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher zeroOrMore = matcher.as();
  Matcher following = zeroOrMore.next();
  Matcher head = PatternUtils.getPrefix(following);

  boolean anyThenCharSeq = zeroOrMore.repeated() instanceof AnyMatcher
      && head instanceof CharSeqMatcher;
  if (!anyThenCharSeq) {
    return matcher;
  }

  // The char sequence becomes the search text, whatever came after it is kept as-is
  CharSeqMatcher charSeq = head.as();
  String text = charSeq.pattern();
  Matcher tail = PatternUtils.getSuffix(following);
  return new IndexOfMatcher(text, tail);
}