/**
 * Remove match any pattern at the end, e.g., ({@code "foo.*$" => "foo"}).
 *
 * <p>The rule applies only when {@code matcher} is a {@code ZeroOrMoreMatcher} whose
 * repeated matcher is an {@code AnyMatcher} and whose next matcher is either a
 * {@code TrueMatcher} or an {@code EndMatcher}.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     {@code TrueMatcher.INSTANCE} if the rule applies, otherwise the matcher that was
 *     passed in.
 */
static Matcher removeTrailingMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  Matcher next = zm.next();
  // Nothing meaningful follows the repetition if the next matcher is always true or is
  // the end anchor.
  boolean atEnd = next instanceof TrueMatcher || next instanceof EndMatcher;
  if (atEnd && zm.repeated() instanceof AnyMatcher) {
    return TrueMatcher.INSTANCE;
  }
  return matcher;
}
/**
 * If the match after a repeated pattern is false, then treat the whole match as false.
 * For example: {@code ".*$." => "$."}.
 *
 * <p>The same replacement is applied when the repeated matcher itself is a
 * {@code FalseMatcher}: the repetition is dropped and only the next matcher is kept
 * (presumably because a matcher that never matches can only be repeated zero times).
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The next matcher of the {@code ZeroOrMoreMatcher} if either its repeated or its next
 *     matcher is a {@code FalseMatcher}, otherwise the matcher that was passed in.
 */
static Matcher zeroOrMoreFalse(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  boolean repeatedIsFalse = zm.repeated() instanceof FalseMatcher;
  if (repeatedIsFalse || zm.next() instanceof FalseMatcher) {
    return zm.next();
  }
  return matcher;
}
/**
 * A repeated any match that is followed by an index of match can be dropped, e.g.,
 * ({@code ".*(.*foo)" => ".*foo"}).
 *
 * <p>The rule applies when {@code matcher} is a {@code ZeroOrMoreMatcher} whose repeated
 * matcher is an {@code AnyMatcher} and the prefix of its next matcher, as reported by
 * {@code PatternUtils.getPrefix}, is an {@code IndexOfMatcher}.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The next matcher if the rule applies, otherwise the matcher that was passed in.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  if (zm.repeated() instanceof AnyMatcher
      && PatternUtils.getPrefix(zm.next()) instanceof IndexOfMatcher) {
    return zm.next();
  }
  return matcher;
}
/**
 * If a start anchor is preceded by a repeated any match, then the any match can be removed
 * as it must be empty for the start anchor to match ({@code ".*^" => "^"}).
 *
 * <p>The rule is only checked when the next matcher is a {@code SeqMatcher}; the first
 * element of that sequence decides whether it is start anchored.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The next matcher if the rule applies, otherwise the matcher that was passed in.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  if (zm.repeated() instanceof AnyMatcher && zm.next() instanceof SeqMatcher) {
    // NOTE(review): get(0) assumes a SeqMatcher always holds at least one matcher;
    // confirm against SeqMatcher.create.
    Matcher first = zm.next().<SeqMatcher>as().matchers().get(0);
    if (first.isStartAnchored()) {
      return zm.next();
    }
  }
  return matcher;
}
/**
 * Zero or more start anchors is the same as not being anchored by the start.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The next matcher if {@code matcher} is a {@code ZeroOrMoreMatcher} whose repeated
 *     matcher is a {@code StartMatcher}, otherwise the matcher that was passed in.
 */
static Matcher removeRepeatedStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  return (zm.repeated() instanceof StartMatcher) ? zm.next() : matcher;
}
/**
 * Adjacent any matches can be consolidated, e.g., ({@code ".*.*" => ".*"}).
 *
 * <p>When a repeated any match is immediately followed by another repeated any match, the
 * outer one is dropped and the inner one is returned.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The inner {@code ZeroOrMoreMatcher} if the rule applies, otherwise the matcher that
 *     was passed in.
 */
static Matcher removeSequentialMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher outer = matcher.as();
  if (!(outer.repeated() instanceof AnyMatcher) || !(outer.next() instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher inner = outer.next().as();
  if (inner.repeated() instanceof AnyMatcher) {
    return inner;
  }
  return matcher;
}
/**
 * If the matcher preceding an OR clause is a repeated any match, move into each branch
 * of the OR clause. This allows for other optimizations such as conversion to an indexOf
 * to take effect for each branch.
 *
 * @param matcher
 *     Matcher to rewrite.
 * @return
 *     The result of {@code OrMatcher.create} over the branches, each wrapped in a new
 *     {@code ZeroOrMoreMatcher} that repeats {@code AnyMatcher.INSTANCE}, if the rule
 *     applies; otherwise the matcher that was passed in.
 */
static Matcher inlineMatchAnyPrecedingOr(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  if (!(zm.repeated() instanceof AnyMatcher) || !(zm.next() instanceof OrMatcher)) {
    return matcher;
  }
  List<Matcher> branches = zm.next().<OrMatcher>as().matchers();
  // One wrapped matcher is produced per branch, so the final size is known up front.
  List<Matcher> wrapped = new ArrayList<>(branches.size());
  for (Matcher branch : branches) {
    wrapped.add(new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, branch));
  }
  return OrMatcher.create(wrapped);
}
/**
 * Get the suffix matcher. This is similar to {@link #tail(Matcher)} except that it is
 * intended to be used with {@link #getPrefix(Matcher)}.
 *
 * <ul>
 *   <li>{@code SeqMatcher}: a sequence created from every element after the first.</li>
 *   <li>{@code ZeroOrMoreMatcher}: its next matcher.</li>
 *   <li>{@code CharSeqMatcher}: a {@code CharSeqMatcher} for the pattern without its first
 *       character, or {@code TrueMatcher.INSTANCE} if the pattern has at most one
 *       character.</li>
 *   <li>Anything else: {@code TrueMatcher.INSTANCE}.</li>
 * </ul>
 *
 * @param matcher
 *     Matcher to extract the suffix from.
 * @return
 *     Suffix matcher as described above.
 */
static Matcher getSuffix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> ms = matcher.<SeqMatcher>as().matchers();
    return SeqMatcher.create(ms.subList(1, ms.size()));
  }
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zm = matcher.as();
    return zm.next();
  }
  if (matcher instanceof CharSeqMatcher) {
    String pattern = matcher.<CharSeqMatcher>as().pattern();
    // Nothing is left once the first character is removed from a pattern of length <= 1.
    return pattern.length() <= 1
        ? TrueMatcher.INSTANCE
        : new CharSeqMatcher(pattern.substring(1));
  }
  return TrueMatcher.INSTANCE;
}
}
/**
 * If a char sequence is preceded by a repeated any match, then replace with an
 * IndexOfMatcher. The index of operation seems to be optimized by the JDK and is
 * much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * <p>The char sequence is taken from {@code PatternUtils.getPrefix} of the next matcher and
 * the remainder from {@code PatternUtils.getSuffix} of the same matcher.
 *
 * @param matcher
 *     Matcher to rewrite.
 * @return
 *     A new {@code IndexOfMatcher} for the prefix pattern and the suffix if the rule
 *     applies, otherwise the matcher that was passed in.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  Matcher following = zm.next();
  Matcher prefix = PatternUtils.getPrefix(following);
  if (zm.repeated() instanceof AnyMatcher && prefix instanceof CharSeqMatcher) {
    String pattern = prefix.<CharSeqMatcher>as().pattern();
    Matcher suffix = PatternUtils.getSuffix(following);
    return new IndexOfMatcher(pattern, suffix);
  }
  return matcher;
}