/**
 * Returns the leading element of a sequence.
 *
 * @param matcher matcher to inspect
 * @return the element at index 0 when {@code matcher} is a {@code SeqMatcher}; otherwise
 *     {@code matcher} itself, unchanged
 */
static Matcher head(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    // Anything that is not a sequence is its own head.
    return matcher;
  }
  SeqMatcher seq = matcher.as();
  List<Matcher> elements = seq.matchers();
  return elements.get(0);
}
/**
 * Returns the leading element of a sequence.
 *
 * @param matcher matcher to inspect
 * @return the element at index 0 when {@code matcher} is a {@code SeqMatcher}; otherwise
 *     {@code matcher} itself, unchanged
 */
static Matcher head(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    // Anything that is not a sequence is its own head.
    return matcher;
  }
  SeqMatcher seq = matcher.as();
  List<Matcher> elements = seq.matchers();
  return elements.get(0);
}
/**
 * Collapses a sequence to false when any of its elements is an explicit false matcher,
 * since such a sequence can never match.
 *
 * @param matcher matcher to inspect
 * @return {@code FalseMatcher.INSTANCE} when {@code matcher} is a {@code SeqMatcher} that
 *     contains a {@code FalseMatcher}; otherwise {@code matcher} itself
 */
static Matcher sequenceWithFalseIsFalse(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  SeqMatcher seq = matcher.as();
  boolean containsFalse = seq.matchers().stream().anyMatch(m -> m instanceof FalseMatcher);
  return containsFalse ? FalseMatcher.INSTANCE : matcher;
}
/**
 * Drops repeated branches from an OR clause while keeping the first occurrence of each
 * branch in its original position. For example: {@code "a|b|a" => "a|b"}.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code OrMatcher.create} on the de-duplicated branches when
 *     {@code matcher} is an {@code OrMatcher}; otherwise {@code matcher} itself
 */
static Matcher dedupOr(Matcher matcher) {
  if (!(matcher instanceof OrMatcher)) {
    return matcher;
  }
  OrMatcher or = matcher.as();
  // LinkedHashSet removes equal branches but preserves encounter order.
  LinkedHashSet<Matcher> distinct = new LinkedHashSet<>(or.matchers());
  List<Matcher> branches = new ArrayList<>(distinct);
  return OrMatcher.create(branches);
}
/**
 * Drops repeated branches from an OR clause while keeping the first occurrence of each
 * branch in its original position. For example: {@code "a|b|a" => "a|b"}.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code OrMatcher.create} on the de-duplicated branches when
 *     {@code matcher} is an {@code OrMatcher}; otherwise {@code matcher} itself
 */
static Matcher dedupOr(Matcher matcher) {
  if (!(matcher instanceof OrMatcher)) {
    return matcher;
  }
  OrMatcher or = matcher.as();
  // LinkedHashSet removes equal branches but preserves encounter order.
  LinkedHashSet<Matcher> distinct = new LinkedHashSet<>(or.matchers());
  List<Matcher> branches = new ArrayList<>(distinct);
  return OrMatcher.create(branches);
}
/**
 * Strips branches that are explicitly false out of an OR clause, as they cannot contribute
 * a match. For example: {@code "a|$b|c" => "a|c"}.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code OrMatcher.create} on the branches that are not
 *     {@code FalseMatcher} instances when {@code matcher} is an {@code OrMatcher};
 *     otherwise {@code matcher} itself
 */
static Matcher removeFalseBranchesFromOr(Matcher matcher) {
  if (!(matcher instanceof OrMatcher)) {
    return matcher;
  }
  OrMatcher or = matcher.as();
  List<Matcher> viable = or.matchers()
      .stream()
      .filter(branch -> !(branch instanceof FalseMatcher))
      .collect(Collectors.toList());
  return OrMatcher.create(viable);
}
/**
 * Strips branches that are explicitly false out of an OR clause, as they cannot contribute
 * a match. For example: {@code "a|$b|c" => "a|c"}.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code OrMatcher.create} on the branches that are not
 *     {@code FalseMatcher} instances when {@code matcher} is an {@code OrMatcher};
 *     otherwise {@code matcher} itself
 */
static Matcher removeFalseBranchesFromOr(Matcher matcher) {
  if (!(matcher instanceof OrMatcher)) {
    return matcher;
  }
  OrMatcher or = matcher.as();
  List<Matcher> viable = or.matchers()
      .stream()
      .filter(branch -> !(branch instanceof FalseMatcher))
      .collect(Collectors.toList());
  return OrMatcher.create(viable);
}
/**
 * Returns everything after the first element of a sequence.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code SeqMatcher.create} on the elements from index 1 onward when
 *     {@code matcher} is a {@code SeqMatcher}; {@code TrueMatcher.INSTANCE} when it is any
 *     other kind of matcher, i.e. a lone matcher has nothing following it
 */
static Matcher tail(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return TrueMatcher.INSTANCE;
  }
  SeqMatcher seq = matcher.as();
  List<Matcher> elements = seq.matchers();
  List<Matcher> remainder = elements.subList(1, elements.size());
  return SeqMatcher.create(remainder);
}
/**
 * Returns everything after the first element of a sequence.
 *
 * @param matcher matcher to inspect
 * @return the result of {@code SeqMatcher.create} on the elements from index 1 onward when
 *     {@code matcher} is a {@code SeqMatcher}; {@code TrueMatcher.INSTANCE} when it is any
 *     other kind of matcher, i.e. a lone matcher has nothing following it
 */
static Matcher tail(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return TrueMatcher.INSTANCE;
  }
  SeqMatcher seq = matcher.as();
  List<Matcher> elements = seq.matchers();
  List<Matcher> remainder = elements.subList(1, elements.size());
  return SeqMatcher.create(remainder);
}
/**
 * A start anchor repeated zero or more times is equivalent to having no start anchor at
 * all, so the repetition is dropped in favor of whatever follows it.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} whose repeated element is a {@code StartMatcher};
 *     otherwise {@code matcher} itself
 */
static Matcher removeRepeatedStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  return (repetition.repeated() instanceof StartMatcher) ? repetition.next() : matcher;
}
/**
 * An empty character class cannot match any character, so it is replaced with false.
 *
 * @param matcher matcher to inspect
 * @return {@code FalseMatcher.INSTANCE} when {@code matcher} is a {@code CharClassMatcher}
 *     whose set reports itself as empty; otherwise {@code matcher} itself
 */
static Matcher convertEmptyCharClassToFalse(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    CharClassMatcher charClass = matcher.as();
    if (charClass.set().isEmpty()) {
      return FalseMatcher.INSTANCE;
    }
  }
  return matcher;
}
/**
 * A start anchor repeated zero or more times is equivalent to having no start anchor at
 * all, so the repetition is dropped in favor of whatever follows it.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} whose repeated element is a {@code StartMatcher};
 *     otherwise {@code matcher} itself
 */
static Matcher removeRepeatedStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  return (repetition.repeated() instanceof StartMatcher) ? repetition.next() : matcher;
}
/**
 * An empty character class cannot match any character, so it is replaced with false.
 *
 * @param matcher matcher to inspect
 * @return {@code FalseMatcher.INSTANCE} when {@code matcher} is a {@code CharClassMatcher}
 *     whose set reports itself as empty; otherwise {@code matcher} itself
 */
static Matcher convertEmptyCharClassToFalse(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    CharClassMatcher charClass = matcher.as();
    if (charClass.set().isEmpty()) {
      return FalseMatcher.INSTANCE;
    }
  }
  return matcher;
}
/**
 * Rewrites a character class holding exactly one value as a literal match of that value
 * ({@code "[a]" => "a"}). Doing so lets other optimizations fold the literal into adjacent
 * matchers, yielding a longer prefix or indexOf check.
 *
 * @param matcher matcher to inspect
 * @return a new {@code CharSeqMatcher} for the single character when {@code matcher} is a
 *     {@code CharClassMatcher} whose set reports one; otherwise {@code matcher} itself
 */
static Matcher convertSingleCharClassToSeq(Matcher matcher) {
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  CharClassMatcher charClass = matcher.as();
  Optional<Character> single = charClass.set().character();
  if (!single.isPresent()) {
    return matcher;
  }
  return new CharSeqMatcher(single.get());
}
/**
 * Drops a match-any repetition that sits at the end of the pattern, e.g.,
 * ({@code "foo.*$" => "foo"}).
 *
 * @param matcher matcher to inspect
 * @return {@code TrueMatcher.INSTANCE} when {@code matcher} is a {@code ZeroOrMoreMatcher}
 *     that repeats an {@code AnyMatcher} and is followed only by a {@code TrueMatcher} or
 *     an {@code EndMatcher}; otherwise {@code matcher} itself
 */
static Matcher removeTrailingMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  boolean nothingFollows = repetition.next() instanceof TrueMatcher
      || repetition.next() instanceof EndMatcher;
  if (!nothingFollows || !(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  return TrueMatcher.INSTANCE;
}
/**
 * Drops a match-any repetition that sits at the end of the pattern, e.g.,
 * ({@code "foo.*$" => "foo"}).
 *
 * @param matcher matcher to inspect
 * @return {@code TrueMatcher.INSTANCE} when {@code matcher} is a {@code ZeroOrMoreMatcher}
 *     that repeats an {@code AnyMatcher} and is followed only by a {@code TrueMatcher} or
 *     an {@code EndMatcher}; otherwise {@code matcher} itself
 */
static Matcher removeTrailingMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  boolean nothingFollows = repetition.next() instanceof TrueMatcher
      || repetition.next() instanceof EndMatcher;
  if (!nothingFollows || !(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  return TrueMatcher.INSTANCE;
}
/**
 * Simplifies a repetition that involves an explicit false matcher by replacing it with the
 * matcher that follows it. This applies when the following matcher is false, making the
 * whole match false (for example: {@code ".*$." => "$."}), and also when the repeated
 * element itself is false.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} whose repeated or following matcher is a
 *     {@code FalseMatcher}; otherwise {@code matcher} itself
 */
static Matcher zeroOrMoreFalse(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  boolean involvesFalse = repetition.repeated() instanceof FalseMatcher
      || repetition.next() instanceof FalseMatcher;
  return involvesFalse ? repetition.next() : matcher;
}
/**
 * Simplifies a repetition that involves an explicit false matcher by replacing it with the
 * matcher that follows it. This applies when the following matcher is false, making the
 * whole match false (for example: {@code ".*$." => "$."}), and also when the repeated
 * element itself is false.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} whose repeated or following matcher is a
 *     {@code FalseMatcher}; otherwise {@code matcher} itself
 */
static Matcher zeroOrMoreFalse(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  boolean involvesFalse = repetition.repeated() instanceof FalseMatcher
      || repetition.next() instanceof FalseMatcher;
  return involvesFalse ? repetition.next() : matcher;
}
/**
 * Consolidates adjacent match-any patterns, e.g., ({@code ".*(.*foo)" => ".*foo"}). A
 * match-any repetition is dropped when the prefix of the matcher that follows it, as
 * reported by {@code PatternUtils.getPrefix}, is an {@code IndexOfMatcher}.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} repeating an {@code AnyMatcher} and the following matcher
 *     has an {@code IndexOfMatcher} prefix; otherwise {@code matcher} itself
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  if (!(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  if (PatternUtils.getPrefix(repetition.next()) instanceof IndexOfMatcher) {
    return repetition.next();
  }
  return matcher;
}
/**
 * Consolidates adjacent match-any patterns, e.g., ({@code ".*(.*foo)" => ".*foo"}). A
 * match-any repetition is dropped when the prefix of the matcher that follows it, as
 * reported by {@code PatternUtils.getPrefix}, is an {@code IndexOfMatcher}.
 *
 * @param matcher matcher to inspect
 * @return the matcher following the repetition when {@code matcher} is a
 *     {@code ZeroOrMoreMatcher} repeating an {@code AnyMatcher} and the following matcher
 *     has an {@code IndexOfMatcher} prefix; otherwise {@code matcher} itself
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.as();
  if (!(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  if (PatternUtils.getPrefix(repetition.next()) instanceof IndexOfMatcher) {
    return repetition.next();
  }
  return matcher;
}