/**
 * Merge a character sequence that directly follows a starts with matcher into the prefix
 * of that starts with matcher. The rewrite only applies when the input is a sequence whose
 * first element is a StartsWithMatcher and whose second element is a CharSeqMatcher; any
 * other input is returned unchanged.
 */
static Matcher combineCharSeqAfterStartsWith(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  if (parts.size() < 2) {
    return matcher;
  }
  Matcher head = parts.get(0);
  Matcher next = parts.get(1);
  if (!(head instanceof StartsWithMatcher) || !(next instanceof CharSeqMatcher)) {
    return matcher;
  }
  // Concatenate the two patterns into one prefix; everything after the pair is kept as is.
  String combined = head.<StartsWithMatcher>as().pattern() + next.<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(combined));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Replace a start anchor that is immediately followed by a character sequence with a
 * single StartsWithMatcher. In a tight loop this is much faster than running a sequence
 * of two matchers. Inputs that do not begin with that pair are returned unchanged.
 */
static Matcher startsWithCharSeq(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  if (parts.size() < 2) {
    return matcher;
  }
  boolean anchoredLiteral = parts.get(0) instanceof StartMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!anchoredLiteral) {
    return matcher;
  }
  // The anchor and the literal collapse into one prefix check; the tail is kept as is.
  String prefix = parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(prefix));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Replace a start anchor that is immediately followed by a character sequence with a
 * single StartsWithMatcher. In a tight loop this is much faster than running a sequence
 * of two matchers. Inputs that do not begin with that pair are returned unchanged.
 */
static Matcher startsWithCharSeq(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  if (parts.size() < 2) {
    return matcher;
  }
  boolean anchoredLiteral = parts.get(0) instanceof StartMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!anchoredLiteral) {
    return matcher;
  }
  // The anchor and the literal collapse into one prefix check; the tail is kept as is.
  String prefix = parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(prefix));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Merge a character sequence that directly follows a starts with matcher into the prefix
 * of that starts with matcher. The rewrite only applies when the input is a sequence whose
 * first element is a StartsWithMatcher and whose second element is a CharSeqMatcher; any
 * other input is returned unchanged.
 */
static Matcher combineCharSeqAfterStartsWith(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  if (parts.size() < 2) {
    return matcher;
  }
  Matcher head = parts.get(0);
  Matcher next = parts.get(1);
  if (!(head instanceof StartsWithMatcher) || !(next instanceof CharSeqMatcher)) {
    return matcher;
  }
  // Concatenate the two patterns into one prefix; everything after the pair is kept as is.
  String combined = head.<StartsWithMatcher>as().pattern() + next.<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(combined));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/** Compiling {@code ^foo} should yield a matcher equal to a StartsWithMatcher for "foo". */
@Test
public void optimizeStartsWith() {
  PatternMatcher compiled = PatternMatcher.compile("^foo");
  Assertions.assertEquals(new StartsWithMatcher("foo"), compiled);
}
/**
 * Convert to a matcher that ignores case. Character class, character sequence, index of,
 * and starts with matchers are rebuilt from their existing state with {@code true} passed
 * as the trailing constructor argument (presumably the ignore-case flag). Any other
 * matcher type is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    return new CharClassMatcher(matcher.<CharClassMatcher>as().set(), true);
  }
  if (matcher instanceof CharSeqMatcher) {
    return new CharSeqMatcher(matcher.<CharSeqMatcher>as().pattern(), true);
  }
  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }
  if (matcher instanceof StartsWithMatcher) {
    return new StartsWithMatcher(matcher.<StartsWithMatcher>as().pattern(), true);
  }
  return matcher;
}
/**
 * Alternatives that differ only in grouping, {@code ^(abc|a(bc)|((a)(b))c)}, should compile
 * to a matcher equal to a single StartsWithMatcher for "abc".
 */
@Test
public void optimizeDuplicateOr() {
  PatternMatcher compiled = PatternMatcher.compile("^(abc|a(bc)|((a)(b))c)");
  Assertions.assertEquals(new StartsWithMatcher("abc"), compiled);
}
/**
 * Convert to a matcher that ignores case. Character class, character sequence, index of,
 * and starts with matchers are rebuilt from their existing state with {@code true} passed
 * as the trailing constructor argument (presumably the ignore-case flag). Any other
 * matcher type is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    return new CharClassMatcher(matcher.<CharClassMatcher>as().set(), true);
  }
  if (matcher instanceof CharSeqMatcher) {
    return new CharSeqMatcher(matcher.<CharSeqMatcher>as().pattern(), true);
  }
  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }
  if (matcher instanceof StartsWithMatcher) {
    return new StartsWithMatcher(matcher.<StartsWithMatcher>as().pattern(), true);
  }
  return matcher;
}
/**
 * A StartsWithMatcher for "a" followed by a CharSeqMatcher for "b" should be merged into a
 * single StartsWithMatcher for "ab", with the rest of the sequence left in place.
 */
@Test
public void combineCharSeqAfterStartsWith() {
  Matcher original = SeqMatcher.create(
      new StartsWithMatcher("a"),
      new CharSeqMatcher("b"),
      AnyMatcher.INSTANCE
  );
  Matcher expected = SeqMatcher.create(
      new StartsWithMatcher("ab"),
      AnyMatcher.INSTANCE
  );
  Matcher rewritten = Optimizer.combineCharSeqAfterStartsWith(original);
  Assertions.assertEquals(expected, rewritten);
}
/**
 * Compiling {@code ^abc|def|ghi} should yield an OR of a starts with check for "abc" and
 * index of checks for "def" and "ghi".
 */
@Test
public void optimizeOrSimple() {
  PatternMatcher compiled = PatternMatcher.compile("^abc|def|ghi");
  Assertions.assertEquals(
      OrMatcher.create(
          new StartsWithMatcher("abc"),
          new IndexOfMatcher("def", TrueMatcher.INSTANCE),
          new IndexOfMatcher("ghi", TrueMatcher.INSTANCE)
      ),
      compiled
  );
}
/**
 * Compiling {@code ^a?a} should yield a starts with check for "a" followed by an OR of a
 * character sequence "a" and the always-true matcher.
 */
@Test
public void optimizeOptionValue() {
  PatternMatcher compiled = PatternMatcher.compile("^a?a");
  Assertions.assertEquals(
      SeqMatcher.create(
          new StartsWithMatcher("a"),
          OrMatcher.create(new CharSeqMatcher("a"), TrueMatcher.INSTANCE)
      ),
      compiled
  );
}
/**
 * A start anchor followed by a CharSeqMatcher for "ab" should be replaced by a single
 * StartsWithMatcher for "ab", with the rest of the sequence left in place.
 */
@Test
public void startsWithCharSeq() {
  Matcher original = SeqMatcher.create(
      StartMatcher.INSTANCE,
      new CharSeqMatcher("ab"),
      AnyMatcher.INSTANCE
  );
  Matcher expected = SeqMatcher.create(
      new StartsWithMatcher("ab"),
      AnyMatcher.INSTANCE
  );
  Matcher rewritten = Optimizer.startsWithCharSeq(original);
  Assertions.assertEquals(expected, rewritten);
}
/**
 * Compiling {@code ^(abc123|abc456)} should factor out the shared prefix: a starts with
 * check for "abc" followed by an OR of the remaining suffixes "123" and "456".
 */
@Test
public void optimizeOrPrefix() {
  PatternMatcher compiled = PatternMatcher.compile("^(abc123|abc456)");
  StartsWithMatcher sharedPrefix = new StartsWithMatcher("abc");
  OrMatcher suffixes = new OrMatcher(new CharSeqMatcher("123"), new CharSeqMatcher("456"));
  Assertions.assertEquals(SeqMatcher.create(sharedPrefix, suffixes), compiled);
}
/**
 * Compiling {@code ^abc.*foo$|^abc.*bar$|^abc.*baz$} should factor out the shared prefix:
 * a starts with check for "abc" followed by an OR of end-anchored index of checks for
 * "foo", "bar", and "baz".
 */
@Test
public void optimizeOrPrefixPattern() {
  PatternMatcher compiled = PatternMatcher.compile("^abc.*foo$|^abc.*bar$|^abc.*baz$");
  StartsWithMatcher sharedPrefix = new StartsWithMatcher("abc");
  OrMatcher endings = new OrMatcher(
      new IndexOfMatcher("foo", EndMatcher.INSTANCE),
      new IndexOfMatcher("bar", EndMatcher.INSTANCE),
      new IndexOfMatcher("baz", EndMatcher.INSTANCE)
  );
  Assertions.assertEquals(SeqMatcher.create(sharedPrefix, endings), compiled);
}
/**
 * Compiling {@code ^^abc.def(?!.*(1000|1500))} should yield a starts with check for "abc",
 * an any-character matcher, the literal "def", and a negative lookahead whose alternatives
 * share the leading "1" as an index of search.
 */
@Test
public void optimizeNegativeLookaheadOr() {
  PatternMatcher compiled = PatternMatcher.compile("^^abc.def(?!.*(1000|1500))");
  Assertions.assertEquals(
      SeqMatcher.create(
          new StartsWithMatcher("abc"),
          AnyMatcher.INSTANCE,
          new CharSeqMatcher("def"),
          new NegativeLookaheadMatcher(new IndexOfMatcher(
              "1",
              OrMatcher.create(new CharSeqMatcher("000"), new CharSeqMatcher("500"))
          ))
      ),
      compiled
  );
}
}