/**
 * Parses {@code input} and returns the derivations that cover the whole
 * token sequence and whose rule is accepted by {@code grammar.isRoot}.
 *
 * <p>The input is tokenized, a chart sized for {@code tokenCount + 1}
 * positions is created, and every span {@code (start, end)} with
 * {@code 0 <= start < end <= tokenCount} is visited. For each span the
 * annotators, lexical rules, binary rules and unary rules are applied,
 * in that order.
 *
 * @param input the text to parse
 * @return a new list holding the derivations of span {@code (0, tokenCount)}
 *         whose rule is a root; empty when there are none
 */
public List<Derivation> parseSyntactic(String input) {
    final List<String> tokens = tokenizer.tokenize(input);
    final int tokenCount = tokens.size();

    // Annotators receive the original tokens; lexical rules receive this
    // lower-cased copy.
    // NOTE(review): toLowerCase() with no argument uses the JVM default
    // locale - confirm that matches how the lexical rules are normalised.
    final List<String> loweredTokens = new ArrayList<>(tokenCount);
    for (String raw : tokens) {
        loweredTokens.add(raw.toLowerCase());
    }

    final Chart chart = new Chart(tokenCount + 1);

    // Spans are visited by increasing end and, for a fixed end, decreasing
    // start, so every strictly smaller sub-span of (start, end) has already
    // been visited by the time (start, end) is processed.
    for (int end = 1; end <= tokenCount; end++) {
        for (int start = end - 1; start >= 0; start--) {
            applyAnnotators(chart, tokens, start, end);
            applyLexicalRules(chart, loweredTokens, start, end);
            applyBinaryRules(chart, start, end);
            applyUnaryRules(chart, start, end);
        }
    }

    // Keep only the full-span derivations rooted in the grammar.
    final List<Derivation> roots = new LinkedList<>();
    for (Derivation candidate : chart.getDerivations(0, tokenCount)) {
        if (!grammar.isRoot(candidate.rule)) {
            continue;
        }
        roots.add(candidate);
    }
    return roots;
}
/**
 * Verifies the token sequence a {@code BasicTokenizer} is expected to
 * produce for a sample mixing a currency amount, a clock time, slash- and
 * dash-separated numbers, ordinals, and digits glued to letters.
 */
@Test
void tokenize() {
    String sample = "$100, 10:45 1/2/3 4-5-6 1st 2nd 3RD 4th 10pm 3May";

    List<String> expected = Arrays.asList(
        "$100",              // "$100," - amount kept whole, comma not emitted
        "1045",              // "10:45" - colon removed, digits joined
        "1", "2", "3",       // "1/2/3" - split on slashes
        "4", "5", "6",       // "4-5-6" - split on dashes
        "1", "2", "3", "4",  // "1st 2nd 3RD 4th" - ordinal suffixes dropped
        "10", "pm",          // "10pm" - digits separated from letters
        "3", "May"           // "3May" - digits separated from letters, case kept
    );

    Tokenizer tokenizer = new BasicTokenizer();
    assertEquals(expected, tokenizer.tokenize(sample));
}
}