/**
 * Tells whether the derivation list stored for span ({@code start}, {@code end})
 * has grown past {@code MAX_CAPACITY_PER_SPAN}.
 *
 * <p>When the limit is exceeded, a diagnostic naming the span and the rule of the
 * most recently appended derivation is written to {@code System.err}.
 *
 * <p>NOTE(review): the comparison is strict, so this returns {@code true} only once
 * the span holds at least {@code MAX_CAPACITY_PER_SPAN + 1} derivations — confirm
 * that this is the intended limit.
 *
 * @param start first index of the span
 * @param end   second index of the span
 * @return {@code true} if the span holds more than {@code MAX_CAPACITY_PER_SPAN} derivations
 */
boolean isSpanFull(int start, int end) {
    LinkedList<Derivation> stored = getDerivations(start, end);
    if (stored.size() <= MAX_CAPACITY_PER_SPAN) {
        return false;
    }
    // Over the limit: report which span overflowed and the last rule that made it in.
    String message = String.format("Exceeded max capacity[%d-%d]: %s", start, end, stored.getLast().rule);
    System.err.println(message);
    return true;
}
/**
 * Appends {@code derivation} to the list held for span ({@code start}, {@code end}).
 *
 * @param start      first index of the span
 * @param end        second index of the span
 * @param derivation derivation to record for that span
 */
void addDerivation(int start, int end, Derivation derivation) {
    LinkedList<Derivation> cell = getDerivations(start, end);
    cell.add(derivation);
}
void applyUnaryRules(Chart chart, int start, int end) { LinkedList<Derivation> queue = new LinkedList<>(chart.getDerivations(start, end)); while(!queue.isEmpty()) { Derivation d = queue.removeFirst(); for(Rule rule: grammar.getUnaryRules(d.rule.getLHS())) { if(chart.isSpanFull(start, end)) return; //System.out.println(rule); Derivation parent = new Derivation(rule, Collections.singletonList(d)); queue.addLast(parent); chart.addDerivation(start, end, parent); } } }
/**
 * Parses {@code input} and returns every derivation that spans all tokens and
 * whose rule the grammar accepts as a root.
 *
 * <p>The input is tokenized, and a lower-cased copy of the tokens is built for
 * lexical matching. Lower-casing uses {@code Locale.ROOT} so that the result does
 * not depend on the JVM's default locale (for example, the Turkish locale maps
 * "I" to a dotless "ı").
 *
 * <p>Spans are filled in order of increasing end index and, for each end index,
 * decreasing start index. As a result both sub-spans of any split of
 * ({@code s}, {@code e}) have already been processed when binary rules are applied
 * to it. For each span the steps run in this order: annotators (original-case
 * tokens), lexical rules (lower-cased tokens), binary rules, unary rules.
 *
 * @param input text to parse
 * @return the derivations over span (0, N), N being the token count, for which
 *         {@code grammar.isRoot} holds; empty if there are none
 */
public List<Derivation> parseSyntactic(String input) {
    List<String> tokens = tokenizer.tokenize(input);

    List<String> tokensLower = new ArrayList<>(tokens.size());
    for (String token : tokens) {
        // Locale-independent case folding; fully qualified to avoid touching the import list.
        tokensLower.add(token.toLowerCase(java.util.Locale.ROOT));
    }

    int N = tokens.size();
    Chart chart = new Chart(N + 1);
    for (int e = 1; e <= N; e++) {
        for (int s = e - 1; s >= 0; s--) {
            applyAnnotators(chart, tokens, s, e);
            applyLexicalRules(chart, tokensLower, s, e);
            applyBinaryRules(chart, s, e);
            applyUnaryRules(chart, s, e);
        }
    }

    // Keep only the full-span derivations that the grammar recognises as roots.
    List<Derivation> derivations = new LinkedList<>();
    for (Derivation d : chart.getDerivations(0, N)) {
        if (grammar.isRoot(d.rule)) {
            derivations.add(d);
        }
    }
    return derivations;
}
void applyBinaryRules(Chart chart, int start, int end) { if(end > start + 1) { for(int mid = start + 1; mid < end; mid++) { List<Derivation> left = chart.getDerivations(start, mid); List<Derivation> right = chart.getDerivations(mid, end); for(Derivation l:left) { for(Derivation r: right) { for(Rule rule: grammar.getBinaryRules(l.rule.getLHS(), r.rule.getLHS())) { if(chart.isSpanFull(start, end)) return; //System.out.println(rule); chart.addDerivation(start, end, new Derivation(rule, Arrays.asList(l, r))); } } } } } }
/**
 * With derivations for rules "$A" on span (0, 1) and "$B" on span (1, 2) in the
 * chart, applying binary rules to span (0, 2) must record the "$C" / "$A $B" rule first.
 */
@Test
void applyBinaryRules() {
    Rule combine = new Rule("$C", "$A $B");
    List<Rule> rules = Collections.singletonList(combine);
    Grammar grammar = new Grammar(rules, "$ROOT");
    Parser parser = new Parser(grammar, null, null);
    Parser.Chart chart = parser.new Chart(10);

    // Seed the two adjacent sub-spans.
    Derivation left = new Derivation(new Rule("$A", "A"), null);
    chart.addDerivation(0, 1, left);
    Derivation right = new Derivation(new Rule("$B", "B"), null);
    chart.addDerivation(1, 2, right);

    parser.applyBinaryRules(chart, 0, 2);

    List<Derivation> combined = chart.getDerivations(0, 2);
    assertEquals(combine, combined.get(0).rule);
}
} // closes a scope that is opened outside this excerpt
/**
 * Starting from a single "$D" derivation on span (1, 3), applying unary rules
 * must chain "$E" / "$D" and then "$F" / "$E", giving three derivations in that order.
 */
@Test
void applyUnaryRules() {
    Rule fFromE = new Rule("$F", "$E");
    Rule eFromD = new Rule("$E", "$D");
    List<Rule> rules = Arrays.asList(fFromE, eFromD);
    Grammar grammar = new Grammar(rules, "$ROOT");
    Parser parser = new Parser(grammar, null, null);
    Parser.Chart chart = parser.new Chart(10);

    Derivation seed = new Derivation(new Rule("$D", "$B $C"), null);
    chart.addDerivation(1, 3, seed);

    parser.applyUnaryRules(chart, 1, 3);

    List<Derivation> cell = chart.getDerivations(1, 3);
    assertEquals(3, cell.size());
    // Index 0 is the seed; the unary chain follows it.
    assertEquals(fFromE, cell.get(2).rule);
    assertEquals(eFromD, cell.get(1).rule);
}
/**
 * Checks the span-to-index mapping for one known span of a size-10 chart and
 * that a derivation added to a span can be read back from that same span.
 */
@Test
void chartRetrieval() {
    Parser parser = new Parser(null, null, null);
    Parser.Chart chart = parser.new Chart(10);

    assertEquals(32, chart.mapSpan(2, 3));

    Derivation stored = new Derivation(null, null);
    chart.addDerivation(3, 5, stored);

    assertEquals(1, chart.getDerivations(3, 5).size());
    assertEquals(stored, chart.getDerivations(3, 5).get(0));
}
/**
 * For tokens "A", "B", "C", applying lexical rules to span (1, 3) must record
 * the "$A" / "B C" rule as the first derivation of that span.
 */
@Test
void applyLexicalRules() {
    Rule lexical = new Rule("$A", "B C");
    List<Rule> rules = Collections.singletonList(lexical);
    Grammar grammar = new Grammar(rules, "$ROOT");
    Parser parser = new Parser(grammar, null, null);
    Parser.Chart chart = parser.new Chart(10);
    List<String> tokens = Arrays.asList("A", "B", "C");

    parser.applyLexicalRules(chart, tokens, 1, 3);

    List<Derivation> cell = chart.getDerivations(1, 3);
    assertEquals(lexical, cell.get(0).rule);
}
/**
 * With {@code PhraseAnnotator.INSTANCE} as the parser's only annotator, applying
 * annotators to span (0, 3) must record the first rule that the annotator itself
 * returns for the same tokens.
 */
@Test
void applyAnnotators() {
    Parser parser = new Parser(null, null, Collections.singletonList(PhraseAnnotator.INSTANCE));
    Parser.Chart chart = parser.new Chart(10);
    List<String> tokens = Arrays.asList("A", "B", "C");

    // Reference result taken directly from the annotator.
    Rule expected = PhraseAnnotator.INSTANCE.annotate(tokens).get(0);

    parser.applyAnnotators(chart, tokens, 0, 3);

    List<Derivation> cell = chart.getDerivations(0, 3);
    assertEquals(expected, cell.get(0).rule);
}