/**
 * Creates the three context generators used by this component.
 * The build generator takes no arguments; the attach and check generators
 * are both constructed with {@code punctSet}.
 */
@Override
public void init() {
  this.buildContextGenerator = new BuildContextGenerator();
  this.attachContextGenerator = new AttachContextGenerator(punctSet);
  this.checkContextGenerator = new CheckContextGenerator(punctSet);
}
/**
 * Untyped entry point: unpacks an {@code Object[]} and delegates to the
 * four-argument {@code getContext} overload.
 *
 * @param o An {@code Object[]} whose elements are, in order, a {@code Parse[]},
 *     an {@code Integer}, a {@code List<Parse>} and an {@code Integer}.
 * @return The context produced by the four-argument overload.
 */
public String[] getContext(Object o) {
  Object[] args = (Object[]) o;
  // The List<Parse> cast is unchecked; element types are not verified here.
  return getContext(
      (Parse[]) args[0],
      (Integer) args[1],
      (List<Parse>) args[2],
      (Integer) args[3]);
}
/**
 * Untyped entry point: unpacks an {@code Object[]} and delegates to the
 * two-argument {@code getContext} overload.
 *
 * @param o An {@code Object[]} whose first element is a {@code Parse[]} and whose
 *     second element is an {@code Integer}.
 * @return The context produced by the two-argument overload.
 */
public String[] getContext(Object o) {
  Object[] args = (Object[]) o;
  return getContext(
      (Parse[]) args[0],
      (Integer) args[1]);
}
Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (numNodes == 0) { if (!isBuilt(advanceNode)) { break; int originalZeroIndex = mapParseIndex(0,children,originalChildren); int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren); List<Parse> newParsesList = new ArrayList<>(); buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs); double doneProb = bprobs[doneIndex]; if (debugOn) newParsesList.add(newParse1); if (checkComplete) { cprobs = checkModel.eval(checkContextGenerator.getContext(newNode, children, advanceNodeIndex,false)); if (debugOn) System.out.println("building " + tag + " " + bprob + " c=" + cprobs[completeIndex]); if (cprobs[completeIndex] > probMass) { //just complete advances setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing complete node"); setIncomplete(newNode); newParse1.addProb(Math.log(1 - cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing incomplete node");
if (etype == ParserEventTypeEnum.BUILD) { parseEvents.add(new Event(parent.getType(), buildContextGenerator.getContext(currentChunks, ci))); if (lastChild(chunks[ci], parent)) { if (etype == ParserEventTypeEnum.CHECK) { parseEvents.add(new Event(Parser.COMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false))); if (etype == ParserEventTypeEnum.CHECK) { parseEvents.add(new Event(Parser.INCOMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false))); parseEvents.add(new Event(Parser.DONE, buildContextGenerator.getContext(currentChunks, ci))); List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet); if (currentRightFrontier.size() != rightFrontier.size()) { System.err.println("fontiers mis-aligned: " + currentRightFrontier.size() + " != " Map<Parse, Integer> parents = getNonAdjoinedParent(chunks[ci]); if (debug) System.err.println("Looking at attachment site (" + cfi + "): " + cfn.getType() + " ci=" + i + " cs=" + nonPunctChildCount(cfn) + ", " + cfn + " :for " + currentChunks[ci].getType() + " " + currentChunks[ci] + " -> " + parents); if (attachNode == null && i != null && i == nonPunctChildCount(cfn)) { attachType = Parser.ATTACH_DAUGHTER;
Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); Parse pstart = children[0]; Parse pend = children[children.length - 1]; String type = parent.getType(); checkcons(pstart, "begin", type, features); checkcons(pend, "last", type, features); String production = "p=" + production(parent,false); String punctProduction = "pp=" + production(parent,true); features.add(production); features.add(punctProduction); rf = Parser.getRightFrontier(constituents[0], punctSet); if (trimFrontier) { int pi = rf.indexOf(parent); getFrontierNodes(rf,leftNodes); Parse p_1 = leftNodes[0]; Parse p_2 = leftNodes[1]; p2 = constituents[index + 2]; surround(p_1, -1, type, p_1s, features); surround(p_2, -2, type, p_2s, features); surround(p1, 1, type, p1s, features); surround(p2, 2, type, p2s, features);
rf = Parser.getRightFrontier(constituents[0], emptyPunctSet); getFrontierNodes(rf,leftNodes); Parse p_1 = leftNodes[0]; Parse p_2 = leftNodes[1]; String consp_2 = cons(p_2, -2); String consp_1 = cons(p_1, -1); String consp0 = cons(p0, 0); String consp1 = cons(p1, 1); String consp2 = cons(p2, 2); String consbop_2 = consbo(p_2, -2); String consbop_1 = consbo(p_1, -1); String consbop0 = consbo(p0, 0); String consbop1 = consbo(p1, 1); String consbop2 = consbo(p2, 2); cons2(features,c0,c1,punct1s,true); cons2(features,c_1,c0,punct_1s,true); cons3(features,c0,c1,c2,punct1s,punct2s,true,true,true); cons3(features,c_2,c_1,c0,punct_2s,punct_1s,true,true,true); cons3(features,c_1,c0,c1,punct_1s,punct_1s,true,true,true);
/**
 * Convenience overload that turns an iteration count and a cutoff into
 * {@link TrainingParameters} and delegates to the parameter-based {@code train}.
 *
 * <p>The "dict" namespace receives only the cutoff; the "tagger", "chunker",
 * "check" and "build" namespaces each receive the cutoff and the iteration count.
 *
 * @param languageCode Passed through to the delegate.
 * @param parseSamples Passed through to the delegate.
 * @param rules Passed through to the delegate.
 * @param iterations Value stored under {@code ITERATIONS_PARAM} for each model namespace.
 * @param cut Value stored under {@code CUTOFF_PARAM} for every namespace.
 * @return The model returned by the delegate.
 * @throws IOException Declared by this method; presumably raised by the delegate.
 */
public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples,
    HeadRules rules, int iterations, int cut) throws IOException {
  TrainingParameters settings = new TrainingParameters();
  settings.put("dict", TrainingParameters.CUTOFF_PARAM, cut);

  // NOTE(review): no "attach" namespace is configured here. If the delegate trains
  // an attach model it will not see these values; confirm whether that is intended.
  for (String namespace : new String[] {"tagger", "chunker", "check", "build"}) {
    settings.put(namespace, TrainingParameters.CUTOFF_PARAM, cut);
    settings.put(namespace, TrainingParameters.ITERATIONS_PARAM, iterations);
  }

  return train(languageCode, parseSamples, rules, settings);
}
/**
 * Finds the position of {@code child} among the children of {@code parent}
 * after punctuation has been collapsed with {@code punctSet}.
 * Children are compared by reference identity.
 *
 * @param child The node to look for.
 * @param parent The node whose collapsed children are searched.
 * @return The index of {@code child}, or -1 if it is not among them.
 */
private int indexOf(Parse child, Parse parent) {
  Parse[] siblings = Parser.collapsePunctuation(parent.getChildren(), punctSet);
  int position = 0;
  while (position < siblings.length && siblings[position] != child) {
    position++;
  }
  return position < siblings.length ? position : -1;
}
/**
 * Labels the node as incomplete. A node for which {@code isBuilt} holds keeps
 * the built marker as a prefix, separated by a dot.
 *
 * @param p The node to label.
 */
private void setIncomplete(Parse p) {
  String label = isBuilt(p)
      ? Parser.BUILT + "." + Parser.INCOMPLETE
      : Parser.INCOMPLETE;
  p.setLabel(label);
}
/**
 * Delegates chunking to the superclass, then marks every child of each
 * returned parse as complete.
 *
 * @param p Passed through to the superclass implementation.
 * @param minChunkScore Passed through to the superclass implementation.
 * @return The parses produced by the superclass, with their children labelled.
 */
@Override
protected Parse[] advanceChunks(Parse p, double minChunkScore) {
  Parse[] advanced = super.advanceChunks(p, minChunkScore);
  for (Parse candidate : advanced) {
    for (Parse chunk : candidate.getChildren()) {
      setComplete(chunk);
    }
  }
  return advanced;
}
/**
 * Collects the immediate parent of the specified node and, for as long as a
 * collected parent has the same syntactic type as the child it was reached
 * from, that parent's own parent as well.
 *
 * @param node The node whose parents are to be returned.
 * @return a map from each collected parent to the index (as computed by
 *     {@code indexOf}) of the child through which it was reached.
 */
private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
  Map<Parse, Integer> parents = new HashMap<>();
  Parse child = node;
  Parse ancestor = child.getParent();
  parents.put(ancestor, indexOf(child, ancestor));
  // Keep climbing while the ancestor just recorded shares its child's type.
  while (ancestor.getType().equals(child.getType())) {
    child = ancestor;
    ancestor = child.getParent();
    parents.put(ancestor, indexOf(child, ancestor));
  }
  return parents;
}
/**
 * Marks the node as built. An unlabelled node gets the bare built label;
 * otherwise the built label is followed by a dot and the complete or
 * incomplete marker, depending on {@code isComplete}.
 *
 * @param p The node to label.
 */
private void setBuilt(Parse p) {
  if (p.getLabel() == null) {
    p.setLabel(Parser.BUILT);
    return;
  }
  String completion = isComplete(p) ? Parser.COMPLETE : Parser.INCOMPLETE;
  p.setLabel(Parser.BUILT + "." + completion);
}
/**
 * Creates the parser implementation that matches the parser type of the model.
 *
 * @param model The model to build the parser from.
 * @param beamSize Passed to the parser constructor.
 * @param advancePercentage Passed to the parser constructor.
 * @return A chunking or tree-insert parser, depending on the model's parser type.
 * @throws IllegalStateException if the model's parser type is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}, including when it is {@code null}.
 */
public static Parser create(ParserModel model, int beamSize, double advancePercentage) {
  ParserType type = model.getParserType();

  if (ParserType.CHUNKING.equals(type)) {
    return new opennlp.tools.parser.chunking.Parser(model, beamSize, advancePercentage);
  }
  if (ParserType.TREEINSERT.equals(type)) {
    return new opennlp.tools.parser.treeinsert.Parser(model, beamSize, advancePercentage);
  }

  // Build the message null-safely: calling name() on a null type would raise a
  // NullPointerException and hide the real problem.
  throw new IllegalStateException("Unexpected ParserType: "
      + (type == null ? "null" : type.name()));
}
/**
 * Wires a parser from its three decision models plus the tagger, chunker and
 * head rules handed to the superclass. Besides storing the models, it creates
 * the matching context generators, allocates one probability buffer per model
 * (sized by that model's outcome count) and caches the outcome indices that
 * are looked up by name.
 */
private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel checkModel,
    POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize,
    double advancePercentage) {
  super(tagger, chunker, headRules, beamSize, advancePercentage);

  // Decision models.
  this.buildModel = buildModel;
  this.attachModel = attachModel;
  this.checkModel = checkModel;

  // Feature generators; attach and check are given the punctuation set.
  this.buildContextGenerator = new BuildContextGenerator();
  this.attachContextGenerator = new AttachContextGenerator(punctSet);
  this.checkContextGenerator = new CheckContextGenerator(punctSet);

  // One probability buffer per model.
  this.bprobs = new double[buildModel.getNumOutcomes()];
  this.aprobs = new double[attachModel.getNumOutcomes()];
  this.cprobs = new double[checkModel.getNumOutcomes()];

  // Cached outcome indices.
  this.doneIndex = buildModel.getIndex(DONE);
  this.sisterAttachIndex = attachModel.getIndex(ATTACH_SISTER);
  this.daughterAttachIndex = attachModel.getIndex(ATTACH_DAUGHTER);
  this.nonAttachIndex = attachModel.getIndex(NON_ATTACH);
  this.attachments = new int[] {daughterAttachIndex, sisterAttachIndex};
  this.completeIndex = checkModel.getIndex(Parser.COMPLETE);
}
/** * Verify that training and tagging does not cause * runtime problems. */ @Test public void testTreeInsertParserTraining() throws Exception { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0); opennlp.tools.parser.Parser parser = ParserFactory.create(model); // Tests parsing to make sure the code does not has // a bug which fails always with a runtime exception parser.parse(Parse.parseParse("She was just another freighter from the " + "States and she seemed as commonplace as her name .")); // Test serializing and de-serializing model ByteArrayOutputStream outArray = new ByteArrayOutputStream(); model.serialize(outArray); outArray.close(); new ParserModel(new ByteArrayInputStream(outArray.toByteArray())); // TODO: compare both models } }
/**
 * Counts the children of the given node as they remain after
 * {@code Parser.collapsePunctuation} has been applied with {@code punctSet}.
 *
 * @param node The node whose children are counted.
 * @return The length of the collapsed child array.
 */
private int nonPunctChildCount(Parse node) { return Parser.collapsePunctuation(node.getChildren(),punctSet).length; } /*
/**
 * Labels the node as complete. A node for which {@code isBuilt} holds keeps
 * the built marker as a prefix, separated by a dot.
 *
 * @param p The node to label.
 */
private void setComplete(Parse p) {
  if (!isBuilt(p)) {
    p.setLabel(Parser.COMPLETE);
  } else {
    p.setLabel(Parser.BUILT + "." + Parser.COMPLETE);
  }
}
/**
 * Runs an n-fold cross validation: for each partition a model is trained on
 * that fold's training stream, evaluated on the fold's test stream, and the
 * resulting f-measure is merged into the running total.
 *
 * @param samples The samples to partition into folds.
 * @param nFolds The number of folds.
 * @throws IOException Declared by this method; presumably raised while reading
 *     samples or training.
 * @throws IllegalStateException if the configured parser type is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}.
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<Parse> partitioner =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (partitioner.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream =
        partitioner.next();

    // Train on the fold's training stream, not on the full sample stream, so the
    // held-out test partition stays unseen by the model that is evaluated on it.
    ParserModel model;
    if (ParserType.CHUNKING.equals(parserType)) {
      model = opennlp.tools.parser.chunking.Parser.train(
          languageCode, trainingSampleStream, rules, params);
    } else if (ParserType.TREEINSERT.equals(parserType)) {
      model = opennlp.tools.parser.treeinsert.Parser.train(
          languageCode, trainingSampleStream, rules, params);
    } else {
      throw new IllegalStateException("Unexpected parser type: " + parserType);
    }

    ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
    evaluator.evaluate(trainingSampleStream.getTestSampleStream());

    fmeasure.mergeInto(evaluator.getFMeasure());
  }
}
/**
 * Returns the right frontier of the specified parse tree with nodes ordered from deepest
 * to shallowest. When the root is a top or incomplete node, the walk starts at the
 * first of its punctuation-collapsed children instead of at the root itself; it then
 * follows the last child of each node until a POS-tag node is reached.
 *
 * @param root The root of the parse tree.
 * @param punctSet The punctuation set used when collapsing the root's children.
 * @return The right frontier of the specified parse tree.
 */
public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
  boolean wrapperRoot = AbstractBottomUpParser.TOP_NODE.equals(root.getType())
      || AbstractBottomUpParser.INC_NODE.equals(root.getType());
  Parse current = wrapperRoot
      ? collapsePunctuation(root.getChildren(), punctSet)[0]
      : root;

  // Prepending while descending leaves the deepest node at the front.
  LinkedList<Parse> frontier = new LinkedList<>();
  while (!current.isPosTag()) {
    frontier.addFirst(current);
    Parse[] kids = current.getChildren();
    current = kids[kids.length - 1];
  }
  return new ArrayList<>(frontier);
}