this.hf = hf; this.tlp = tlp; this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory()); this.insideFactor = insideFactor; this.markovFactor = markovFactor;
new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
/** Use the DependencyGrammar to score the tree. * * @param tr A binarized tree (as returned by the PCFG parser * @return The score for the tree according to the grammar */ private double depScoreTree(Tree tr) { // log.info("Here's our tree:"); // tr.pennPrint(); // log.info(Trees.toDebugStructureString(tr)); Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory()); cwtTree.percolateHeads(binHeadFinder); // log.info("Here's what it went to:"); // cwtTree.pennPrint(); List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree, wordIndex, tagIndex); // log.info("Here's the deps:\n" + deps); return dg.scoreAll(deps); }
/**
 * Builds the combined annotator/binarizer, taking most of its configuration
 * from {@code op.trainOptions}.
 *
 * @param annotationHF head finder handed to the annotator
 * @param binarizationHF head finder handed to the {@code TreeBinarizer}
 * @param tlpParams language-specific parameters; also supplies the treebank language pack
 * @param forceCNF stored as-is in {@code this.forceCNF}
 * @param insideFactor forwarded to the {@code TreeBinarizer}
 * @param doSubcategorization when true a {@code TreeAnnotator} is created,
 *     otherwise a {@code TreeNullAnnotator}
 * @param op options object; its {@code trainOptions} are kept and consulted here
 */
public TreeAnnotatorAndBinarizer(HeadFinder annotationHF,
                                 HeadFinder binarizationHF,
                                 TreebankLangParserParams tlpParams,
                                 boolean forceCNF,
                                 boolean insideFactor,
                                 boolean doSubcategorization,
                                 Options op) {
  // trainOptions must be set first: everything below reads it.
  this.trainOptions = op.trainOptions;
  this.forceCNF = forceCNF;
  this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());

  if (!doSubcategorization) {
    annotator = new TreeNullAnnotator(annotationHF);
  } else {
    annotator = new TreeAnnotator(annotationHF, tlpParams, op);
  }

  binarizer = new TreeBinarizer(
      binarizationHF,
      tlpParams.treebankLanguagePack(),
      insideFactor,
      trainOptions.markovFactor,
      trainOptions.markovOrder,
      trainOptions.compactGrammar() > 0,
      trainOptions.compactGrammar() > 1,
      trainOptions.HSEL_CUT,
      trainOptions.markFinalStates,
      trainOptions.simpleBinarizedLabels,
      trainOptions.noRebinarization);

  // The post-splitter exists only when selective post-splitting is requested.
  postSplitter = trainOptions.selectivePostSplit ? new PostSplitter(tlpParams, op) : null;

  this.tlp = tlpParams.treebankLanguagePack();

  // Each counter is allocated only if the matching print option is on; otherwise it is null.
  if (!trainOptions.printAnnotatedRuleCounts) {
    annotatedRuleCounts = null;
  } else {
    annotatedRuleCounts = new ClassicCounter<>();
  }
  if (!trainOptions.printAnnotatedStateCounts) {
    annotatedStateCounts = null;
  } else {
    annotatedStateCounts = new ClassicCounter<>();
  }
}
debinarizer = new Debinarizer(op.forceCNF, new CategoryWordTagFactory()); subcategoryStripper = op.tlpParams.subcategoryStripper();
/**
 * Creates a tree binarizer with the given settings. Every argument is stored
 * in the field of the same name; the tree factory is always a
 * {@code LabeledScoredTreeFactory} over a {@code CategoryWordTagFactory}.
 *
 * @param hf the HeadFinder to use in binarization
 * @param tlp the TreebankLanguagePack to use
 * @param insideFactor whether to do inside markovization
 * @param markovFactor whether to markovize the binary rules
 * @param markovOrder the markov order to use; only relevant with
 *     {@code markovFactor=true}
 * @param useWrappingLabels whether to use state names (labels) that allow
 *     wrapping from right to left
 * @param unaryAtTop whether to actually materialize the unary that rewrites a
 *     passive state to the active rule at the top of an original local tree;
 *     this is used only when compaction is happening
 * @param selectiveSplitThreshold if selective split is used, the threshold
 *     used to decide which state splits to keep
 * @param markFinalStates whether or not to make the state names (labels) of
 *     the final active states distinctive
 */
public TreeBinarizer(HeadFinder hf,
                     TreebankLanguagePack tlp,
                     boolean insideFactor,
                     boolean markovFactor,
                     int markovOrder,
                     boolean useWrappingLabels,
                     boolean unaryAtTop,
                     double selectiveSplitThreshold,
                     boolean markFinalStates) {
  // Collaborators.
  this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
  this.tlp = tlp;
  this.hf = hf;

  // Markovization settings.
  this.markovOrder = markovOrder;
  this.markovFactor = markovFactor;
  this.insideFactor = insideFactor;

  // State labelling, compaction and selective-split settings.
  this.markFinalStates = markFinalStates;
  this.selectiveSplitThreshold = selectiveSplitThreshold;
  this.unaryAtTop = unaryAtTop;
  this.useWrappingLabels = useWrappingLabels;
}
this.hf = hf; this.tlp = tlp; this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory()); this.insideFactor = insideFactor; this.markovFactor = markovFactor;
this.hf = hf; this.tlp = tlp; this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory()); this.insideFactor = insideFactor; this.markovFactor = markovFactor;
this.hf = hf; this.tlp = tlp; this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory()); this.insideFactor = insideFactor; this.markovFactor = markovFactor;
/**
 * Creates a {@code PennTreeReader} over the given reader, building trees with
 * a {@code LabeledScoredTreeFactory} (labels from a
 * {@code CategoryWordTagFactory}) and normalizing them with a
 * {@code BobChrisTreeNormalizer}.
 *
 * @param in the character source handed to the tree reader
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  LabeledScoredTreeFactory treeFactory =
      new LabeledScoredTreeFactory(new CategoryWordTagFactory());
  BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer();
  return new PennTreeReader(in, treeFactory, normalizer);
}
};
/**
 * Wraps {@code in} in a {@code PennTreeReader} whose trees come from a
 * {@code LabeledScoredTreeFactory} backed by a {@code CategoryWordTagFactory}
 * and are normalized by a {@code BobChrisTreeNormalizer}.
 *
 * @param in the reader to pull tree text from
 * @return the newly created tree reader
 */
public TreeReader newTreeReader(Reader in) {
  LabeledScoredTreeFactory factory =
      new LabeledScoredTreeFactory(new CategoryWordTagFactory());
  BobChrisTreeNormalizer treeNormalizer = new BobChrisTreeNormalizer();
  return new PennTreeReader(in, factory, treeNormalizer);
}
};
return new PennTreeReader(in, new LabeledScoredTreeFactory( new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
return new PennTreeReader(in, new LabeledScoredTreeFactory( new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
/** Use the DependencyGrammar to score the tree. * * @param tr A binarized tree (as returned by the PCFG parser * @return The score for the tree according to the grammar */ private double depScoreTree(Tree tr) { // log.info("Here's our tree:"); // tr.pennPrint(); // log.info(Trees.toDebugStructureString(tr)); Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory()); cwtTree.percolateHeads(binHeadFinder); // log.info("Here's what it went to:"); // cwtTree.pennPrint(); List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree, wordIndex, tagIndex); // log.info("Here's the deps:\n" + deps); return dg.scoreAll(deps); }
/** Use the DependencyGrammar to score the tree. * * @param tr A binarized tree (as returned by the PCFG parser * @return The score for the tree according to the grammar */ private double depScoreTree(Tree tr) { // log.info("Here's our tree:"); // tr.pennPrint(); // log.info(Trees.toDebugStructureString(tr)); Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory()); cwtTree.percolateHeads(binHeadFinder); // log.info("Here's what it went to:"); // cwtTree.pennPrint(); List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree, wordIndex, tagIndex); // log.info("Here's the deps:\n" + deps); return dg.scoreAll(deps); }
/** Use the DependencyGrammar to score the tree. * * @param tr A binarized tree (as returned by the PCFG parser * @return The score for the tree according to the grammar */ private double depScoreTree(Tree tr) { // System.err.println("Here's our tree:"); // tr.pennPrint(); // System.err.println(Trees.toDebugStructureString(tr)); Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory()); cwtTree.percolateHeads(binHeadFinder); // System.err.println("Here's what it went to:"); // cwtTree.pennPrint(); List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree); // System.err.println("Here's the deps:\n" + deps); return dg.scoreAll(deps); }
/** Use the DependencyGrammar to score the tree. * * @param tr A binarized tree (as returned by the PCFG parser * @return The score for the tree according to the grammar */ private double depScoreTree(Tree tr) { // System.err.println("Here's our tree:"); // tr.pennPrint(); // System.err.println(Trees.toDebugStructureString(tr)); Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory()); cwtTree.percolateHeads(binHeadFinder); // System.err.println("Here's what it went to:"); // cwtTree.pennPrint(); List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree, wordIndex, tagIndex); // System.err.println("Here's the deps:\n" + deps); return dg.scoreAll(deps); }
/**
 * Builds the combined annotator/binarizer, reading the remaining
 * configuration from the static settings on {@code Train}.
 *
 * @param annotationHF head finder handed to the annotator
 * @param binarizationHF head finder handed to the {@code TreeBinarizer}
 * @param tlpParams language-specific parameters; also supplies the treebank language pack
 * @param forceCNF stored as-is in {@code this.forceCNF}
 * @param insideFactor forwarded to the {@code TreeBinarizer}
 * @param doSubcategorization when true a {@code TreeAnnotator} is created,
 *     otherwise a {@code TreeNullAnnotator}
 */
public TreeAnnotatorAndBinarizer(HeadFinder annotationHF,
                                 HeadFinder binarizationHF,
                                 TreebankLangParserParams tlpParams,
                                 boolean forceCNF,
                                 boolean insideFactor,
                                 boolean doSubcategorization) {
  this.forceCNF = forceCNF;
  this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());

  if (!doSubcategorization) {
    annotator = new TreeNullAnnotator(annotationHF);
  } else {
    annotator = new TreeAnnotator(annotationHF, tlpParams);
  }

  binarizer = new TreeBinarizer(
      binarizationHF,
      tlpParams.treebankLanguagePack(),
      insideFactor,
      Train.markovFactor,
      Train.markovOrder,
      Train.compactGrammar() > 0,
      Train.compactGrammar() > 1,
      Train.HSEL_CUT,
      Train.markFinalStates);

  // Optional pieces: each is assigned only when its Train flag is set and is
  // otherwise left untouched by this constructor.
  if (Train.selectivePostSplit) {
    postSplitter = new PostSplitter(tlpParams);
  }

  this.tlp = tlpParams.treebankLanguagePack();

  if (Train.printAnnotatedRuleCounts) {
    annotatedRuleCounts = new ClassicCounter<Tree>();
  }
  if (Train.printAnnotatedStateCounts) {
    annotatedStateCounts = new ClassicCounter<String>();
  }
}