/**
 * Creates an annotator from a head finder, language-specific parser
 * parameters, and parser options.
 *
 * @param hf   head finder stored on this annotator
 * @param tlpp language-specific parser parameters stored on this annotator
 * @param op   options object; only its {@code trainOptions} field is retained
 */
public TreeAnnotator(HeadFinder hf, TreebankLangParserParams tlpp, Options op) {
  this.hf = hf;
  this.tlpParams = tlpp;
  this.trainOptions = op.trainOptions;
  // A default-constructed labeled, scored tree factory is kept for later use.
  this.tf = new LabeledScoredTreeFactory();
}
/**
 * Builds a reader factory around a caller-supplied {@link TreeNormalizer}.
 * Delegates to the two-argument constructor with a new
 * {@link LabeledScoredTreeFactory}; tokenization is documented as using a
 * {@link PennTreebankTokenizer}.
 *
 * @param tn The TreeNormalizer to use in building Tree objects to return.
 */
public PennTreeReaderFactory(TreeNormalizer tn) {
  this(
      new LabeledScoredTreeFactory(),
      tn);
}
/**
 * Reads parse trees from a {@code Reader}.
 * This convenience constructor delegates to the two-argument constructor,
 * supplying a new {@code LabeledScoredTreeFactory} as the tree factory.
 * The remaining defaults are chosen by that constructor (documented as no
 * {@code TreeNormalizer} and a {@code PennTreebankTokenizer}).
 *
 * @param in The {@code Reader}
 */
public PennTreeReader(Reader in) {
  this(
      in,
      new LabeledScoredTreeFactory());
}
/**
 * No-argument constructor. Delegates to the single-argument constructor
 * with a new {@link LabeledScoredTreeFactory}; the label type, tokenizer
 * ({@link PennTreebankTokenizer}) and {@link TreeNormalizer} defaults are
 * those applied by the constructors this one chains to.
 */
public PennTreeReaderFactory() {
  this(
      new LabeledScoredTreeFactory());
}
/**
 * Creates a {@code TreeFactory} for trees of the
 * {@code LabeledScoredTree{Node|Leaf}} type whose {@code Label}s are made
 * with the given {@code LabelFactory}.
 * Every call constructs and returns a new factory instance.
 *
 * @param lf The LabelFactory to use
 * @return a factory to produce labeled, scored trees
 */
public static TreeFactory factory(LabelFactory lf) {
  final TreeFactory freshFactory = new LabeledScoredTreeFactory(lf);
  return freshFactory;
}
/**
 * Creates a {@code PennTreeReader} over the given input.
 * Trees are built by a new {@code LabeledScoredTreeFactory} configured with
 * this object's {@code lf}, and {@code tm} is passed as the third
 * constructor argument. The enclosing {@code TreeReaderFactory} is
 * documented as normalizing with {@code BobChrisTreeNormalizer} and using
 * {@code CategoryWordTag} labels unless otherwise specified on construction.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory(lf);
  return new PennTreeReader(in, treeFactory, tm);
}
}
/**
 * Creates a debinarizer.
 *
 * @param forceCNF flag stored on this instance as {@code forceCNF}
 * @param lf       label factory handed to the {@code LabeledScoredTreeFactory}
 *                 that this instance keeps in {@code tf}
 */
public Debinarizer(boolean forceCNF, LabelFactory lf) {
  this.forceCNF = forceCNF;
  this.tf = new LabeledScoredTreeFactory(lf);
  this.boundaryRemover = new BoundaryRemover();
}
/**
 * Creates a dependency parser from a grammar, a lexicon, options and the
 * word/tag indices.
 *
 * @param dg        dependency grammar stored on this parser
 * @param lex       lexicon stored on this parser
 * @param op        options; also the source of the language pack via {@code op.langpack()}
 * @param wordIndex index of words stored on this parser
 * @param tagIndex  index of tags stored on this parser
 */
public ExhaustiveDependencyParser(DependencyGrammar dg, Lexicon lex, Options op,
                                  Index<String> wordIndex, Index<String> tagIndex) {
  this.dg = dg;
  this.lex = lex;
  this.op = op;
  this.tlp = op.langpack();
  this.wordIndex = wordIndex;
  this.tagIndex = tagIndex;
  // Default-constructed labeled, scored tree factory kept in tf.
  this.tf = new LabeledScoredTreeFactory();
}
/**
 * Creates a {@code PennTreeReader} over {@code in} that builds trees with a
 * default {@code LabeledScoredTreeFactory} and normalizes them with an
 * {@code NPTmpRetainingTreeNormalizer} configured as
 * {@code (TEMPORAL_ACL03PCFG, false, 0, true)}.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
@Override
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  final NPTmpRetainingTreeNormalizer normalizer =
      new NPTmpRetainingTreeNormalizer(NPTmpRetainingTreeNormalizer.TEMPORAL_ACL03PCFG, false, 0, true);
  return new PennTreeReader(in, treeFactory, normalizer);
}
/**
 * Creates a {@code PennTreeReader} over {@code in} that builds trees with a
 * default {@code LabeledScoredTreeFactory} and normalizes them with a
 * default-constructed {@code NPTmpRetainingTreeNormalizer}.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
@Override
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  final NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer();
  return new PennTreeReader(in, treeFactory, normalizer);
}
/**
 * Makes the TreeReaderFactory appropriate to the current options.
 * The returned factory reads the {@code englishTrain} settings and calls
 * {@code headFinder()} each time it is asked for a reader, not when this
 * method is called.
 *
 * @return a factory producing {@code PennTreeReader}s that normalize with
 *         an {@code NPTmpRetainingTreeNormalizer}
 */
@Override
public TreeReaderFactory treeReaderFactory() {
  return in -> {
    final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
    final NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer(
        englishTrain.splitTMP,
        englishTrain.splitSGapped == 5,
        englishTrain.leaveItAll,
        englishTrain.splitNPADV >= 1,
        headFinder());
    return new PennTreeReader(in, treeFactory, normalizer);
  };
}
/**
 * Returns a MemoryTreebank appropriate to the testing treebank source.
 * Its readers are {@code PennTreeReader}s that build trees with a default
 * {@code LabeledScoredTreeFactory} and normalize them with a
 * {@code BobChrisTreeNormalizer} constructed from {@code tlp}.
 *
 * @return a new, empty-constructed {@code MemoryTreebank} with that reader factory
 */
@Override
public MemoryTreebank testMemoryTreebank() {
  return new MemoryTreebank(in -> {
    final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
    final BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer(tlp);
    return new PennTreeReader(in, treeFactory, normalizer);
  });
}
/**
 * Creates a {@code PennTreeReader} over the given input whose trees are
 * built by a {@code LabeledScoredTreeFactory} using a
 * {@code StringLabelFactory}, i.e. {@code LabeledScoredTree} objects with
 * {@code StringLabel} labels. No normalizer is passed here; the enclosing
 * {@code TreeReaderFactory} is documented as reproducing treebank trees
 * literally.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory stringLabelTrees =
      new LabeledScoredTreeFactory(new StringLabelFactory());
  return new PennTreeReader(in, stringLabelTrees);
}
/**
 * Creates a {@code PennTreeReader} over {@code in} using a default
 * {@code LabeledScoredTreeFactory}, a {@code SpanishTreeNormalizer} built
 * with all three boolean arguments {@code false}, and a
 * {@code PennTreebankTokenizer} over the same reader.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  final SpanishTreeNormalizer normalizer = new SpanishTreeNormalizer(false, false, false);
  final PennTreebankTokenizer tokenizer = new PennTreebankTokenizer(in);
  return new PennTreeReader(in, treeFactory, normalizer, tokenizer);
}
/**
 * Creates a {@code PennTreeReader} over {@code in} that normalizes with a
 * {@code NegraPennTreeNormalizer} (built from {@code tlp} and
 * {@code nodeCleanup}) and tokenizes with a {@code NegraPennTokenizer}.
 * When {@code treeNormalizerInsertNPinPP} is set, the normalizer's
 * insert-NP-in-PP option is switched on; otherwise it is left untouched.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
@Override
public TreeReader newTreeReader(Reader in) {
  final NegraPennTreeNormalizer normalizer = new NegraPennTreeNormalizer(tlp, nodeCleanup);
  if (treeNormalizerInsertNPinPP) {
    normalizer.setInsertNPinPP(true);
  }
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  final NegraPennTokenizer tokenizer = new NegraPennTokenizer(in);
  return new PennTreeReader(in, treeFactory, normalizer, tokenizer);
}
/**
 * Creates a {@code PennTreeReader} over {@code in} using a default
 * {@code LabeledScoredTreeFactory}, a {@code FrenchTreeNormalizer}
 * constructed with {@code false}, and a {@code PennTreebankTokenizer} over
 * the same reader.
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  final FrenchTreeNormalizer normalizer = new FrenchTreeNormalizer(false);
  final PennTreebankTokenizer tokenizer = new PennTreebankTokenizer(in);
  return new PennTreeReader(in, treeFactory, normalizer, tokenizer);
}
}
/**
 * Creates a {@code PennTreeReader} over {@code in} whose trees carry
 * {@code StringLabelFactory} labels and are normalized by an
 * {@code OrderedCombinationTreeNormalizer} given, in this order, a
 * {@code GrammaticalFunctionTreeNormalizer} and a
 * {@code TueBaDZPennTreeNormalizer} (both built from {@code tlp} and
 * {@code nodeCleanup}).
 *
 * @param in the reader to take trees from
 * @return a new tree reader over {@code in}
 */
public TreeReader newTreeReader(Reader in) {
  final TreeNormalizer functionNormalizer = new GrammaticalFunctionTreeNormalizer(tlp, nodeCleanup);
  final TueBaDZPennTreeNormalizer tuebaNormalizer = new TueBaDZPennTreeNormalizer(tlp, nodeCleanup);
  final TreeNormalizer combined =
      new OrderedCombinationTreeNormalizer(Arrays.asList(functionNormalizer, tuebaNormalizer));
  final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory(new StringLabelFactory());
  return new PennTreeReader(in, treeFactory, combined);
}
/**
 * Creates a tree reader over {@code in} that tokenizes with an
 * {@code ArabicTreebankTokenizer}.
 * With {@code noNormalization} set, a plain {@code TreeNormalizer} is used;
 * otherwise an {@code ArabicTreeNormalizer} is built from the
 * {@code retainNPTmp}, {@code retainPRD}, {@code changeNoLabels},
 * {@code retainNPSbj} and {@code retainPPClr} settings. When
 * {@code filterX} is set, the reader is wrapped in a
 * {@code FilteringTreeReader} with an {@code XFilter}.
 *
 * @param in the reader to take trees from
 * @return the (possibly filtering) tree reader
 */
public TreeReader newTreeReader(Reader in) {
  final TreeReader baseReader;
  if (noNormalization) {
    baseReader = new PennTreeReader(in,
                                    new LabeledScoredTreeFactory(),
                                    new TreeNormalizer(),
                                    new ArabicTreebankTokenizer(in));
  } else {
    baseReader = new PennTreeReader(in,
                                    new LabeledScoredTreeFactory(),
                                    new ArabicTreeNormalizer(retainNPTmp,retainPRD,changeNoLabels, retainNPSbj, retainPPClr),
                                    new ArabicTreebankTokenizer(in));
  }

  return filterX ? new FilteringTreeReader(baseReader, new XFilter()) : baseReader;
}
/**
 * Loads trees from {@code path} and keeps those whose yield length lies in
 * {@code [minLength, maxLength]} (both bounds inclusive).
 * Trees are read with a {@code PennTreeReader} that uses
 * {@code WordFactory} labels and a default {@code BobChrisTreeNormalizer}.
 *
 * @param path      location handed to {@code Treebank.loadPath}
 * @param low       lower bound given to the {@code NumberRangeFileFilter}
 * @param high      upper bound given to the {@code NumberRangeFileFilter}
 * @param minLength smallest yield size a tree may have to be kept
 * @param maxLength largest yield size a tree may have to be kept
 * @return the matching trees, in treebank iteration order
 */
public static List<Tree> getTrees(String path, int low, int high, int minLength, int maxLength) {
  Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new WordFactory()), new BobChrisTreeNormalizer()));
  // NOTE(review): the third filter argument is presumably "recurse into directories" — confirm.
  treebank.loadPath(path, new NumberRangeFileFilter(low, high, true));
  List<Tree> trees = new ArrayList<>();
  for (Tree tree : treebank) {
    // Compute the yield once per tree; the original evaluated it twice per tree.
    final int length = tree.yield().size();
    if (length <= maxLength && length >= minLength) {
      trees.add(tree);
    }
  }
  return trees;
}
/**
 * Creates a raw-tree normalizer that writes to the two given writers.
 * The Buckwalter mapping is default-constructed when {@code encoding} is
 * {@code Encoding.UTF8} and constructed with {@code true} otherwise.
 *
 * @param outFile  writer stored as {@code outfile}
 * @param flatFile writer stored as {@code flatFile}
 */
public ArabicRawTreeNormalizer(PrintWriter outFile, PrintWriter flatFile) {
  if (encoding == Encoding.UTF8) {
    encodingMap = new Buckwalter();
  } else {
    encodingMap = new Buckwalter(true);
  }

  this.outfile = outFile;
  this.flatFile = flatFile;

  nullFilter = new ArabicTreeNormalizer.ArabicEmptyFilter();
  aOverAFilter = new AOverAFilter();

  tf = new LabeledScoredTreeFactory();
  tlp = new ArabicTreebankLanguagePack();
}