/**
 * Loads trees from the given path specification by handing the path and
 * the filter, unchanged, to the underlying {@code treebank}.
 *
 * @param path file or directory to load from
 * @param filt a {@code FileFilter} selecting which files to load
 */
@Override public void loadPath(File path, FileFilter filt) { treebank.loadPath(path, filt); }
/**
 * Loads a sequence of trees from the given file, or from the given
 * directory and all of its subdirectories.
 * <p>
 * This forwards to the (path, suffix, recursively) overload using
 * {@code DEFAULT_TREE_FILE_SUFFIX} and with recursion switched on. When a
 * directory is given, trees are expected in files with the suffix "mrg"
 * (a holdover from the English Penn Treebank); when a single file is
 * given, its path must include the extension.
 *
 * @param path file or directory to load from
 */
public void loadPath(File path) {
  final boolean descendIntoSubdirectories = true;
  loadPath(path, DEFAULT_TREE_FILE_SUFFIX, descendIntoSubdirectories);
}
/**
 * Loads a sequence of trees from the named directory and its
 * subdirectories, or from a single named file.
 * <p>
 * The name is wrapped in a {@code File} and passed to the
 * {@code loadPath(File)} overload. For a directory, trees should reside in
 * files with the suffix "mrg"; for a single file, {@code pathName} must
 * include the extension.
 *
 * @param pathName file or directory name
 */
public void loadPath(String pathName) {
  File target = new File(pathName);
  loadPath(target);
}
/**
 * Loads a sequence of trees from the named directory and its
 * subdirectories, restricted to files accepted by the filter; or loads a
 * single file whose name (including extension) is given.
 * <p>
 * The name is wrapped in a {@code File} and passed, together with the
 * filter, to the {@code loadPath(File, FileFilter)} overload.
 *
 * @param pathName file or directory name
 * @param filt     filter used to determine which files match
 */
public void loadPath(String pathName, FileFilter filt) {
  File target = new File(pathName);
  loadPath(target, filt);
}
/**
 * Loads trees from the given file or directory, selecting files by suffix.
 * <p>
 * Builds an {@code ExtensionFileFilter} from {@code suffix} and
 * {@code recursively} and passes it to the filter-based overload.
 *
 * @param path        file or directory to load from
 * @param suffix      suffix of files to load
 * @param recursively descend into subdirectories as well
 */
public void loadPath(File path, String suffix, boolean recursively) {
  ExtensionFileFilter suffixFilter = new ExtensionFileFilter(suffix, recursively);
  loadPath(path, suffixFilter);
}
/**
 * Loads trees from the named file or directory, selecting files by
 * extension.
 * <p>
 * The name is wrapped in a {@code File}, an {@code ExtensionFileFilter} is
 * built from {@code suffix} and {@code recursively}, and both are passed
 * to the filter-based overload.
 *
 * @param pathName    file or directory name
 * @param suffix      extension of files to load: if {@code pathName} is a
 *                    directory and this is non-{@code null}, all and only
 *                    files ending in "." followed by this extension are
 *                    loaded; if it is {@code null}, all files in
 *                    directories are loaded. If {@code pathName} is not a
 *                    directory, this parameter is ignored.
 * @param recursively descend into subdirectories as well
 */
public void loadPath(String pathName, String suffix, boolean recursively) {
  File target = new File(pathName);
  ExtensionFileFilter suffixFilter = new ExtensionFileFilter(suffix, recursively);
  loadPath(target, suffixFilter);
}
/**
 * Creates a reader over the trees described by the given record.
 * <p>
 * Copies the file name, tree transformer, normalizer and filter from the
 * record, falls back to a {@code LabeledScoredTreeReaderFactory} when the
 * record carries no tree reader factory, and loads the file into a
 * {@code DiskTreebank} created with the record's encoding. If the record
 * specifies a tree range, it is passed along when loading.
 *
 * @param record description of the tagged file to read
 */
public TreeTaggedFileReader(TaggedFileRecord record) {
  filename = record.file;

  // Use the record's reader factory when present, otherwise the default one.
  if (record.trf != null) {
    trf = record.trf;
  } else {
    trf = new LabeledScoredTreeReaderFactory();
  }

  transformer = record.treeTransformer;
  normalizer = record.treeNormalizer;
  treeFilter = record.treeFilter;

  treebank = new DiskTreebank(trf, record.encoding);
  if (record.treeRange == null) {
    treebank.loadPath(filename);
  } else {
    treebank.loadPath(filename, record.treeRange);
  }

  treeIterator = treebank.iterator();
  // NOTE(review): presumably advances to the first usable tree — confirm in findNext().
  findNext();
}
/**
 * Reads the training trees for a parser into a disk treebank obtained from
 * {@code op.tlpParams}.
 *
 * @param treebankPath path of the treebank to load
 * @param op           options supplying the language-specific parameters
 * @param filt         filter selecting the files to load, or {@code null}
 *                     to load the path without a filter
 * @return the loaded treebank
 */
private static Treebank makeTreebank(String treebankPath, Options op, FileFilter filt) {
  log.info("Training a parser from treebank dir: " + treebankPath);
  Treebank trees = op.tlpParams.diskTreebank();

  log.info("Reading trees...");
  if (filt != null) {
    trees.loadPath(treebankPath, filt);
  } else {
    trees.loadPath(treebankPath);
  }

  Timing.tick("done [read " + trees.size() + " trees].");
  return trees;
}
/**
 * Loads a treebank from the path given as the first argument and prints it.
 * Just a demonstration of functionality.
 * <p>
 * {@code usage: java MemoryTreebank treebankFilesPath}
 * <p>
 * If no path is supplied, the usage line is written to standard error and
 * the method returns without loading anything.
 *
 * @param args array of command-line arguments; {@code args[0]} is the
 *             treebank path
 */
public static void main(String[] args) {
  // Guard against a missing argument instead of failing with an
  // ArrayIndexOutOfBoundsException on args[0].
  if (args == null || args.length < 1) {
    System.err.println("usage: java MemoryTreebank treebankFilesPath");
    return;
  }
  Timing.startTime();
  Treebank treebank = new MemoryTreebank(PennTreeReader::new);
  treebank.loadPath(args[0]);
  Timing.endTime();
  System.out.println(treebank);
}
/**
 * Reads the training trees for a segmenter into a memory treebank obtained
 * from {@code op.tlpParams}.
 *
 * @param treebankPath path of the treebank to load
 * @param op           options supplying the language-specific parameters
 * @param filt         filter selecting the files to load, or {@code null}
 *                     to load the path without a filter
 * @return the loaded treebank
 */
private static Treebank makeTreebank(String treebankPath, Options op, FileFilter filt) {
  log.info("Training a segmenter from treebank dir: " + treebankPath);
  Treebank trees = op.tlpParams.memoryTreebank();

  log.info("Reading trees...");
  if (filt != null) {
    trees.loadPath(treebankPath, filt);
  } else {
    trees.loadPath(treebankPath);
  }

  Timing.tick("done [read " + trees.size() + " trees].");
  return trees;
}
/**
 * Reads, stems, and prints the trees in the file.
 * <p>
 * If no file is supplied, the usage line is written to standard error and
 * the method returns without doing any work.
 *
 * @param args Usage: WordStemmer file
 */
public static void main(String[] args) {
  // Guard against a missing argument instead of failing with an
  // ArrayIndexOutOfBoundsException on args[0].
  if (args == null || args.length < 1) {
    System.err.println("Usage: WordStemmer file");
    return;
  }
  Treebank treebank = new DiskTreebank();
  treebank.loadPath(args[0]);
  WordStemmer ls = new WordStemmer();
  for (Tree tree : treebank) {
    // Visit the tree with the stemmer, then print the tree.
    ls.visitTree(tree);
    System.out.println(tree);
  }
}
/**
 * Loads the treebank at {@code args[0]} into a memory treebank obtained
 * from {@code EnglishTreebankParserParams} and Penn-prints every tree.
 *
 * @param args command-line arguments; {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  TreebankLangParserParams params = new EnglishTreebankParserParams();
  Treebank trees = params.memoryTreebank();
  trees.loadPath(args[0]);
  for (Tree tree : trees) {
    tree.pennPrint();
  }
}
public static void main(String[] args) { // simple testing code Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new NoPunctuationHeadFinder(); treebank.apply(pt -> { pt.percolateHeads(chf); pt.pennPrint(); System.out.println(); }); }
/**
 * Loads the trees found at {@code path} into a memory treebank and returns
 * an iterator over them.
 *
 * @param path file or directory to load trees from
 * @return an iterator over the loaded trees
 */
private static Iterator<Tree> treebankIterator(String path) {
  /* Remove empty nodes and strip indices from internal nodes but keep
     functional tags. */
  NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer(0, false, 1, false);
  Treebank trees = new MemoryTreebank(normalizer);
  trees.loadPath(path);
  return trees.iterator();
}
public void runTest(String[] args) { // get a parser from file LexicalizedParser pd = LexicalizedParser.loadModel(args[0]); op = pd.getOp(); // in case a serialized options was read in Treebank testTreebank = op.tlpParams.memoryTreebank(); int testlow = Integer.parseInt(args[2]); int testhigh = Integer.parseInt(args[3]); testTreebank.loadPath(args[1], new NumberRangeFileFilter(testlow, testhigh, true)); op.setOptionsOrWarn(args, 4, args.length); testOnTreebank(pd, new EnglishTreebankParserParams(), testTreebank, args[1], pd.stateIndex); } }
/**
 * Loads the trees at the given path, restricted by the given filter, into
 * a memory treebank obtained from {@code op.tlpParams}, logging before and
 * after the load.
 *
 * @param treebankPath   path of the treebank to load
 * @param treebankFilter filter passed to {@code loadPath} along with the path
 * @return the loaded treebank
 */
public Treebank readTreebank(String treebankPath, FileFilter treebankFilter) {
  log.info("Loading trees from " + treebankPath);

  Treebank loaded = op.tlpParams.memoryTreebank();
  loaded.loadPath(treebankPath, treebankFilter);

  log.info("Read in " + loaded.size() + " trees from " + treebankPath);
  return loaded;
}
/**
 * Goes through the trees of a treebank, determines their heads with a
 * {@code CollinsHeadFinder}, and prints them. Just for debugging. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
 * </code>
 * <p>
 * If no path is supplied, the usage line is written to standard error and
 * the method returns without loading anything.
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  // Guard against a missing argument instead of failing with an
  // ArrayIndexOutOfBoundsException on args[0].
  if (args == null || args.length < 1) {
    System.err.println("Usage: java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath");
    return;
  }
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder chf = new CollinsHeadFinder();
  treebank.apply(pt -> {
    pt.percolateHeads(chf);
    pt.pennPrint();
    System.out.println();
  });
}
/**
 * Calculates the split categories from the English Penn Treebank.
 * <p>
 * This is hardwired to sections 2-21 (files numbered 200 through 2199)
 * with a cutoff of 300, as used in ACL03PCFG. It was added while upgrading
 * code for cases where no treebank was available and a pre-stored list was
 * being used instead.
 *
 * @param treebankRoot root path of the English Penn Treebank
 * @return the set of split categories
 */
public static Set<String> getEnglishSplitCategories(String treebankRoot) {
  TreebankLangParserParams params = new EnglishTreebankParserParams();
  Treebank trainTrees = params.memoryTreebank();
  NumberRangeFileFilter sections = new NumberRangeFileFilter(200, 2199, true);
  trainTrees.loadPath(treebankRoot, sections);
  return getSplitCategories(trainTrees, 300.0, params.treebankLanguagePack());
}
/**
 * Loads trees from the files under {@code path} that fall in the given
 * file-number range and returns those whose yield length lies within
 * {@code [minLength, maxLength]} (both bounds inclusive).
 * <p>
 * Trees are read with a {@code PennTreeReader} configured with a
 * {@code LabeledScoredTreeFactory} over a {@code WordFactory} and a
 * {@code BobChrisTreeNormalizer}.
 *
 * @param path      file or directory to load trees from
 * @param low       lower bound given to the {@code NumberRangeFileFilter}
 * @param high      upper bound given to the {@code NumberRangeFileFilter}
 * @param minLength minimum yield size (inclusive) for a tree to be kept
 * @param maxLength maximum yield size (inclusive) for a tree to be kept
 * @return the trees that satisfy the length constraint, in iteration order
 */
public static List<Tree> getTrees(String path, int low, int high, int minLength, int maxLength) {
  Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new WordFactory()), new BobChrisTreeNormalizer()));
  treebank.loadPath(path, new NumberRangeFileFilter(low, high, true));
  List<Tree> trees = new ArrayList<>();
  for (Tree tree : treebank) {
    // Compute the yield once per tree; the original computed it for each
    // side of the range check.
    int length = tree.yield().size();
    if (length <= maxLength && length >= minLength) {
      trees.add(tree);
    }
  }
  return trees;
}
/**
 * For testing: prints the default encoding, then loads a treebank and
 * prints its trees followed by a count.
 * <p>
 * Expects two arguments, the trees path and a file range; with fewer, a
 * usage message is printed via {@code printlnErr} and nothing is loaded.
 *
 * @param args {@code args[0]} is the trees path, {@code args[1]} the file
 *             range given to {@code NumberRangesFileFilter}
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
  System.out.println("Default encoding is: " + tlpp.diskTreebank().encoding());

  if (args.length < 2) {
    printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
    return;
  }

  Treebank trees = tlpp.diskTreebank();
  NumberRangesFileFilter ranges = new NumberRangesFileFilter(args[1], false);
  trees.loadPath(args[0], ranges);
  for (Tree tree : trees) {
    tree.pennPrint(tlpp.pw());
  }
  System.out.println("There were " + trees.size() + " trees.");
}