edu.stanford.nlp.trees.Treebank.loadPath java code examples

/**
 * Load trees from the given path specification.  This method does no
 * loading of its own: it passes the path and the filter unchanged to
 * the underlying {@code treebank}.
 *
 * @param path file or directory to load from
 * @param filt a {@link FileFilter} selecting the files to load
 */
@Override
public void loadPath(File path, FileFilter filt) {
 treebank.loadPath(path, filt);
}

/**
 * Loads trees from a single file, or from a directory and all of its
 * subdirectories.
 * <p>
 * The call is forwarded to the (path, suffix, recursively) overload using
 * {@code DEFAULT_TREE_FILE_SUFFIX} and recursive descent.  For a directory,
 * trees are expected in files with the suffix "mrg" (a holdover from the
 * English Penn Treebank); for a single file, the path must include the
 * file's extension.
 *
 * @param path file or directory to load from
 */
public void loadPath(File path) {
 final boolean recursively = true;
 loadPath(path, DEFAULT_TREE_FILE_SUFFIX, recursively);
}

/**
 * Convenience overload taking the path as a string.  The name is wrapped
 * in a {@link File} and handed to the {@code File} overload, so either a
 * directory (searched together with its subdirectories for files with the
 * suffix "mrg") or a single file (name including extension) may be given.
 *
 * @param pathName file or directory name
 */
public void loadPath(String pathName) {
 File path = new File(pathName);
 loadPath(path);
}

/**
 * Convenience overload taking the path as a string together with a file
 * filter.  The name is wrapped in a {@link File} and handed to the
 * (File, FileFilter) overload: trees are loaded from the files in the
 * directory and its subdirectories that match the filter, or from the
 * single named file (name including extension).
 *
 * @param pathName file or directory name
 * @param filt     a filter used to determine which files match
 */
public void loadPath(String pathName, FileFilter filt) {
 File path = new File(pathName);
 loadPath(path, filt);
}

/**
 * Loads trees from the given file or directory, selecting files by suffix.
 * An {@code ExtensionFileFilter} is built from {@code suffix} and
 * {@code recursively} and passed on to the (File, FileFilter) overload.
 *
 * @param path        file or directory to load from
 * @param suffix      suffix of files to load
 * @param recursively whether to descend into subdirectories as well
 */
public void loadPath(File path, String suffix, boolean recursively) {
 ExtensionFileFilter suffixFilter = new ExtensionFileFilter(suffix, recursively);
 loadPath(path, suffixFilter);
}

/**
 * Loads trees from the named file or directory, selecting files by
 * extension.  The name is wrapped in a {@link File}, an
 * {@code ExtensionFileFilter} is built from the remaining arguments, and
 * both are passed on to the (File, FileFilter) overload.
 *
 * @param pathName    file or directory name
 * @param suffix      extension of files to load.  If {@code pathName} is a
 *                    directory and this is non-{@code null}, all and only
 *                    files ending in "." followed by this extension are
 *                    loaded; if it is {@code null}, all files in
 *                    directories are loaded.  If {@code pathName} is not a
 *                    directory, this parameter is ignored.
 * @param recursively whether to descend into subdirectories as well
 */
public void loadPath(String pathName, String suffix, boolean recursively) {
 File path = new File(pathName);
 ExtensionFileFilter suffixFilter = new ExtensionFileFilter(suffix, recursively);
 loadPath(path, suffixFilter);
}

/**
 * Sets up a reader over the trees described by a tagged file record.
 * <p>
 * The file name, tree transformer, normalizer and filter are copied from
 * the record.  If the record supplies no tree reader factory, a
 * {@code LabeledScoredTreeReaderFactory} is used instead.  Trees are read
 * through a {@code DiskTreebank} created with the record's encoding; when
 * the record carries a tree range, it is passed to {@code loadPath} as the
 * file filter.  Finally the tree iterator is obtained and
 * {@code findNext()} is invoked (presumably to position the reader on the
 * first usable tree -- confirm against {@code findNext}).
 *
 * @param record description of the tagged tree file to read
 */
public TreeTaggedFileReader(TaggedFileRecord record) {
 filename = record.file;
 if (record.trf != null) {
  trf = record.trf;
 } else {
  trf = new LabeledScoredTreeReaderFactory();
 }
 transformer = record.treeTransformer;
 normalizer = record.treeNormalizer;
 treeFilter = record.treeFilter;

 treebank = new DiskTreebank(trf, record.encoding);
 if (record.treeRange == null) {
  // No range given: load everything the default loadPath picks up.
  treebank.loadPath(filename);
 } else {
  treebank.loadPath(filename, record.treeRange);
 }

 treeIterator = treebank.iterator();
 findNext();
}

/**
 * Creates the parser's training treebank and loads trees into it.
 * <p>
 * The treebank comes from {@code op.tlpParams.diskTreebank()}.  Progress is
 * logged, and the number of trees read is reported via {@code Timing.tick}.
 *
 * @param treebankPath file or directory to load trees from
 * @param op           options supplying the treebank language parameters
 * @param filt         filter choosing which files to load; if {@code null},
 *                     the single-argument {@code loadPath} is used instead
 * @return the loaded training treebank
 */
private static Treebank makeTreebank(String treebankPath, Options op, FileFilter filt) {
 log.info("Training a parser from treebank dir: " + treebankPath);
 Treebank trees = op.tlpParams.diskTreebank();
 log.info("Reading trees...");
 if (filt != null) {
  trees.loadPath(treebankPath, filt);
 } else {
  trees.loadPath(treebankPath);
 }
 Timing.tick("done [read " + trees.size() + " trees].");
 return trees;
}

/**
 * Demonstration entry point: loads the treebank found at the path given as
 * the first argument into a {@code MemoryTreebank} that reads trees with
 * {@code PennTreeReader}, then prints the treebank to standard output.
 * The load is bracketed by {@code Timing.startTime()} and
 * {@code Timing.endTime()}.
 * <p>
 * {@code usage: java MemoryTreebank treebankFilesPath}
 *
 * @param args array of command-line arguments; {@code args[0]} is the
 *             treebank file or directory path
 */
public static void main(String[] args) {
 Timing.startTime();
 Treebank trees = new MemoryTreebank(PennTreeReader::new);
 String treebankFilesPath = args[0];
 trees.loadPath(treebankFilesPath);
 Timing.endTime();
 System.out.println(trees);
}

/**
 * Creates the segmenter's training treebank and loads trees into it.
 * <p>
 * The treebank comes from {@code op.tlpParams.memoryTreebank()}.  Progress
 * is logged, and the number of trees read is reported via
 * {@code Timing.tick}.
 *
 * @param treebankPath file or directory to load trees from
 * @param op           options supplying the treebank language parameters
 * @param filt         filter choosing which files to load; if {@code null},
 *                     the single-argument {@code loadPath} is used instead
 * @return the loaded training treebank
 */
private static Treebank makeTreebank(String treebankPath, Options op, FileFilter filt) {
 log.info("Training a segmenter from treebank dir: " + treebankPath);
 Treebank trees = op.tlpParams.memoryTreebank();
 log.info("Reading trees...");
 if (filt != null) {
  trees.loadPath(treebankPath, filt);
 } else {
  trees.loadPath(treebankPath);
 }
 Timing.tick("done [read " + trees.size() + " trees].");
 return trees;
}

/**
 * Command-line driver: loads the trees at the given path into a
 * {@code DiskTreebank}, runs a {@code WordStemmer} over each tree, and
 * prints each tree after it has been visited.
 *
 * @param args Usage: WordStemmer file
 */
public static void main(String[] args) {
 Treebank trees = new DiskTreebank();
 trees.loadPath(args[0]);
 WordStemmer stemmer = new WordStemmer();
 trees.forEach(t -> {
  stemmer.visitTree(t);
  System.out.println(t);
 });
}

/**
 * Loads the trees at the path given as the first argument into the memory
 * treebank supplied by {@code EnglishTreebankParserParams} and prints each
 * tree with {@code pennPrint()}.
 *
 * @param args command-line arguments; {@code args[0]} is the file or
 *             directory to load trees from
 */
public static void main(String[] args) {
 TreebankLangParserParams params = new EnglishTreebankParserParams();
 Treebank trees = params.memoryTreebank();
 trees.loadPath(args[0]);
 trees.forEach(tree -> tree.pennPrint());
}

/**
 * Simple testing code.  Loads the trees at the path given as the first
 * argument into a {@code DiskTreebank}, then for every tree percolates
 * heads with a {@code NoPunctuationHeadFinder}, prints the tree with
 * {@code pennPrint()}, and prints an empty line.
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on before
 * the trees are loaded.
 *
 * @param args command-line arguments; {@code args[0]} is the file or
 *             directory to load trees from
 */
public static void main(String[] args) {
 Treebank trees = new DiskTreebank();
 CategoryWordTag.suppressTerminalDetails = true;
 trees.loadPath(args[0]);
 final HeadFinder headFinder = new NoPunctuationHeadFinder();
 trees.apply(tree -> {
  tree.percolateHeads(headFinder);
  tree.pennPrint();
  System.out.println();
 });
}

/**
 * Loads the trees at {@code path} into a {@code MemoryTreebank} and
 * returns an iterator over them.
 *
 * @param path file or directory to load trees from
 * @return an iterator over the loaded trees
 */
private static Iterator<Tree> treebankIterator(String path) {
 // Normalizer settings: remove empty nodes and strip indices from
 // internal nodes, but keep functional tags.
 NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer(0, false, 1, false);
 Treebank trees = new MemoryTreebank(normalizer);
 trees.loadPath(path);
 return trees.iterator();
}

 /**
  * Loads a serialized parser and evaluates it on a range of treebank files.
  * <p>
  * Expected arguments: {@code args[0]} is the parser model to load,
  * {@code args[1]} the treebank path, {@code args[2]} and {@code args[3]}
  * the low and high bounds of the file number range to test on.  Any
  * arguments from index 4 onwards are handed to
  * {@code op.setOptionsOrWarn}.
  *
  * @param args command-line style arguments as described above
  */
 public void runTest(String[] args) {
  // Read the parser from file and adopt its options, in case serialized
  // options were read in along with it.
  LexicalizedParser parser = LexicalizedParser.loadModel(args[0]);
  op = parser.getOp();

  Treebank testTrees = op.tlpParams.memoryTreebank();
  int lowFileNum = Integer.parseInt(args[2]);
  int highFileNum = Integer.parseInt(args[3]);
  String treebankPath = args[1];
  NumberRangeFileFilter testRange = new NumberRangeFileFilter(lowFileNum, highFileNum, true);
  testTrees.loadPath(treebankPath, testRange);

  op.setOptionsOrWarn(args, 4, args.length);
  testOnTreebank(parser, new EnglishTreebankParserParams(), testTrees, args[1], parser.stateIndex);
 }
}

/**
 * Reads the trees selected by a filter into a memory treebank obtained
 * from {@code op.tlpParams}, logging the path before loading and the
 * number of trees read afterwards.
 *
 * @param treebankPath   file or directory to load trees from
 * @param treebankFilter filter determining which files are loaded
 * @return the treebank holding the loaded trees
 */
public Treebank readTreebank(String treebankPath, FileFilter treebankFilter) {
 log.info("Loading trees from " + treebankPath);
 Treebank loaded = op.tlpParams.memoryTreebank();
 loaded.loadPath(treebankPath, treebankFilter);
 int treeCount = loaded.size();
 log.info("Read in " + treeCount + " trees from " + treebankPath);
 return loaded;
}

/**
 * Debugging aid: goes through the trees at the given path, determines
 * their heads with a {@code CollinsHeadFinder}, and prints each tree
 * followed by an empty line.
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on before
 * the trees are loaded. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
 Treebank trees = new DiskTreebank();
 CategoryWordTag.suppressTerminalDetails = true;
 trees.loadPath(args[0]);
 final HeadFinder headFinder = new CollinsHeadFinder();
 trees.apply(tree -> {
  tree.percolateHeads(headFinder);
  tree.pennPrint();
  System.out.println();
 });
}

/**
 * Calculates the split categories for English.  This is hardwired to the
 * English Penn Treebank sections 2-21 (files numbered 200 to 2199) with a
 * cutoff of 300, as used in ACL03PCFG.  It was added while upgrading the
 * code, for cases where previously no Treebank was available and a
 * pre-stored list was being used.
 *
 * @param treebankRoot file or directory to load the treebank from
 * @return the split categories computed by {@code getSplitCategories}
 */
public static Set<String> getEnglishSplitCategories(String treebankRoot) {
 final double cutoff = 300.0;
 TreebankLangParserParams englishParams = new EnglishTreebankParserParams();
 Treebank trainTrees = englishParams.memoryTreebank();
 NumberRangeFileFilter sections = new NumberRangeFileFilter(200, 2199, true);
 trainTrees.loadPath(treebankRoot, sections);
 return getSplitCategories(trainTrees, cutoff, englishParams.treebankLanguagePack());
}

/**
 * Collects the trees from a range of treebank files whose yield length
 * lies within the given bounds.
 * <p>
 * Trees are read through a {@code DiskTreebank} using a
 * {@code PennTreeReader} configured with a {@code LabeledScoredTreeFactory}
 * over a {@code WordFactory} and a {@code BobChrisTreeNormalizer}.  Only
 * files accepted by a {@code NumberRangeFileFilter} built from {@code low}
 * and {@code high} are loaded.
 *
 * @param path      file or directory to load trees from
 * @param low       lower bound of the file number range
 * @param high      upper bound of the file number range
 * @param minLength minimum yield size (inclusive) for a tree to be kept
 * @param maxLength maximum yield size (inclusive) for a tree to be kept
 * @return the trees whose yield size is between {@code minLength} and
 *         {@code maxLength}, in treebank iteration order
 */
public static List<Tree> getTrees(String path, int low, int high, int minLength, int maxLength) {
 Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new WordFactory()), new BobChrisTreeNormalizer()));
 treebank.loadPath(path, new NumberRangeFileFilter(low, high, true));
 List<Tree> trees = new ArrayList<>();
 for (Tree tree : treebank) {
  // Compute the yield once per tree; each call to yield() gathers the
  // tree's leaves again, so asking twice doubles the work for no gain.
  int length = tree.yield().size();
  if (length <= maxLength && length >= minLength) {
   trees.add(tree);
  }
 }
 return trees;
}

/**
 * For testing: prints the default encoding of the disk treebank, then
 * loads the trees selected by a file range and prints each of them,
 * followed by the total number of trees.  If fewer than two arguments
 * are supplied, a usage message is printed via {@code printlnErr} and
 * nothing is loaded.
 *
 * @param args {@code args[0]} is the path to the trees, {@code args[1]}
 *             the file range specification
 */
public static void main(String[] args) {
 TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
 System.out.println("Default encoding is: " +
           tlpp.diskTreebank().encoding());
 if (args.length < 2) {
  printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
  return;
 }
 Treebank trees = tlpp.diskTreebank();
 NumberRangesFileFilter fileRange = new NumberRangesFileFilter(args[1], false);
 trees.loadPath(args[0], fileRange);
 for (Tree tree : trees) {
  tree.pennPrint(tlpp.pw());
 }
 System.out.println("There were " + trees.size() + " trees.");
}

Javadoc

Load a sequence of trees from given file or directory and its subdirectories. Either this loads from a directory (tree) and trees must reside in files with the suffix "mrg" (this is an English Penn Treebank holdover!), or it loads a single file with the given path (including extension)

Popular methods of Treebank

add
apply
Apply a TreeVisitor to each tree in the Treebank. For all current implementations of Treebank, this
clear
Empty a Treebank.
iterator
size
Returns the size of the Treebank.
textualSummary
Return various statistics about the treebank (number of sentences, words, tag set, etc.).
transform
Return a Treebank (actually a TransformingTreebank) where each Tree in the current treebank has been
decimate
Divide a Treebank into 3, by taking every 9th sentence for the dev set and every 10th for the test s
encoding
Returns the encoding in use for treebank file bytestream access.
isEmpty
treeReaderFactory
Get the TreeReaderFactory for a Treebank -- this method is provided in order to make the TreeReaderFa

treeReaderFactory

Popular in Java

Reading from database using SQL prepared statement
runOnUiThread (Activity)
getApplicationContext (Context)
getSharedPreferences (Context)
Pointer (com.sun.jna)
An abstraction for a native pointer data type. A Pointer instance represents, on the Java side, a na
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
URLEncoder (java.net)
This class is used to encode a string using the format required by application/x-www-form-urlencoded
ArrayList (java.util)
ArrayList is an implementation of List, backed by an array. All optional operations including adding
Join (org.hibernate.mapping)
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource. Typically used to track the location of proble
Best plugins for Eclipse

How to use the loadPath method in edu.stanford.nlp.trees.Treebank

Best Java code snippets using edu.stanford.nlp.trees.Treebank.loadPath (Showing top 20 results out of 315)

How to use
loadPath
method
in
edu.stanford.nlp.trees.Treebank