MemoryTreebank tb = new MemoryTreebank(opts.treeNormalizer); Iterable<Tree> trees = tb; System.err.println("Example:"); TreeReader tr = new PennTreeReader(new StringReader("((S (NP (NNP Sam)) (VP (VBD died) (NP-TMP (NN today)))))")); tb.add(tr.readTree()); } catch (Exception e) { log.info("Horrible error: " + e); tb.loadPath(treeFileName); } else if (filter != null) { tb.load(IOUtils.readerFromStdin()); } else if (conllXFileName != null) { try {
tr = treeReaderFactory().newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding()))); int sentIndex=0; Tree pt;
/**
 * Prints the subtrees selected by a list of node codes.
 *
 * Every code string must match {@code codePattern}; its first two groups are
 * parsed as integers giving a 1-based tree position in the treebank and a node
 * number that is handed to {@code Tree.getNodeNumber}. All codes are validated
 * before the treebank is read, so a malformed code fails fast.
 *
 * @param codeStrings The node codes to look up
 * @param treeFile    Path from which the treebank is loaded
 * @throws RuntimeException if a code string does not match {@code codePattern}
 */
private static void extractSubtrees(List<String> codeStrings, String treeFile) {
  List<Pair<Integer,Integer>> parsedCodes = new ArrayList<>();
  for (String codeString : codeStrings) {
    Matcher matcher = codePattern.matcher(codeString);
    if ( ! matcher.matches()) {
      throw new RuntimeException("Error: illegal node code " + codeString);
    }
    int treeNumber = Integer.parseInt(matcher.group(1));
    int nodeNumber = Integer.parseInt(matcher.group(2));
    parsedCodes.add(new Pair<>(treeNumber, nodeNumber));
  }

  TreeReaderFactory readerFactory = new TRegexTreeReaderFactory();
  MemoryTreebank loadedTrees = new MemoryTreebank(readerFactory);
  loadedTrees.loadPath(treeFile, null, true);

  for (Pair<Integer,Integer> parsedCode : parsedCodes) {
    // Tree numbers in the codes are 1-based; the treebank is indexed from 0.
    Tree containingTree = loadedTrees.get(parsedCode.first() - 1);
    Tree selectedNode = containingTree.getNodeNumber(parsedCode.second());
    selectedNode.pennPrint();
  }
}
/**
 * Builds a new MemoryTreebank holding the result of applying the given
 * TreeTransformer to every Tree of this treebank, in iteration order.
 * This treebank itself is not modified by this method (provided the
 * TreeTransformer leaves its input Trees alone).
 *
 * @param treeTrans The TreeTransformer to apply to each tree
 * @return A new treebank, created with this treebank's size and
 *         TreeReaderFactory, containing the transformed trees
 */
@Override
public Treebank transform(TreeTransformer treeTrans) {
  Treebank result = new MemoryTreebank(size(), treeReaderFactory());
  for (Tree source : this) {
    Tree converted = treeTrans.transformTree(source);
    result.add(converted);
  }
  return result;
}
/**
 * Loads the trees found at the given path into a "utf-8" MemoryTreebank,
 * runs {@code attachLabels} on each one with the given annotation class,
 * and returns them as a List in treebank order.
 *
 * @param path            Location of the trees to load (no file filter is applied)
 * @param annotationClass Annotation class passed through to {@code attachLabels}
 * @return The loaded trees, after {@code attachLabels} has been applied to each
 */
public static List<Tree> readTreesWithLabels(String path, Class<? extends CoreAnnotation<Integer>> annotationClass) {
  MemoryTreebank loaded = new MemoryTreebank("utf-8");
  loaded.loadPath(path, null);

  List<Tree> labeledTrees = Generics.newArrayList();
  for (Tree current : loaded) {
    attachLabels(current, annotationClass);
    labeledTrees.add(current);
  }
  return labeledTrees;
}
/**
 * Fix all the English Penn Treebank errors, or at least some of them (!).
 * Each tree of the input treebank is run through
 * {@code Tsurgeon.processPatternsOnTree} with {@code ops}, and the results
 * are collected into a new MemoryTreebank that shares the input's
 * TreeReaderFactory and encoding.
 *
 * @param tb The treebank whose trees are to be processed
 * @return A new MemoryTreebank of the processed trees
 */
@Override
public MemoryTreebank transformTrees(Treebank tb) {
  MemoryTreebank corrected = new MemoryTreebank(tb.treeReaderFactory(), tb.encoding());
  for (Tree original : tb) {
    Tree processed = Tsurgeon.processPatternsOnTree(ops, original);
    corrected.add(processed);
  }
  return corrected;
}
testTreebank.loadPath(path, new NumberRangeFileFilter(testLow, testHigh, true)); if (op.testOptions.increasingLength) { Collections.sort(testTreebank, new TreeLengthComparator()); for (int tNum = 0, ttSize = testTreebank.size(); tNum < ttSize; tNum++) { Tree tree = testTreebank.get(tNum); int testTreeLen = tree.yield().size(); if (testTreeLen > op.testOptions.maxLength) {
/**
 * Creates an empty MemoryTreebank that reads trees via this object's
 * {@code treeReaderFactory()}. Handling of the input's character-set
 * encoding and normalization of the trees is left to that factory and
 * the readers it produces.
 *
 * @return A new MemoryTreebank backed by {@code treeReaderFactory()}
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank emptyTreebank = new MemoryTreebank(treeReaderFactory());
  return emptyTreebank;
}
MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank(); FileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false); rawTrainTreebank.loadPath(new File(statArgs[0]), trainFilt); log.info("Done reading trees."); MemoryTreebank trainTreebank; if (argMap.containsKey("-annotate")) { trainTreebank = new MemoryTreebank(); TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp, op); for (Tree tree : rawTrainTreebank) { trainTreebank.add(annotator.transformTree(tree)); MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank(); FileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false); rawTrainTreebank.loadPath(new File(lexArgs[0]), trainFilt); log.info("Done reading trees."); MemoryTreebank trainTreebank; if (argMap.containsKey("-annotate")) { trainTreebank = new MemoryTreebank(); TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp, op); for (Tree tree : rawTrainTreebank) { tree = annotator.transformTree(tree); trainTreebank.add(tree); lex.initializeTraining(trainTreebank.size()); lex.train(trainTreebank); lex.finishTraining(); MemoryTreebank testTreebank = op.tlpParams.memoryTreebank(); FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false, true)); tb.loadPath(treeFileName); Iterator<Tree> treeIterator = tb.iterator(); sgIterator = new TreeToSemanticGraphIterator(treeIterator); } else if (conlluFileName != null) {
if (argsMap.containsKey("-filter")) { TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName); treebank = new MemoryTreebank(trf, encoding);//has to be in memory since we're not storing it on disk ((MemoryTreebank) treebank).load(reader); reader.close(); } else if (args.length == 1) { TreeReader r = new PennTreeReader(new StringReader("(VP (VP (VBZ Try) (NP (NP (DT this) (NN wine)) (CC and) (NP (DT these) (NNS snails)))) (PUNCT .))"), new LabeledScoredTreeFactory(new StringLabelFactory())); Tree t = r.readTree(); treebank = new MemoryTreebank(); treebank.add(t); } else {
MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank(); FileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false); rawTrainTreebank.loadPath(new File(statArgs[0]), trainFilt); System.err.println("Done reading trees."); MemoryTreebank trainTreebank; if (argMap.containsKey("-annotate")) { trainTreebank = new MemoryTreebank(); TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp); for (Tree tree : rawTrainTreebank) { trainTreebank.add(annotator.transformTree(tree)); MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank(); FileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false); rawTrainTreebank.loadPath(new File(lexArgs[0]), trainFilt); System.err.println("Done reading trees."); MemoryTreebank trainTreebank; if (argMap.containsKey("-annotate")) { trainTreebank = new MemoryTreebank(); TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp); for (Iterator iter = rawTrainTreebank.iterator(); iter.hasNext();) { Tree tree = (Tree) iter.next(); tree = annotator.transformTree(tree); trainTreebank.add(tree); MemoryTreebank testTreebank = op.tlpParams.memoryTreebank(); FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false); testTreebank.loadPath(new File(testArgs[0]), testFilt); TreeTransformer subcategoryStripper = op.tlpParams.subcategoryStripper(); TreeTransformer collinizer = ctpp.collinizer();
/**
 * Loads trees from a path while an SRL file is in effect.
 *
 * Calls {@code readSRLFile(srlFile)} (presumably this fills {@code srlMap};
 * confirm against that method), then processes {@code path} with
 * {@code FilePathProcessor}, passing this object as the processor, and
 * finally resets {@code srlMap} to {@code null}.
 *
 * The reset happens in a {@code finally} clause so that {@code srlMap} is
 * cleared even when reading the SRL file or processing the path throws;
 * otherwise a failed load would leave stale state on this instance.
 *
 * @param path    File or directory to process
 * @param filt    Filter handed to {@code FilePathProcessor.processPath}
 * @param srlFile SRL file handed to {@code readSRLFile}
 */
public void loadPath(String path, FileFilter filt, String srlFile) {
  try {
    readSRLFile(srlFile);
    FilePathProcessor.processPath(new File(path), filt, this);
  } finally {
    // Always drop the per-load SRL state, on success and on failure alike.
    srlMap = null;
  }
}
/**
 * Reads parse trees from a Reader without associating a source ID with them.
 * This simply delegates to {@code load(r, null)}. Each tree may optionally
 * be wrapped in an extra pair of parentheses, Penn Treebank style.
 *
 * @param r The reader to read trees from. No buffering is added here, so
 *          pass in an already-buffered reader if buffering is wanted.
 */
public void load(Reader r) {
  this.load(r, null);
}
/** * Load a collection of parse trees from a Reader. * Each tree may optionally be encased in parens to allow for Penn * Treebank style trees. * * @param r The reader to read trees from. (If you want it buffered, * you should already have buffered it!) * @param id An ID for where these files come from (arbitrary, but * something like a filename. Can be {@code null} for none. */ public void load(Reader r, String id) { try { // could throw an IO exception? TreeReader tr = treeReaderFactory().newTreeReader(r); int sentIndex = 0; for (Tree pt; (pt = tr.readTree()) != null; ) { if (pt.label() instanceof HasIndex) { // so we can trace where this tree came from HasIndex hi = (HasIndex) pt.label(); if (id != null) { hi.setDocID(id); } hi.setSentIndex(sentIndex); } parseTrees.add(pt); sentIndex++; } } catch (IOException e) { log.info("load IO Exception: " + e); } }
testTreebank.loadPath(new File(args[0]), testFilt); PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true); WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
/**
 * Produces a MemoryTreebank whose trees are the outputs of the given
 * TreeTransformer applied, one by one, to the trees of this treebank.
 * This treebank is left as it was (as long as the TreeTransformer does
 * not alter the Trees it is given).
 *
 * @param treeTrans The TreeTransformer to use
 * @return A new treebank, created with this treebank's size and
 *         TreeReaderFactory, holding the transformed trees
 */
@Override
public Treebank transform(TreeTransformer treeTrans) {
  Treebank transformedBank = new MemoryTreebank(size(), treeReaderFactory());
  for (Tree input : this) {
    Tree output = treeTrans.transformTree(input);
    transformedBank.add(output);
  }
  return transformedBank;
}
testTreebank.loadPath(path, new NumberRangeFileFilter(testLow, testHigh, true)); if (op.testOptions.increasingLength) { Collections.sort(testTreebank, new TreeLengthComparator()); for (int tNum = 0, ttSize = testTreebank.size(); tNum < ttSize; tNum++) { Tree tree = testTreebank.get(tNum); int testTreeLen = tree.yield().size(); if (testTreeLen > op.testOptions.maxLength) {
/**
 * Creates an empty MemoryTreebank configured with this object's
 * {@code treeReaderFactory()} and {@code inputEncoding}.
 *
 * @return A new MemoryTreebank using that reader factory and encoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank emptyTreebank = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return emptyTreebank;
}
for (String treeFile : argsMap.get(TREE_FILE)) { log.info("Loading file " + treeFile); MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer()); treebank.loadPath(treeFile); for (Tree tree : treebank) {