/**
 * Creates a fresh disk-backed treebank.
 *
 * @return a new {@code DiskTreebank} built from this object's
 *         {@code treeReaderFactory()} and its {@code inputEncoding}
 */
@Override
public DiskTreebank diskTreebank() {
  final DiskTreebank freshTreebank = new DiskTreebank(treeReaderFactory(), inputEncoding);
  return freshTreebank;
}
// Builds a disk treebank from the given reader factory and encoding, loads
// `path` restricted by `filter`, and obtains an iterator over its trees.
// The trailing `if` resets the running tree count to zero whenever the
// treebank reports a filename different from the one last remembered.
// NOTE(review): this excerpt ends inside the `if` body; the rest is not visible here.
DiskTreebank treebank = new DiskTreebank(trf, encoding); treebank.loadPath(path, filter); Iterator<Tree> treeIterator = treebank.iterator(); int treeCount = 0; String currentFile = ""; if (!currentFile.equals(treebank.getCurrentFilename())) { currentFile = treebank.getCurrentFilename(); treeCount = 0;
/**
 * Moves on to the next file of the current path iterator (first trying
 * {@code primeNextPath()} when the iterator is exhausted), closes the
 * previously open tree reader if there is one, and opens a tree reader on
 * the new file using {@code encoding()}.
 *
 * @return {@code true} if a new file was opened; {@code false} if no
 *         further file is available
 * @throws RuntimeException wrapping any {@code IOException} raised while
 *         closing the old reader or opening the new one
 */
private boolean primeNextFile() {
  try {
    final boolean fileAvailable =
        curPathIter.hasNext() || (primeNextPath() && curPathIter.hasNext());
    if (!fileAvailable) {
      return false;
    }

    currentFile = curPathIter.next();
    currentFilename = currentFile.getAbsolutePath();
    if (PRINT_FILENAMES) {
      log.info(currentFile);
    }

    // Release the reader of the previous file before replacing it.
    if (tr != null) {
      tr.close();
    }
    tr = treeReaderFactory().newTreeReader(IOUtils.readerFromFile(currentFile, encoding()));

    // Line numbering restarts with every file.
    curLineId = 1;
    return true;

  } catch (UnsupportedEncodingException e) {
    final String owner = this.getClass().getName();
    System.err.printf("%s: Filesystem does not support encoding:%n%s%n", owner, e.toString());
    throw new RuntimeException(e);
  } catch (FileNotFoundException e) {
    final String owner = this.getClass().getName();
    System.err.printf("%s: File does not exist:%n%s%n", owner, e.toString());
    throw new RuntimeException(e);
  } catch (IOException e) {
    final String owner = this.getClass().getName();
    System.err.printf("%s: Unable to close open tree reader:%n%s%n", owner, currentFile.getPath());
    throw new RuntimeException(e);
  }
}
/**
 * Builds the secondary training treebank from {@code treebankPath}.
 *
 * @param treebankPath path handed to {@code DiskTreebank.loadPath}
 * @param op           options whose {@code tlpParams} supply the empty treebank
 * @param filt         file filter to apply while loading, or {@code null} to
 *                     load the path without a filter
 * @return the loaded treebank
 */
private static DiskTreebank makeSecondaryTreebank(String treebankPath, Options op, FileFilter filt) {
  log.info("Additionally training using secondary disk treebank: " + treebankPath + ' ' + filt);

  final DiskTreebank secondary = op.tlpParams.diskTreebank();
  log.info("Reading trees...");
  if (filt != null) {
    secondary.loadPath(treebankPath, filt);
  } else {
    secondary.loadPath(treebankPath);
  }

  final int treeTotal = secondary.size();
  Timing.tick("done [read " + treeTotal + " trees].");
  return secondary;
}
/**
 * Loads a treebank with {@code ArabicTreebankParserParams} configured by the
 * {@code -arabicFactored} option, calls {@code transformTree} on every
 * subtree of every tree, and prints each tree to standard output.
 *
 * <p>Exits with status -1, after printing a usage message to standard error,
 * unless exactly one argument is supplied.
 *
 * @param args a single element: the treebank path passed to {@code loadPath}
 */
public static void main(String[] args) {
  if (args.length != 1) {
    // Explain the failure instead of exiting silently.
    System.err.println("Usage: expected exactly one argument (the treebank path) but got " + args.length);
    System.exit(-1);
  }

  ArabicTreebankParserParams tlpp = new ArabicTreebankParserParams();
  String[] options = {"-arabicFactored"};
  tlpp.setOptionFlag(options, 0);

  DiskTreebank tb = tlpp.diskTreebank();
  // NOTE(review): "txt" and false are presumably the file suffix and a
  // recursion flag; confirm against DiskTreebank.loadPath.
  tb.loadPath(args[0], "txt", false);

  for (Tree t : tb) {
    // The return value of transformTree is discarded, as in the original.
    for (Tree subtree : t) {
      tlpp.transformTree(subtree, t);
    }
    System.out.println(t.toString());
  }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Loads the treebank under {@code trainDirPath}, converts every tree's Penn
 * string with {@code convertTreeBankToCoNLLX}, and writes the results to
 * {@code outputFileName}, each followed by two line separators.
 *
 * <p>The writer is opened with try-with-resources so that it is closed even
 * when a write fails. A write failure now stops the output at that point
 * rather than attempting the remaining trees on a broken writer; the stack
 * trace is still printed and no exception reaches the caller.
 *
 * @param trainDirPath        path handed to {@code DiskTreebank.loadPath}
 * @param trainTreeBankFilter filter selecting which files are loaded
 * @param outputFileName      name of the file to (over)write
 */
public static void convertTreebankToCoNLLX(String trainDirPath, FileFilter trainTreeBankFilter, String outputFileName) {
  DiskTreebank trainTreeBank = new DiskTreebank();
  trainTreeBank.loadPath(trainDirPath, trainTreeBankFilter);

  // NOTE(review): counter is captured by the lambda and never changes, so
  // count(...) always receives 0; confirm whether progress was meant to advance.
  int counter = 0;
  int size = trainTreeBank.size();
  List<DTree> trees = trainTreeBank.parallelStream().map(tree -> {
    count(counter, size);
    return convertTreeBankToCoNLLX(tree.pennString());
  }).collect(Collectors.toList());

  try (FileWriter fw = new FileWriter(outputFileName)) {
    for (DTree dTree : trees) {
      // The first element is dropped before the tree is written.
      dTree.remove(0);
      fw.write(dTree.toString());
      fw.write(System.lineSeparator());
      fw.write(System.lineSeparator());
    }
    fw.flush();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
// Applies the collected option strings, then loads two treebanks: the main
// training treebank from trainDirPath, filtered by a NumberRangeFileFilter
// built from startRange and endRange, and the extra treebank from
// train2DirPath, filtered by an ExtensionFileFilter on train2FileExtension.
op.setOptions(para.stream().toArray(String[]::new)); DiskTreebank trainTreeBank = new DiskTreebank(); FileFilter trainTreeBankFilter = new NumberRangeFileFilter(startRange, endRange, true); trainTreeBank.loadPath(trainDirPath, trainTreeBankFilter); extraTreeBank = new DiskTreebank(); FileFilter extraTreeBankFilter = new ExtensionFileFilter(train2FileExtension, true); extraTreeBank.loadPath(train2DirPath, extraTreeBankFilter);
/**
 * Loads the treebank at {@code path}, normalizes every tree with a
 * {@code BobChrisTreeNormalizer}, and appends each tree's tagged yield to
 * {@code data} as a single string built by {@code SentenceUtils.listToString}
 * with {@code "_"} as the separator argument.
 *
 * @param filter filter selecting which files are loaded
 * @param path   path handed to {@code DiskTreebank.loadPath}
 * @param data   collection that receives one string per tree
 */
private static void loadTreeBank(FileFilter filter, String path, Collection<String> data) {
  final DiskTreebank treebank = new DiskTreebank();
  treebank.loadPath(path, filter);

  final TreeNormalizer normalizer = new BobChrisTreeNormalizer();
  treebank.apply(tree -> {
    final Tree normalized = normalizer.normalizeWholeTree(tree, tree.treeFactory());
    final String taggedSentence = SentenceUtils.listToString(normalized.taggedYield(), false, "_");
    data.add(taggedSentence);
  });
}
/**
 * Test driver: prints the default encoding, then loads the treebank at
 * {@code args[0]} restricted to the file ranges in {@code args[1]}, prints
 * every tree, and reports how many trees there were. With fewer than two
 * arguments only the usage line is printed.
 *
 * @param args {@code treesPath fileRange}
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
  System.out.println("Default encoding is: " + tlpp.diskTreebank().encoding());

  if (args.length < 2) {
    printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
    return;
  }

  Treebank loaded = tlpp.diskTreebank();
  loaded.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
  for (Tree tree : loaded) {
    tree.pennPrint(tlpp.pw());
  }
  System.out.println("There were " + loaded.size() + " trees.");
}
// Casts the treebank to DiskTreebank and prints a line consisting of "# "
// followed by the filename the treebank currently reports.
DiskTreebank dtb = (DiskTreebank) treebank; pw.print("# "); pw.println(dtb.getCurrentFilename());
// Loads the path named by args[i] into tb.
tb.loadPath(args[i]);
// Creates a Penn tree reader factory that uses NPTmpRetainingTreeNormalizer,
// loads the test treebank from the file named by args[0] restricted by
// testFilt, and instantiates the head finder.
// NOTE(review): the final declaration continues beyond this excerpt.
TreeReaderFactory trf = new PennTreeReaderFactory(new NPTmpRetainingTreeNormalizer()); DiskTreebank testTreebank = new DiskTreebank(trf); testTreebank.loadPath(new File(args[0]), testFilt); HeadFinder hf = new ModCollinsHeadFinder(); Function<String, String> basicCatFunction =
/**
 * Testing entry point. Always reports the default encoding of the disk
 * treebank; when at least two arguments are given it additionally loads the
 * treebank at {@code args[0]} using the file ranges in {@code args[1]},
 * pretty-prints each tree, and prints the tree count. Otherwise it prints a
 * usage line.
 *
 * @param args {@code treesPath fileRange}
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
  String defaultEncoding = tlpp.diskTreebank().encoding();
  System.out.println("Default encoding is: " + defaultEncoding);

  boolean enoughArgs = args.length >= 2;
  if (enoughArgs) {
    Treebank trees = tlpp.diskTreebank();
    trees.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
    for (Tree current : trees) {
      current.pennPrint(tlpp.pw());
    }
    System.out.println("There were " + trees.size() + " trees.");
  } else {
    printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
  }
}
// Casts the treebank to DiskTreebank and prints a line consisting of "# "
// followed by the filename the treebank currently reports.
DiskTreebank dtb = (DiskTreebank) treebank; pw.print("# "); pw.println(dtb.getCurrentFilename());
/**
 * Supplies a treebank that reads trees from disk. Handling the character-set
 * encoding of the input is left to {@code treeReaderFactory()}, and
 * normalizing the trees is left to the tree reader it produces.
 *
 * @return a new {@code DiskTreebank} built on {@code treeReaderFactory()}
 */
@Override
public DiskTreebank diskTreebank() {
  final DiskTreebank freshTreebank = new DiskTreebank(treeReaderFactory());
  return freshTreebank;
}
/**
 * Builds the secondary training treebank from {@code treebankPath},
 * reporting progress on standard error.
 *
 * @param treebankPath path handed to {@code DiskTreebank.loadPath}
 * @param op           options whose {@code tlpParams} supply the empty treebank
 * @param filt         file filter to apply while loading, or {@code null} to
 *                     load the path without a filter
 * @return the loaded treebank
 */
private static DiskTreebank makeSecondaryTreebank(String treebankPath, Options op, FileFilter filt) {
  System.err.println("Additionally training using secondary disk treebank: " + treebankPath + ' ' + filt);

  final DiskTreebank secondary = op.tlpParams.diskTreebank();
  // No newline here: the Timing.tick message below follows this prompt.
  System.err.print("Reading trees...");
  if (filt != null) {
    secondary.loadPath(treebankPath, filt);
  } else {
    secondary.loadPath(treebankPath);
  }

  final int treeTotal = secondary.size();
  Timing.tick("done [read " + treeTotal + " trees].");
  return secondary;
}
// Loads the path named by args[i] into tb, then advances i past that argument.
tb.loadPath(args[i++]);
/**
 * Moves on to the next file of the current path iterator (first trying
 * {@code primeNextPath()} when the iterator is exhausted), closes the
 * previously open tree reader if there is one, and opens a tree reader on
 * the new file using {@code encoding()}.
 *
 * <p>If wrapping the newly opened file stream fails (for example because the
 * encoding is unsupported), the stream is closed before the failure is
 * reported, so no file handle is leaked.
 *
 * @return {@code true} if a new file was opened; {@code false} if no
 *         further file is available
 * @throws RuntimeException wrapping any {@code IOException} raised while
 *         closing the old reader or opening the new one
 */
private boolean primeNextFile() {
  try {
    if (curPathIter.hasNext() || (primeNextPath() && curPathIter.hasNext())) {
      currentFile = curPathIter.next();
      currentFilename = currentFile.getAbsolutePath();
      if (PRINT_FILENAMES) {
        System.err.println(currentFile);
      }

      // Release the reader of the previous file before replacing it.
      if (tr != null) {
        tr.close();
      }

      FileInputStream fileStream = new FileInputStream(currentFile);
      boolean readerCreated = false;
      try {
        tr = treeReaderFactory().newTreeReader(
            new BufferedReader(new InputStreamReader(fileStream, encoding())));
        readerCreated = true;
      } finally {
        if (!readerCreated) {
          // The reader chain was never built, so nothing else owns the stream.
          try {
            fileStream.close();
          } catch (IOException ignored) {
            // Keep the original failure; a close error here adds nothing.
          }
        }
      }

      // Line numbering restarts with every file.
      curLineId = 1;
      return true;
    }

  } catch (UnsupportedEncodingException e) {
    System.err.printf("%s: Filesystem does not support encoding:\n%s\n", this.getClass().getName(), e.toString());
    throw new RuntimeException(e);
  } catch (FileNotFoundException e) {
    System.err.printf("%s: File does not exist:\n%s\n", this.getClass().getName(),e.toString());
    throw new RuntimeException(e);
  } catch (IOException e) {
    System.err.printf("%s: Unable to close open tree reader:\n%s\n", this.getClass().getName(),currentFile.getPath());
    throw new RuntimeException(e);
  }
  return false;
}
/**
 * Small test harness: prints the default encoding, and when a trees path
 * ({@code args[0]}) and a file range ({@code args[1]}) are both supplied,
 * loads that treebank, prints each tree, and finishes with the number of
 * trees. With fewer than two arguments it prints the usage line instead.
 *
 * @param args {@code treesPath fileRange}
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
  System.out.println("Default encoding is: " + tlpp.diskTreebank().encoding());

  if (args.length < 2) {
    printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
    return;
  }

  String treesPath = args[0];
  String fileRange = args[1];

  Treebank bank = tlpp.diskTreebank();
  bank.loadPath(treesPath, new NumberRangesFileFilter(fileRange, false));
  for (Tree parsed : bank) {
    parsed.pennPrint(tlpp.pw());
  }
  System.out.println("There were " + bank.size() + " trees.");
}