/**
 * Returns the entire treebank as a sequence of bracketed tree strings,
 * one tree per line.  Avoid calling this on a large treebank: the whole
 * corpus is materialized into a single String.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder();
  apply(tree -> out.append(tree).append('\n'));
  return out.toString();
}
/**
 * Counts the trees in this Treebank by visiting every one of them.
 *
 * @return the number of trees in the treebank
 */
@Override
public int size() {
  final CounterTreeProcessor treeCounter = new CounterTreeProcessor();
  apply(treeCounter);
  return treeCounter.total();
}
// NOTE(review): fragment — the enclosing method is not visible in this chunk.
// Optionally open a UTF-8 writer for the "flat file" output (null when flat
// output is disabled), then run the Arabic raw-tree normalizer over every
// tree in the treebank, writing through the two sinks.
flatFile = (makeFlatFile) ? new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(flatFileName),"UTF-8"))) : null;
treebank.apply(new ArabicRawTreeNormalizer(outfile,flatFile));
// Fragment: run the visitor tv (defined outside this view) over every tree.
treebank.apply(tv);
// Fragment: run the visitor tv (defined outside this view) over every tree.
treebank.apply(tv);
@Override public void build() { //Set specific options for this dataset if(options.containsKey(ConfigParser.paramSplit)) { System.err.printf("%s: Ignoring split parameter for this dataset type\n", this.getClass().getName()); } else if(options.containsKey(ConfigParser.paramTagDelim)) { wordTagDelim = options.getProperty(ConfigParser.paramTagDelim); taggedOutput = true; } for(File path : pathsToData) { int prevSize = treebank.size(); treebank.loadPath(path,treeFileExtension,false); toStringBuffer.append(String.format(" Loaded %d trees from %s\n", treebank.size() - prevSize, path.getPath())); prevSize = treebank.size(); } ArabicTreeDecimatedNormalizer tv = new ArabicTreeDecimatedNormalizer(outFileName,makeFlatFile,taggedOutput); treebank.apply(tv); outputFileList.addAll(tv.getFilenames()); tv.closeOutputFiles(); }
public static void main(String[] args) { // simple testing code Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new NoPunctuationHeadFinder(); treebank.apply(pt -> { pt.percolateHeads(chf); pt.pennPrint(); System.out.println(); }); }
/** * Go through trees and determine their heads and print them. * Just for debugging. <br> * Usage: <code> * java edu.stanford.nlp.trees.international.spanish.SpanishHeadFinder treebankFilePath * </code> * * @param args The treebankFilePath */ public static void main(String[] args) { Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new SpanishHeadFinder(); treebank.apply(new TreeVisitor() { public void visitTree(Tree pt) { // pt.percolateHeads(chf); //pt.pennPrint(); Tree head = pt.headTerminal(chf); //System.out.println("======== " + head.label()); } }); }
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link CollinsHeadFinder}, and pretty-prints the result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new CollinsHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Tabulates word/tag counts over the treebank and writes one line per word:
 * the word, a tab, then tab-separated (tag, count) pairs.
 *
 * @param tb treebank whose taggings are counted
 * @param pw destination for the tab-separated output
 */
private static void countTaggings(Treebank tb, final PrintWriter pw) {
  final TwoDimensionalCounter<String, String> wordTagCounts = new TwoDimensionalCounter<>();
  tb.apply(tree -> {
    for (TaggedWord taggedWord : tree.taggedYield()) {
      wordTagCounts.incrementCount(taggedWord.word(), taggedWord.tag());
    }
  });
  for (String word : wordTagCounts.firstKeySet()) {
    pw.print(word);
    pw.print('\t');
    Counter<String> tagCounts = wordTagCounts.getCounter(word);
    for (String tag : tagCounts.keySet()) {
      pw.print(tag + '\t' + tagCounts.getCount(tag) + '\t');
    }
    pw.println();
  }
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link FrenchHeadFinder}, and pretty-prints the result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.FrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new FrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with an {@link AbishekFrenchHeadFinder}, and pretty-prints the
 * result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.FrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new AbishekFrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link DybroFrenchHeadFinder}, and pretty-prints the
 * result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.DybroFrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new DybroFrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Computes the set of categories to split on, based on parent-annotation
 * statistics gathered over the given treebank.  Suitable for selective
 * parent splitting in the PCFGParser inside FactoredParser. <p>
 * If tlp is non-null, tlp.basicCategory() will be called on parent and
 * grandparent nodes. <p>
 * <i>Implementation note:</i> this method is not designed for concurrent
 * invocation: it uses static state variables.
 */
public static Set<String> getSplitCategories(Treebank t, boolean doTags, int algorithm, double phrasalCutOff, double tagCutOff, TreebankLanguagePack tlp) {
  ParentAnnotationStats stats = new ParentAnnotationStats(tlp, doTags);
  t.apply(stats);
  Set<String> splitters = Generics.newHashSet();
  // Merge phrasal splitters and tag splitters into the same result set.
  stats.getSplitters(phrasalCutOff, stats.nodeRules, stats.pRules, stats.gPRules, splitters);
  stats.getSplitters(tagCutOff, stats.tagNodeRules, stats.tagPRules, stats.tagGPRules, splitters);
  return splitters;
}
// Fragment: run the visitor over the treebank, stop the timer, then (if
// enabled) report matches.  The enclosing method is not visible here.
treebank.apply(vis);
Timing.endTime();
if (TRegexTreeVisitor.printMatches) {
// NOTE(review): this span appears to be several overlapping snippets fused
// together by extraction (the same lambda opening repeats with variations,
// and the braces do not balance); it is not syntactically complete.
// Preserved verbatim — recover the original file(s) before editing.
treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) { treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) { treebank.apply(tree -> { Tree tPrime = tn.normalizeWholeTree(tree, tree.treeFactory()); int length = tPrime.yield().size(); treebank.apply(tree -> { Tree tPrime = tn.normalizeWholeTree(tree, tree.treeFactory()); pw.println(SentenceUtils.listToString(tPrime.taggedYield(), false, "_")); treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) {
// Fragment: apply the visitor to this wrapper's underlying treebank, then
// advance the progress bar by one treebank's worth of progress.
treebank.getTreebank().apply(vis);
updateProgressBar(multiplier*treebankNum++);
// Fragment: else-branch of a condition not visible here — gather
// parent-annotation statistics over the treebank (no language pack) and
// print them.
} else { ParentAnnotationStats pas = new ParentAnnotationStats(null, doTags); treebank.apply(pas); pas.printStats();
private static void runTiming(Treebank treebank) { System.out.println(); Timing.startTime(); int num = 0; for (Tree t : treebank) { num += t.yield().size(); } Timing.endTime("traversing corpus, counting words with iterator"); log.info("There were " + num + " words in the treebank."); treebank.apply(new TreeVisitor() { int num; // = 0; @Override public void visitTree(final Tree t) { num += t.yield().size(); } }); log.info(); Timing.endTime("traversing corpus, counting words with TreeVisitor"); log.info("There were " + num + " words in the treebank."); log.info(); Timing.startTime(); log.info("This treebank contains " + treebank.size() + " trees."); Timing.endTime("size of corpus"); }
/**
 * Calculates sister-annotation statistics suitable for doing selective
 * sister splitting in the PCFGParser inside the FactoredParser.
 *
 * @param args args[0] is the treebank path; optional args[1] is the
 *             character encoding (default UTF-8)
 */
public static void main(String[] args) {
  // Small self-check of the Counters machinery before the real work.
  ClassicCounter<String> counter = new ClassicCounter<>();
  counter.setCount("A", 0);
  counter.setCount("B", 1);
  double divergence = Counters.klDivergence(counter, counter);
  System.out.println("KL Divergence: " + divergence);

  String encoding = (args.length > 1) ? args[1] : "UTF-8";
  if (args.length < 1) {
    System.out.println("Usage: ParentAnnotationStats treebankPath");
    return;
  }
  SisterAnnotationStats stats = new SisterAnnotationStats();
  Treebank treebank = new DiskTreebank(
      in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer()),
      encoding);
  treebank.loadPath(args[0]);
  treebank.apply(stats);
  stats.printStats();
}