/**
 * Returns the entire treebank as a sequence of bracketed tree strings,
 * one tree per line.  Avoid calling this on a large treebank: the whole
 * corpus is materialized into a single String.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder();
  apply(tree -> out.append(tree).append('\n'));
  return out.toString();
}
/**
 * Counts the trees in this Treebank by visiting every one of them.
 *
 * @return the number of trees in the treebank
 */
@Override
public int size() {
  final CounterTreeProcessor treeCounter = new CounterTreeProcessor();
  apply(treeCounter);
  return treeCounter.total();
}
// NOTE(review): fragment — the enclosing method is not visible in this chunk.
// Optionally open a UTF-8 writer for the "flat file" output (null when flat
// output is disabled), then run the Arabic raw-tree normalizer over every
// tree in the treebank, writing through the two sinks.
flatFile = (makeFlatFile) ? new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(flatFileName),"UTF-8"))) : null;
treebank.apply(new ArabicRawTreeNormalizer(outfile,flatFile));
// Fragment: run the visitor tv (defined outside this view) over every tree.
treebank.apply(tv);
// Fragment: run the visitor tv (defined outside this view) over every tree.
treebank.apply(tv);
@Override public void build() { //Set specific options for this dataset if(options.containsKey(ConfigParser.paramSplit)) { System.err.printf("%s: Ignoring split parameter for this dataset type\n", this.getClass().getName()); } else if(options.containsKey(ConfigParser.paramTagDelim)) { wordTagDelim = options.getProperty(ConfigParser.paramTagDelim); taggedOutput = true; } for(File path : pathsToData) { int prevSize = treebank.size(); treebank.loadPath(path,treeFileExtension,false); toStringBuffer.append(String.format(" Loaded %d trees from %s\n", treebank.size() - prevSize, path.getPath())); prevSize = treebank.size(); } ArabicTreeDecimatedNormalizer tv = new ArabicTreeDecimatedNormalizer(outFileName,makeFlatFile,taggedOutput); treebank.apply(tv); outputFileList.addAll(tv.getFilenames()); tv.closeOutputFiles(); }
public static void main(String[] args) { // simple testing code Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new NoPunctuationHeadFinder(); treebank.apply(pt -> { pt.percolateHeads(chf); pt.pennPrint(); System.out.println(); }); }
/** * Go through trees and determine their heads and print them. * Just for debugging. <br> * Usage: <code> * java edu.stanford.nlp.trees.international.spanish.SpanishHeadFinder treebankFilePath * </code> * * @param args The treebankFilePath */ public static void main(String[] args) { Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new SpanishHeadFinder(); treebank.apply(new TreeVisitor() { public void visitTree(Tree pt) { // pt.percolateHeads(chf); //pt.pennPrint(); Tree head = pt.headTerminal(chf); //System.out.println("======== " + head.label()); } }); }
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link CollinsHeadFinder}, and pretty-prints the result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new CollinsHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Tabulates word/tag counts over the treebank and writes one line per word:
 * the word, a tab, then tab-separated (tag, count) pairs.
 *
 * @param tb treebank whose taggings are counted
 * @param pw destination for the tab-separated output
 */
private static void countTaggings(Treebank tb, final PrintWriter pw) {
  final TwoDimensionalCounter<String, String> wordTagCounts = new TwoDimensionalCounter<>();
  tb.apply(tree -> {
    for (TaggedWord taggedWord : tree.taggedYield()) {
      wordTagCounts.incrementCount(taggedWord.word(), taggedWord.tag());
    }
  });
  for (String word : wordTagCounts.firstKeySet()) {
    pw.print(word);
    pw.print('\t');
    Counter<String> tagCounts = wordTagCounts.getCounter(word);
    for (String tag : tagCounts.keySet()) {
      pw.print(tag + '\t' + tagCounts.getCount(tag) + '\t');
    }
    pw.println();
  }
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link FrenchHeadFinder}, and pretty-prints the result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.FrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new FrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with an {@link AbishekFrenchHeadFinder}, and pretty-prints the
 * result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.FrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new AbishekFrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads a treebank, percolates heads through each
 * tree with a {@link DybroFrenchHeadFinder}, and pretty-prints the
 * result. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.DybroFrenchHeadFinder treebankFilePath
 * </code>
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder headFinder = new DybroFrenchHeadFinder();
  treebank.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Computes the set of categories to split on, based on parent-annotation
 * statistics gathered over the given treebank.  Suitable for selective
 * parent splitting in the PCFGParser inside FactoredParser. <p>
 * If tlp is non-null, tlp.basicCategory() will be called on parent and
 * grandparent nodes. <p>
 * <i>Implementation note:</i> this method is not designed for concurrent
 * invocation: it uses static state variables.
 */
public static Set<String> getSplitCategories(Treebank t, boolean doTags, int algorithm, double phrasalCutOff, double tagCutOff, TreebankLanguagePack tlp) {
  ParentAnnotationStats stats = new ParentAnnotationStats(tlp, doTags);
  t.apply(stats);
  Set<String> splitters = Generics.newHashSet();
  // Merge phrasal splitters and tag splitters into the same result set.
  stats.getSplitters(phrasalCutOff, stats.nodeRules, stats.pRules, stats.gPRules, splitters);
  stats.getSplitters(tagCutOff, stats.tagNodeRules, stats.tagPRules, stats.tagGPRules, splitters);
  return splitters;
}
// Fragment: run the visitor over the treebank, stop the timer, then (if
// enabled) report matches.  The enclosing method is not visible here.
treebank.apply(vis);
Timing.endTime();
if (TRegexTreeVisitor.printMatches) {
// NOTE(review): this span appears to be several overlapping snippets fused
// together by extraction (the same lambda opening repeats with variations,
// and the braces do not balance); it is not syntactically complete.
// Preserved verbatim — recover the original file(s) before editing.
treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) { treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) { treebank.apply(tree -> { Tree tPrime = tn.normalizeWholeTree(tree, tree.treeFactory()); int length = tPrime.yield().size(); treebank.apply(tree -> { Tree tPrime = tn.normalizeWholeTree(tree, tree.treeFactory()); pw.println(SentenceUtils.listToString(tPrime.taggedYield(), false, "_")); treebank.apply(tree -> { int length = tree.yield().size(); if (length >= minLength && length <= maxLength) {
// Fragment: apply the visitor to this wrapper's underlying treebank, then
// advance the progress bar by one treebank's worth of progress.
treebank.getTreebank().apply(vis);
updateProgressBar(multiplier*treebankNum++);
// Fragment: else-branch of a condition not visible here — gather
// parent-annotation statistics over the treebank (no language pack) and
// print them.
} else { ParentAnnotationStats pas = new ParentAnnotationStats(null, doTags); treebank.apply(pas); pas.printStats();
private static void runTiming(Treebank treebank) { System.out.println(); Timing.startTime(); int num = 0; for (Tree t : treebank) { num += t.yield().size(); } Timing.endTime("traversing corpus, counting words with iterator"); log.info("There were " + num + " words in the treebank."); treebank.apply(new TreeVisitor() { int num; // = 0; @Override public void visitTree(final Tree t) { num += t.yield().size(); } }); log.info(); Timing.endTime("traversing corpus, counting words with TreeVisitor"); log.info("There were " + num + " words in the treebank."); log.info(); Timing.startTime(); log.info("This treebank contains " + treebank.size() + " trees."); Timing.endTime("size of corpus"); }
/**
 * Calculates sister-annotation statistics suitable for doing selective
 * sister splitting in the PCFGParser inside the FactoredParser.
 *
 * @param args args[0] is the treebank path; optional args[1] is the
 *             character encoding (default UTF-8)
 */
public static void main(String[] args) {
  // Small self-check of the Counters machinery before the real work.
  ClassicCounter<String> counter = new ClassicCounter<>();
  counter.setCount("A", 0);
  counter.setCount("B", 1);
  double divergence = Counters.klDivergence(counter, counter);
  System.out.println("KL Divergence: " + divergence);

  String encoding = (args.length > 1) ? args[1] : "UTF-8";
  if (args.length < 1) {
    System.out.println("Usage: ParentAnnotationStats treebankPath");
    return;
  }
  SisterAnnotationStats stats = new SisterAnnotationStats();
  Treebank treebank = new DiskTreebank(
      in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer()),
      encoding);
  treebank.loadPath(args[0]);
  treebank.apply(stats);
  stats.printStats();
}