edu.stanford.nlp.trees.DiskTreebank java code examples

/**
 * Builds a new {@link DiskTreebank} from this object's tree reader factory
 * and its configured input encoding.
 *
 * @return a freshly constructed DiskTreebank; no paths are loaded into it here
 */
@Override
public DiskTreebank diskTreebank() {
 final DiskTreebank treebank = new DiskTreebank(treeReaderFactory(), inputEncoding);
 return treebank;
}

DiskTreebank treebank = new DiskTreebank(trf, encoding);
treebank.loadPath(path, filter);
Iterator<Tree> treeIterator = treebank.iterator();
int treeCount = 0;
String currentFile = "";
 if (!currentFile.equals(treebank.getCurrentFilename())) {
  currentFile = treebank.getCurrentFilename();
  treeCount = 0;

/**
 * Advances to the next file and opens a tree reader on it.
 *
 * @return {@code true} when a new file was opened; {@code false} when the
 *         current path iterator is exhausted and no further path could be primed
 * @throws RuntimeException wrapping any encoding, file-not-found or I/O failure
 */
private boolean primeNextFile() {
 try {
  // Nothing left in the current path, and no further path yields a file.
  if (!curPathIter.hasNext() && !(primeNextPath() && curPathIter.hasNext())) {
   return false;
  }

  currentFile = curPathIter.next();
  currentFilename = currentFile.getAbsolutePath();
  if (PRINT_FILENAMES) {
   log.info(currentFile);
  }

  // Release the reader of the previous file before opening the next one.
  if (tr != null) {
   tr.close();
  }
  tr = treeReaderFactory().newTreeReader(IOUtils.readerFromFile(currentFile, encoding()));
  curLineId = 1;
  return true;
 } catch (UnsupportedEncodingException e) {
  System.err.printf("%s: Filesystem does not support encoding:%n%s%n", this.getClass().getName(), e.toString());
  throw new RuntimeException(e);
 } catch (FileNotFoundException e) {
  System.err.printf("%s: File does not exist:%n%s%n", this.getClass().getName(),e.toString());
  throw new RuntimeException(e);
 } catch (IOException e) {
  // NOTE(review): this branch also receives any other IOException raised in the try body,
  // not only failures from tr.close() — confirm the message is still appropriate.
  System.err.printf("%s: Unable to close open tree reader:%n%s%n", this.getClass().getName(),currentFile.getPath());
  throw new RuntimeException(e);
 }
}

/**
 * Creates a disk treebank through {@code op.tlpParams} and loads the given
 * path into it, logging progress along the way.
 *
 * @param treebankPath path handed to {@code loadPath}
 * @param op options object whose {@code tlpParams} supplies the treebank
 * @param filt file filter to apply, or {@code null} to load without a filter
 * @return the loaded treebank
 */
private static DiskTreebank makeSecondaryTreebank(String treebankPath, Options op, FileFilter filt) {
 log.info("Additionally training using secondary disk treebank: " + treebankPath + ' ' + filt);
 DiskTreebank secondary = op.tlpParams.diskTreebank();
 log.info("Reading trees...");
 if (filt != null) {
  secondary.loadPath(treebankPath, filt);
 } else {
  secondary.loadPath(treebankPath);
 }
 String summary = "done [read " + secondary.size() + " trees].";
 Timing.tick(summary);
 return secondary;
}

 /**
  * Command-line driver: loads a treebank, applies the Arabic parser-params
  * tree transformation to every subtree, and prints each tree to standard
  * output.
  *
  * @param args exactly one element: the path passed to
  *             {@code DiskTreebank.loadPath}
  */
 public static void main(String[] args) {
  if(args.length != 1) {
   // Say why we are exiting instead of terminating silently.
   System.err.println("Usage: expected exactly one argument: the path of the treebank to load");
   System.exit(-1);
  }

  ArabicTreebankParserParams tlpp = new ArabicTreebankParserParams();
  String[] options = {"-arabicFactored"};
  tlpp.setOptionFlag(options, 0);
  DiskTreebank tb = tlpp.diskTreebank();
  // NOTE(review): "txt" / false are presumably the file suffix and a
  // "recurse into subdirectories" flag — confirm against DiskTreebank.loadPath.
  tb.loadPath(args[0], "txt", false);

  for(Tree t : tb) {
   for(Tree subtree : t) {
    // The return value is discarded; presumably transformTree modifies the
    // subtree in place — verify against ArabicTreebankParserParams.
    tlpp.transformTree(subtree, t);
   }
   System.out.println(t.toString());
  }
 }
}

/**
 * Loads a treebank from disk, converts every tree with
 * {@code convertTreeBankToCoNLLX}, and writes the results to a file, one
 * converted tree per entry followed by a blank line.
 *
 * <p>The output writer is managed by try-with-resources, so it is closed
 * (and thereby flushed) even when writing fails part-way. Writing stops at
 * the first I/O error instead of repeatedly attempting to write to a stream
 * that has already failed.
 *
 * @param trainDirPath path handed to {@code DiskTreebank.loadPath}
 * @param trainTreeBankFilter filter selecting which files are loaded
 * @param outputFileName name of the file the converted trees are written to
 */
public static void convertTreebankToCoNLLX(String trainDirPath, FileFilter trainTreeBankFilter, String outputFileName) {
  DiskTreebank trainTreeBank = new DiskTreebank();
  trainTreeBank.loadPath(trainDirPath, trainTreeBankFilter);
  // NOTE(review): counter is never incremented, so count(...) always receives 0
  // as its first argument — confirm whether progress reporting works as intended.
  int counter = 0;
  int size = trainTreeBank.size();
  List<DTree> trees = trainTreeBank.parallelStream().map(tree -> {
    count(counter, size);
    return convertTreeBankToCoNLLX(tree.pennString());
  }).collect(Collectors.toList());
  try (FileWriter fw = new FileWriter(outputFileName)) {
    String lineBreak = System.lineSeparator();
    for (DTree dTree : trees) {
      // Drop the element at index 0 before serialising, as the original did.
      dTree.remove(0);
      fw.write(dTree.toString());
      fw.write(lineBreak);
      fw.write(lineBreak);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}

// Hand the collected option strings to op as a String[].
op.setOptions(para.stream().toArray(String[]::new));
// Primary training treebank: files under trainDirPath accepted by the number-range filter.
DiskTreebank trainTreeBank = new DiskTreebank();
// NOTE(review): the trailing `true` is presumably a "recurse into subdirectories" flag — confirm.
FileFilter trainTreeBankFilter = new NumberRangeFileFilter(startRange, endRange, true);
trainTreeBank.loadPath(trainDirPath, trainTreeBankFilter);
  // Additional treebank: files under train2DirPath selected by file extension.
  extraTreeBank = new DiskTreebank();
  // NOTE(review): as above, the meaning of the trailing `true` should be confirmed.
  FileFilter extraTreeBankFilter = new ExtensionFileFilter(train2FileExtension, true);
  extraTreeBank.loadPath(train2DirPath, extraTreeBankFilter);

/**
 * Loads the trees found under {@code path} (restricted by {@code filter}),
 * normalizes each one, and appends its tagged yield to {@code data} as a
 * single string built with {@code SentenceUtils.listToString(..., false, "_")}.
 *
 * @param filter file filter passed to {@code loadPath}
 * @param path location passed to {@code loadPath}
 * @param data collection that receives one string per tree
 */
private static void loadTreeBank(FileFilter filter, String path, Collection<String> data) {
  DiskTreebank treebank = new DiskTreebank();
  treebank.loadPath(path, filter);
  final TreeNormalizer normalizer = new BobChrisTreeNormalizer();
  treebank.apply(tree -> {
    Tree normalized = normalizer.normalizeWholeTree(tree, tree.treeFactory());
    String taggedSentence = SentenceUtils.listToString(normalized.taggedYield(), false, "_");
    data.add(taggedSentence);
  });
}

/**
 * Test driver: prints the default treebank encoding, then loads the treebank
 * named on the command line and prints every tree followed by the tree count.
 *
 * @param args {@code args[0]} is the path to load; {@code args[1]} is the
 *             file-range specification given to {@code NumberRangesFileFilter}.
 *             With fewer than two arguments only a usage line is printed.
 */
public static void main(String[] args) {
 TreebankLangParserParams params = new ChineseTreebankParserParams();
 System.out.println("Default encoding is: " + params.diskTreebank().encoding());

 // Guard clause: without both arguments there is nothing to load.
 if (args.length < 2) {
  printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
  return;
 }

 Treebank treebank = params.diskTreebank();
 treebank.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
 for (Tree tree : treebank) {
  tree.pennPrint(params.pw());
 }
 System.out.println("There were " + treebank.size() + " trees.");
}

// Downcast so getCurrentFilename() can be called; this throws ClassCastException
// if treebank is not actually a DiskTreebank.
DiskTreebank dtb = (DiskTreebank) treebank;
// Emit a "# <filename>" line naming the file the treebank is currently reading.
pw.print("# ");
pw.println(dtb.getCurrentFilename());

  tb.loadPath(args[i]);
Lexicon lex = tlpp.lex(op, wordIndex, tagIndex);
int computeAfter = (int) (0.50 * tb.size());
Counter<String> vocab = new ClassicCounter<>();
Counter<String> unkCounter = new ClassicCounter<>();

tb.loadPath(args[i]);

TreeReaderFactory trf = 
 new PennTreeReaderFactory(new NPTmpRetainingTreeNormalizer());
DiskTreebank testTreebank = new DiskTreebank(trf);
testTreebank.loadPath(new File(args[0]), testFilt);
HeadFinder hf = new ModCollinsHeadFinder();
Function<String, String> basicCatFunction =

/**
 * Test driver: prints the default treebank encoding, then loads the treebank
 * named on the command line and prints every tree followed by the tree count.
 *
 * @param args {@code args[0]} is the path to load; {@code args[1]} is the
 *             file-range specification given to {@code NumberRangesFileFilter}.
 *             With fewer than two arguments only a usage line is printed.
 */
public static void main(String[] args) {
 TreebankLangParserParams params = new ChineseTreebankParserParams();
 System.out.println("Default encoding is: " + params.diskTreebank().encoding());

 // Guard clause: without both arguments there is nothing to load.
 if (args.length < 2) {
  printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
  return;
 }

 Treebank treebank = params.diskTreebank();
 treebank.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
 for (Tree tree : treebank) {
  tree.pennPrint(params.pw());
 }
 System.out.println("There were " + treebank.size() + " trees.");
}

// Downcast so getCurrentFilename() can be called; this throws ClassCastException
// if treebank is not actually a DiskTreebank.
DiskTreebank dtb = (DiskTreebank) treebank;
// Emit a "# <filename>" line naming the file the treebank is currently reading.
pw.print("# ");
pw.println(dtb.getCurrentFilename());

/**
 * Supplies a treebank that reads trees from whatever source the caller
 * loads into it. Handling the character-set encoding of the input is left
 * to {@code treeReaderFactory()}, and normalizing the trees is left to the
 * tree reader it produces.
 *
 * @return a new DiskTreebank built on {@code treeReaderFactory()}
 */
@Override
public DiskTreebank diskTreebank() {
 final DiskTreebank treebank = new DiskTreebank(treeReaderFactory());
 return treebank;
}

/**
 * Creates a disk treebank through {@code op.tlpParams} and loads the given
 * path into it, reporting progress on standard error.
 *
 * @param treebankPath path handed to {@code loadPath}
 * @param op options object whose {@code tlpParams} supplies the treebank
 * @param filt file filter to apply, or {@code null} to load without a filter
 * @return the loaded treebank
 */
private static DiskTreebank makeSecondaryTreebank(String treebankPath, Options op, FileFilter filt) {
 System.err.println("Additionally training using secondary disk treebank: " + treebankPath + ' ' + filt);
 DiskTreebank secondary = op.tlpParams.diskTreebank();
 System.err.print("Reading trees...");
 if (filt != null) {
  secondary.loadPath(treebankPath, filt);
 } else {
  secondary.loadPath(treebankPath);
 }
 String summary = "done [read " + secondary.size() + " trees].";
 Timing.tick(summary);
 return secondary;
}

tb.loadPath(args[i++]);

/**
 * Advances to the next file and opens a tree reader on it.
 *
 * <p>The underlying file stream is closed again if wrapping it in a reader
 * or building the tree reader fails, so a bad encoding name does not leak an
 * open file handle.
 *
 * @return {@code true} when a new file was opened; {@code false} when the
 *         current path iterator is exhausted and no further path could be primed
 * @throws RuntimeException wrapping any encoding, file-not-found or I/O failure
 */
private boolean primeNextFile() {
  try {
    if(curPathIter.hasNext() || (primeNextPath() && curPathIter.hasNext())) {
      currentFile = curPathIter.next();
      currentFilename = currentFile.getAbsolutePath();
      if(PRINT_FILENAMES) System.err.println(currentFile);
      // Release the reader of the previous file before opening the next one.
      if(tr != null) tr.close();

      FileInputStream fileStream = new FileInputStream(currentFile);
      boolean readerReady = false;
      try {
        tr = treeReaderFactory().newTreeReader(new BufferedReader(new InputStreamReader(fileStream, encoding())));
        readerReady = true;
      } finally {
        if (!readerReady) {
          // Construction failed after the file was opened: close the raw stream
          // ourselves, since no reader took ownership of it.
          try {
            fileStream.close();
          } catch (IOException ignored) {
            // Deliberately ignored so the original failure is the one reported.
          }
        }
      }
      curLineId = 1;
      return true;
    }
  } catch (UnsupportedEncodingException e) {
    System.err.printf("%s: Filesystem does not support encoding:\n%s\n", this.getClass().getName(), e.toString());
    throw new RuntimeException(e);
  } catch (FileNotFoundException e) {
    System.err.printf("%s: File does not exist:\n%s\n", this.getClass().getName(),e.toString());
    throw new RuntimeException(e);
  } catch (IOException e) {
    // NOTE(review): currentFile has already advanced here, so the path printed is the
    // new file, not the one whose reader failed to close.
    System.err.printf("%s: Unable to close open tree reader:\n%s\n", this.getClass().getName(),currentFile.getPath());
    throw new RuntimeException(e);
  }
  return false;
}

/**
 * Test driver: prints the default treebank encoding, then loads the treebank
 * named on the command line and prints every tree followed by the tree count.
 *
 * @param args {@code args[0]} is the path to load; {@code args[1]} is the
 *             file-range specification given to {@code NumberRangesFileFilter}.
 *             With fewer than two arguments only a usage line is printed.
 */
public static void main(String[] args) {
 TreebankLangParserParams params = new ChineseTreebankParserParams();
 System.out.println("Default encoding is: " + params.diskTreebank().encoding());

 // Guard clause: without both arguments there is nothing to load.
 if (args.length < 2) {
  printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
  return;
 }

 Treebank treebank = params.diskTreebank();
 treebank.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
 for (Tree tree : treebank) {
  tree.pennPrint(params.pw());
 }
 System.out.println("There were " + treebank.size() + " trees.");
}

Javadoc

A DiskTreebank is a Collection of Trees. A DiskTreebank object stores merely the information to get at a corpus of trees that is stored on disk. Access is usually via apply()'ing a TreeVisitor to each Tree in the Treebank or by using an iterator() to get an iteration over the Trees.

If the root Label of the Tree objects built by the TreeReader implements HasIndex, then the filename and index of the tree in a corpus will be inserted as they are read in.

Most used methods

<init>
Create a new treebank, set the encoding for file access.
loadPath
encoding
getCurrentFilename
Returns the absolute path of the file currently being read.
size
treeReaderFactory
apply
Applies the TreeVisitor to all trees in the Treebank.
iterator
Return an Iterator over Trees in the Treebank. This is implemented by building per-file MemoryTreeba
parallelStream
textualSummary
transform

transform

Popular in Java

Making http requests using okhttp
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
getSystemService (Context)
onRequestPermissionsResult (Fragment)
ConnectException (java.net)
A ConnectException is thrown if a connection cannot be established to a remote host on a specific po
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
Executors (java.util.concurrent)
Factory and utility methods for Executor, ExecutorService, ScheduledExecutorService, ThreadFactory,
HttpServletRequest (javax.servlet.http)
Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
Best IntelliJ plugins

How to use DiskTreebank in edu.stanford.nlp.trees

Best Java code snippets using edu.stanford.nlp.trees.DiskTreebank (Showing top 20 results out of 315)

How to use
DiskTreebank
in
edu.stanford.nlp.trees