/**
 * Reads every tree from {@code treeFile} (decoded as UTF-8), passes each one
 * through {@code traverseAndFix} together with the two counters, and writes
 * the resulting trees, one per line, to a sibling file whose name is the
 * input path with {@code ".fixed"} appended (also UTF-8). The number of
 * processed trees is printed to standard output on success.
 *
 * <p>The input reader and the output writer are managed by
 * try-with-resources so that both are released even when reading or fixing a
 * tree fails part-way through. I/O failures are reported with
 * {@code printStackTrace()} and not propagated.
 *
 * @param treeFile the file containing the trees to process
 * @param pretermLabel counter handed through unchanged to {@code traverseAndFix}
 * @param unigramTagger counter handed through unchanged to {@code traverseAndFix}
 */
private static void resolveDummyTags(File treeFile, TwoDimensionalCounter<String, String> pretermLabel, TwoDimensionalCounter<String, String> unigramTagger) {
  // The input is opened first so that a missing input file never creates an empty ".fixed" file.
  try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
       PrintWriter pw = new PrintWriter(new PrintStream(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"))) {
    TreeReaderFactory trf = new FrenchTreeReaderFactory();
    TreeReader tr = trf.newTreeReader(br);

    int nTrees = 0;
    for (Tree t; (t = tr.readTree()) != null; nTrees++) {
      traverseAndFix(t, pretermLabel, unigramTagger);
      pw.println(t.toString());
    }

    // Explicit close kept from the original; br and pw are closed again (harmlessly) on exit.
    tr.close();

    System.out.println("Processed " + nTrees + " trees");

  } catch (IOException e) {
    // Also covers UnsupportedEncodingException and FileNotFoundException, which were handled identically.
    e.printStackTrace();
  }
}
/**
 * Reads the next tree accepted by the filter {@code f}.
 * Trees read from the wrapped reader {@code tr} for which {@code f.test}
 * returns {@code false} are skipped.
 *
 * @return The next accepted tree, or <code>null</code> once the wrapped
 *         reader returns <code>null</code>.
 * @throws IOException if the wrapped reader fails while reading
 */
public Tree readTree() throws IOException {
  Tree candidate = tr.readTree();
  // Keep pulling trees until one passes the filter or the source runs dry.
  while (candidate != null && !f.test(candidate)) {
    candidate = tr.readTree();
  }
  return candidate;
}
/**
 * Closes the Reader behind this <code>TreeReader</code> by delegating to
 * {@code tr.close()}.
 *
 * @throws IOException if {@code tr.close()} throws it
 */
public void close() throws IOException { tr.close(); }
/**
 * Returns the next tree read from {@code tr}.
 *
 * An {@link IOException} raised by {@code tr.readTree()} is not propagated:
 * the message "Error in reading tree." is logged via {@code log.info} (the
 * exception itself is not included in the log call) and {@code null} is
 * returned instead.
 *
 * NOTE(review): if {@code readTree()} also returns {@code null} at end of
 * input, callers cannot tell a read failure from normal exhaustion - confirm.
 *
 * @return the tree returned by {@code tr.readTree()}, or {@code null} when reading failed
 */
@Override public Tree getNext() { try { return tr.readTree(); } catch(IOException e) { log.info("Error in reading tree."); return null; } } };
/**
 * Returns the yield that was buffered in {@code nextYield} and then calls
 * {@code primeNext()} to buffer the following one.
 *
 * When nothing is buffered ({@code nextYield == null}) this closes and clears
 * {@code fileReader} if it is set, otherwise {@code treeReader} if it is set,
 * and returns {@code null}. An {@link IOException} raised while closing is
 * printed with {@code printStackTrace()} and otherwise ignored.
 *
 * @return the buffered yield, or {@code null} when there is none
 */
@Override
public List<String> next() {
  if (nextYield != null) {
    List<String> current = nextYield;
    primeNext();
    return current;
  }

  // Nothing buffered: release whichever underlying reader is still open.
  try {
    if (fileReader != null) {
      fileReader.close();
      fileReader = null;
    } else if (treeReader != null) {
      treeReader.close();
      treeReader = null;
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return null;
}
/**
 * Reads all trees from the file named by the single command-line argument,
 * using a {@code HebrewTreeReaderFactory} and the encoding reported by
 * {@code HebrewTreebankLanguagePack}, and prints each tree to standard
 * output. The tree count is printed to standard error on success.
 *
 * <p>Prints a usage message and exits with status -1 unless exactly one
 * argument is given. The input stream and reader are managed by
 * try-with-resources so they are released even when the encoding is
 * unsupported or reading a tree fails. I/O failures are reported with
 * {@code printStackTrace()}.
 *
 * @param args command-line arguments; {@code args[0]} is the tree file
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.printf("Usage: java %s tree_file > trees%n", HebrewTreeReaderFactory.class.getName());
    System.exit(-1);
  }

  TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
  File treeFile = new File(args[0]);
  TreeReaderFactory trf = new HebrewTreeReaderFactory();

  // The raw stream is its own resource so it is closed even if the encoding name is rejected.
  try (FileInputStream fis = new FileInputStream(treeFile);
       BufferedReader br = new BufferedReader(new InputStreamReader(fis, tlp.getEncoding()))) {
    TreeReader tr = trf.newTreeReader(br);

    int numTrees = 0;
    for (Tree t; (t = tr.readTree()) != null; numTrees++) {
      System.out.println(t.toString());
    }

    // Explicit close kept from the original; the resources above are closed again (harmlessly) on exit.
    tr.close();
    System.err.printf("Processed %d trees.%n", numTrees);

  } catch (IOException e) {
    // Also covers UnsupportedEncodingException and FileNotFoundException, which were handled identically.
    e.printStackTrace();
  }
}
/**
 * Builds a tree from its String representation (a bracketed Tree of the
 * kind produced by {@code toString()} or {@code pennPrint()}, or as found
 * in the Penn Treebank).
 * It's not the most efficient thing to do for heavy duty usage.
 *
 * @param str The tree as a bracketed list in a String.
 * @param trf The TreeReaderFactory used to create the reader that parses {@code str}
 * @return The first tree read from {@code str}
 * @throws RuntimeException If reading the tree raises an IOException
 */
public static Tree valueOf(String str, TreeReaderFactory trf) {
  final StringReader source = new StringReader(str);
  try {
    return trf.newTreeReader(source).readTree();
  } catch (IOException ioe) {
    throw new RuntimeException("Tree.valueOf() tree construction failed", ioe);
  }
}
/**
 * Advances to the next file to read trees from.
 *
 * If {@code curPathIter} has no further file, {@code primeNextPath()} is
 * tried once and the iterator is consulted again. When a file is available
 * it becomes {@code currentFile}, any open {@code tr} is closed, a new tree
 * reader is opened on the file using {@code encoding()}, and
 * {@code curLineId} is reset to 1.
 *
 * @return {@code true} if a new file was opened, {@code false} if there are no more files
 * @throws RuntimeException wrapping any I/O failure, after a diagnostic is printed to standard error
 */
private boolean primeNextFile() {
  try {
    boolean fileAvailable = curPathIter.hasNext() || (primeNextPath() && curPathIter.hasNext());
    if (!fileAvailable) {
      return false;
    }

    currentFile = curPathIter.next();
    currentFilename = currentFile.getAbsolutePath();
    if (PRINT_FILENAMES) {
      log.info(currentFile);
    }

    // Release the reader of the previous file before opening the next one.
    if (tr != null) {
      tr.close();
    }
    tr = treeReaderFactory().newTreeReader(IOUtils.readerFromFile(currentFile, encoding()));
    curLineId = 1;
    return true;

  } catch (UnsupportedEncodingException e) {
    System.err.printf("%s: Filesystem does not support encoding:%n%s%n", this.getClass().getName(), e.toString());
    throw new RuntimeException(e);
  } catch (FileNotFoundException e) {
    System.err.printf("%s: File does not exist:%n%s%n", this.getClass().getName(), e.toString());
    throw new RuntimeException(e);
  } catch (IOException e) {
    System.err.printf("%s: Unable to close open tree reader:%n%s%n", this.getClass().getName(), currentFile.getPath());
    throw new RuntimeException(e);
  }
}
/**
 * Loads treebank data from the file named by the first argument (decoded as
 * UTF-8) and prints each tree to standard output, followed by a blank line.
 *
 * <p>The reader is managed by try-with-resources so that it is closed even
 * when reading a tree fails part-way through the file.
 *
 * @param args Array of command-line arguments: specifies a filename
 * @throws RuntimeIOException wrapping any IOException raised while opening, reading or closing the file
 */
public static void main(String[] args) {
  try (Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"))) {
    TreeFactory tf = new LabeledScoredTreeFactory();
    TreeReader tr = new PennTreeReader(r, tf);
    for (Tree t = tr.readTree(); t != null; t = tr.readTree()) {
      System.out.println(t);
      System.out.println();
    }
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe);
  }
}
TreeReader tr = trf.newTreeReader(file.getPath(), in); process(file, tr, posPattern, wordPattern, plainPrint); tr.close(); } catch (IOException e) { e.printStackTrace();
/**
 * Buffers the next yield into {@code nextYield}.
 *
 * When {@code treeReader} is set, the next tree is read and
 * {@code nextYield} becomes the list of tags of its tagged, labeled leaves.
 * Otherwise the next line is read from {@code fileReader} and split on
 * whitespace. In both modes {@code nextYield} is set to {@code null} when
 * the source is exhausted, and also when an {@link IOException} occurs (the
 * exception is printed with {@code printStackTrace()}).
 */
private void primeNext() {
  try {
    if (treeReader == null) {
      // Plain-text mode: one whitespace-separated yield per line.
      String line = fileReader.readLine();
      nextYield = (line == null) ? null : Arrays.asList(line.split("\\s+"));
      return;
    }

    // Tree mode: the yield is the sequence of tags on the leaves.
    Tree tree = treeReader.readTree();
    if (tree == null) {
      nextYield = null;
      return;
    }
    List<CoreLabel> taggedLeaves = tree.taggedLabeledYield();
    nextYield = new ArrayList<>(taggedLeaves.size());
    for (CoreLabel leaf : taggedLeaves) {
      nextYield.add(leaf.tag());
    }
  } catch (IOException e) {
    nextYield = null;
    e.printStackTrace();
  }
}
/**
 * Closes the Reader behind this <code>TreeReader</code> by delegating to
 * {@code tr.close()}.
 *
 * @throws IOException if {@code tr.close()} throws it
 */
public void close() throws IOException { tr.close(); }
private Tree primeNextTree() { Tree t = null; try { t = tr.readTree(); if(t == null && primeNextFile()) //Current file is exhausted t = tr.readTree(); //Associate this tree with a file and line number if(t != null && t.label() != null && t.label() instanceof HasIndex) { HasIndex lab = (HasIndex) t.label(); lab.setSentIndex(curLineId++); lab.setDocID(currentFile.getName()); } } catch (IOException e) { System.err.printf("%s: Error reading from file %s:%n%s%n", this.getClass().getName(), currentFile.getPath(), e.toString()); throw new RuntimeException(e); } return t; }
/**
 * Closes the Reader behind this <code>TreeReader</code> by delegating to
 * {@code tr.close()}.
 *
 * @throws IOException if {@code tr.close()} throws it
 */
public void close() throws IOException { tr.close(); }
/**
 * Reads trees from the given reader and prints the processed form of each
 * tree accepted by {@code shouldPrintTree} to standard output, one per line,
 * as {@code <canonical file name>-<sentence id>\t<tree>}. A summary with the
 * number of trees read and printed is written to standard error.
 *
 * <p>The canonical file name is the file's name up to its last {@code '.'};
 * a name without any {@code '.'} is used unchanged (previously this case
 * threw a {@code StringIndexOutOfBoundsException}).
 *
 * @param file the file the trees come from; only its name is used, for labelling output
 * @param tr the reader supplying the trees; it is not closed by this method
 * @param posPattern passed through to {@code shouldPrintTree}
 * @param wordPattern passed through to {@code shouldPrintTree}
 * @param plainPrint passed through to {@code toString(Tree, boolean)}
 * @throws IOException if reading a tree fails
 */
public static void process(File file, TreeReader tr, Pattern posPattern, Pattern wordPattern, boolean plainPrint) throws IOException {
  Tree t;
  int numTrees = 0, numTreesRetained = 0;

  String fileName = file.getName();
  int extensionStart = fileName.lastIndexOf('.');
  // lastIndexOf yields -1 when there is no extension; substring(0, -1) would throw.
  String canonicalFileName = (extensionStart < 0) ? fileName : fileName.substring(0, extensionStart);

  while ((t = tr.readTree()) != null) {
    numTrees++;
    if (!shouldPrintTree(t, posPattern, wordPattern)) {
      continue;
    }
    numTreesRetained++;

    String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
    String output = toString(t, plainPrint);

    System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, output);
  }

  System.err.printf("%s: %d trees, %d matched and printed%n", fileName, numTrees, numTreesRetained);
}
/**
 * Closes the Reader behind this <code>TreeReader</code> by delegating to
 * {@code tr.close()}.
 *
 * @throws IOException if {@code tr.close()} throws it
 */
public void close() throws IOException { tr.close(); }