/**
 * Creates the parameter object around the given language pack.
 * <p>
 * The pack is kept in {@code tlp}, the input and output encodings are both
 * initialised from {@link TreebankLanguagePack#getEncoding()}, and
 * {@code generateOriginalDependencies} is switched off.
 *
 * @param tlp The treebank language pack to use
 */
protected AbstractTreebankParserParams(TreebankLanguagePack tlp) {
  this.tlp = tlp;
  this.generateOriginalDependencies = false;

  // Input and output start out with the language pack's encoding.
  this.inputEncoding = tlp.getEncoding();
  this.outputEncoding = tlp.getEncoding();
}
try { TreeReaderFactory trf = new HebrewTreeReaderFactory(); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding())); TreeReader tr = trf.newTreeReader(br); PrintWriter pwDev = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.dev"),false,tlp.getEncoding())); PrintWriter pwTrain = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.train"),false,tlp.getEncoding())); PrintWriter pwTest = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.test"),false,tlp.getEncoding()));
/**
 * Builds a tree reader over XML supplied by a character stream.
 * <p>
 * The reader is wrapped in a byte stream using the encoding reported by a
 * {@code FrenchTreebankLanguagePack}. The document is parsed immediately, the
 * elements named by {@code NODE_SENT} are collected, and the sentence index
 * is reset to zero. A parse or I/O failure is reported with
 * {@code printStackTrace()} and does not propagate out of the constructor.
 *
 * @param in Reader
 * @param tf TreeFactory -- factory to create some kind of Tree
 * @param tn the method of normalizing trees
 */
public FrenchXMLTreeReader(Reader in, TreeFactory tf, TreeNormalizer tn) {
  final TreebankLanguagePack frenchPack = new FrenchTreebankLanguagePack();
  stream = new ReaderInputStream(in, frenchPack.getEncoding());

  treeFactory = tf;
  treeNormalizer = tn;

  final DocumentBuilder xmlBuilder = XMLUtils.getXmlParser();
  try {
    final Element docRoot = xmlBuilder.parse(stream).getDocumentElement();
    sentences = docRoot.getElementsByTagName(NODE_SENT);
    sentIdx = 0;
  } catch (SAXException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Reads every tree from a file and prints each one on its own line of
 * standard output.
 * <p>
 * The file is decoded with the encoding reported by a
 * {@code HebrewTreebankLanguagePack}. Once all trees have been read, the
 * count is written to standard error. I/O problems are reported with a stack
 * trace. If the argument count is not exactly one, a usage message is printed
 * to standard error and the JVM exits with status -1.
 *
 * @param args exactly one element: the path of the tree file
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.printf("Usage: java %s tree_file > trees%n", HebrewTreeReaderFactory.class.getName());
    System.exit(-1);
  }

  TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
  File treeFile = new File(args[0]);

  try {
    TreeReaderFactory trf = new HebrewTreeReaderFactory();
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
    TreeReader tr = trf.newTreeReader(br);

    int numTrees = 0;
    try {
      for (Tree t; ((t = tr.readTree()) != null); numTrees++) {
        System.out.println(t.toString());
      }
    } finally {
      // Close the reader even when readTree() fails part-way through the file.
      tr.close();
    }

    System.err.printf("Processed %d trees.%n", numTrees);

  } catch (IOException e) {
    // UnsupportedEncodingException and FileNotFoundException are IOExceptions,
    // and all three were handled identically, so one clause covers them.
    e.printStackTrace();
  }
}
/**
 * Creates Spanish parser parameters backed by a fresh
 * {@code SpanishTreebankLanguagePack}.
 * <p>
 * The input encoding is taken from the language pack and a
 * {@code SpanishHeadFinder} is installed. The options summary is started with
 * this class's simple name, then {@code buildAnnotations()} is called.
 */
public SpanishTreebankParserParams() {
  super(new SpanishTreebankLanguagePack());

  setInputEncoding(treebankLanguagePack().getEncoding());
  setHeadFinder(new SpanishHeadFinder());

  // The options summary opens with the concrete class name on its own line.
  optionsString = new StringBuilder();
  optionsString.append(getClass().getSimpleName()).append("\n");

  buildAnnotations();
}
/**
 * Loads the parser model named by {@code filename} and updates the panel.
 * <p>
 * When {@code zipFilename} is set, the model is read from that zip.
 * Otherwise it is loaded directly. Any exception or out-of-memory error shows
 * an error dialog, sets the status line and leaves {@code parser} null. On
 * success the parse/save buttons are enabled and the language pack and
 * encoding are refreshed from the loaded parser.
 */
@Override
public void run() {
  try {
    if (zipFilename == null) {
      parser = LexicalizedParser.loadModel(filename);
    } else {
      parser = LexicalizedParser.loadModelFromZip(zipFilename, filename);
    }
  } catch (Exception ex) {
    JOptionPane.showMessageDialog(ParserPanel.this, "Error loading parser: " + filename, null, JOptionPane.ERROR_MESSAGE);
    setStatus("Error loading parser");
    parser = null;
  } catch (OutOfMemoryError e) {
    JOptionPane.showMessageDialog(ParserPanel.this, "Could not load parser. Out of memory.", null, JOptionPane.ERROR_MESSAGE);
    setStatus("Error loading parser");
    parser = null;
  }

  stopProgressMonitor();

  // Nothing more to do when loading failed.
  if (parser == null) {
    return;
  }

  setStatus("Loaded parser.");
  parserFileLabel.setText("Parser: " + filename);

  parseButton.setEnabled(true);
  parseNextButton.setEnabled(true);
  saveOutputButton.setEnabled(true);

  // Follow the language pack (and its encoding) of the freshly loaded parser.
  tlp = parser.getOp().langpack();
  encoding = tlp.getEncoding();
}
} // closes the enclosing declaration, which is opened before this excerpt
/**
 * Reads trees from the source named by the first argument and pretty-prints
 * each with {@code pennPrint()}.
 * <p>
 * The reader is opened via {@code IOUtils.readerFromString} using the encoding
 * reported by a {@code NegraPennLanguagePack}. With no arguments, a usage
 * message is printed to standard output and the method returns. I/O problems
 * are reported with a stack trace.
 *
 * @param args File to run on
 */
public static void main(String[] args) {
  if (args.length < 1) {
    System.out.printf("Usage: java %s tree_file%n", NegraPennTreeReaderFactory.class.getName());
    return;
  }

  TreebankLanguagePack tlp = new NegraPennLanguagePack();
  TreeReaderFactory trf = new NegraPennTreeReaderFactory(2,false,false,tlp);

  try {
    TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()));

    try {
      for (Tree t; (t = tr.readTree()) != null; ) {
        t.pennPrint();
      }
    } finally {
      // Close the reader even when readTree() or printing fails part-way.
      tr.close();
    }

  } catch (IOException e) {
    // UnsupportedEncodingException and FileNotFoundException are IOExceptions,
    // and all three were handled identically, so one clause covers them.
    e.printStackTrace();
  }
}
// Record the XML-format choice in the options summary.
optionsString.append("Reading trees in XML format.\n");
// Input is not treated as Penn format from here on.
readPennFormat = false;
// Use the language pack's encoding for input.
setInputEncoding(tlp.getEncoding());
// NOTE(review): i is presumably the cursor of an option-parsing loop; confirm against the enclosing method.
i++;
this.detailedAnnotations = detailedAnnotations; stream = new ReaderInputStream(in, tlp.getEncoding()); treeFactory = new LabeledScoredTreeFactory(); treeNormalizer =
// Encoding reported by the language pack in the parser's configuration.
String encoding = parser.config.tlp.getEncoding();
// Value of the "textFile" property.
// NOTE(review): presumably the path of the text to read; confirm against the code that follows.
String inputFilename = props.getProperty("textFile");
// Declared here without an initializer.
BufferedReader input;
/** * Creates new form ParserPanel */ public ParserPanel() { initComponents(); // create dialogs for file selection jfc = new JFileChooser(System.getProperty("user.dir")); pageDialog = new OpenPageDialog(new Frame(), true); pageDialog.setFileChooser(jfc); jfcLocation = new JFileChooserLocation(jfc); tlp = new PennTreebankLanguagePack(); encoding = tlp.getEncoding(); setFont(); // create a timer timer = new javax.swing.Timer(ONE_SECOND, new TimerListener()); // for (un)highlighting text highlightStyle = new SimpleAttributeSet(); normalStyle = new SimpleAttributeSet(); StyleConstants.setBackground(highlightStyle, Color.yellow); StyleConstants.setBackground(normalStyle, textPane.getBackground()); this.chooseJarParser = new JarFileChooser(".*\\.ser\\.gz", this); }
/**
 * Creates the parameter object around the given language pack.
 * <p>
 * The pack is kept in {@code tlp}, the input and output encodings are both
 * initialised from {@link TreebankLanguagePack#getEncoding()}, and
 * {@code generateOriginalDependencies} is switched off.
 *
 * @param tlp The treebank language pack to use
 */
protected AbstractTreebankParserParams(TreebankLanguagePack tlp) {
  this.tlp = tlp;
  this.generateOriginalDependencies = false;

  // Input and output start out with the language pack's encoding.
  this.inputEncoding = tlp.getEncoding();
  this.outputEncoding = tlp.getEncoding();
}
/**
 * Creates the parameter object around the given language pack.
 * <p>
 * The pack is kept in {@code tlp}, and the input and output encodings are
 * both initialised from {@link TreebankLanguagePack#getEncoding()}.
 *
 * @param tlp The treebank language pack to use
 */
protected AbstractTreebankParserParams(TreebankLanguagePack tlp) {
  this.tlp = tlp;

  // Input and output start out with the language pack's encoding.
  this.inputEncoding = tlp.getEncoding();
  this.outputEncoding = tlp.getEncoding();
}
/**
 * Creates the parameter object around the given language pack.
 * <p>
 * The pack is kept in {@code tlp}, the input and output encodings are both
 * initialised from {@link TreebankLanguagePack#getEncoding()}, and
 * {@code generateOriginalDependencies} is switched off.
 *
 * @param tlp The treebank language pack to use
 */
protected AbstractTreebankParserParams(TreebankLanguagePack tlp) {
  this.tlp = tlp;
  this.generateOriginalDependencies = false;

  // Input and output start out with the language pack's encoding.
  this.inputEncoding = tlp.getEncoding();
  this.outputEncoding = tlp.getEncoding();
}
/**
 * Creates the parameter object around the given language pack.
 * <p>
 * The pack is kept in {@code tlp}, and the input and output encodings are
 * both initialised from {@link TreebankLanguagePack#getEncoding()}.
 *
 * @param tlp The treebank language pack to use
 */
protected AbstractTreebankParserParams(TreebankLanguagePack tlp) {
  this.tlp = tlp;

  // Input and output start out with the language pack's encoding.
  this.inputEncoding = tlp.getEncoding();
  this.outputEncoding = tlp.getEncoding();
}
/**
 * Builds a tree reader over XML supplied by a character stream.
 * <p>
 * The reader is wrapped in a byte stream using the encoding reported by a
 * {@code FrenchTreebankLanguagePack}. The document is parsed immediately, the
 * elements named by {@code NODE_SENT} are collected, and the sentence index
 * is reset to zero. A parse or I/O failure is reported with
 * {@code printStackTrace()} and does not propagate out of the constructor.
 *
 * @param in Reader
 * @param tf TreeFactory -- factory to create some kind of Tree
 * @param tn the method of normalizing trees
 */
public FrenchXMLTreeReader(Reader in, TreeFactory tf, TreeNormalizer tn) {
  final TreebankLanguagePack frenchPack = new FrenchTreebankLanguagePack();
  stream = new ReaderInputStream(in, frenchPack.getEncoding());

  treeFactory = tf;
  treeNormalizer = tn;

  final DocumentBuilder xmlBuilder = XMLUtils.getXmlParser();
  try {
    final Element docRoot = xmlBuilder.parse(stream).getDocumentElement();
    sentences = docRoot.getElementsByTagName(NODE_SENT);
    sentIdx = 0;
  } catch (SAXException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Builds a tree reader over XML supplied by a character stream.
 * <p>
 * The reader is wrapped in a byte stream using the encoding reported by a
 * {@code FrenchTreebankLanguagePack}. The document is parsed immediately, the
 * elements named by {@code NODE_SENT} are collected, and the sentence index
 * is reset to zero. A parse or I/O failure is reported with
 * {@code printStackTrace()} and does not propagate out of the constructor.
 *
 * @param in Reader
 * @param tf TreeFactory -- factory to create some kind of Tree
 * @param tn the method of normalizing trees
 */
public FrenchXMLTreeReader(Reader in, TreeFactory tf, TreeNormalizer tn) {
  final TreebankLanguagePack frenchPack = new FrenchTreebankLanguagePack();
  stream = new ReaderInputStream(in, frenchPack.getEncoding());

  treeFactory = tf;
  treeNormalizer = tn;

  final DocumentBuilder xmlBuilder = XMLUtils.getXmlParser();
  try {
    final Element docRoot = xmlBuilder.parse(stream).getDocumentElement();
    sentences = docRoot.getElementsByTagName(NODE_SENT);
    sentIdx = 0;
  } catch (IOException e) {
    e.printStackTrace();
  } catch (SAXException e) {
    e.printStackTrace();
  }
}
/**
 * Creates Spanish parser parameters backed by a fresh
 * {@code SpanishTreebankLanguagePack}.
 * <p>
 * The input encoding is taken from the language pack and a
 * {@code SpanishHeadFinder} is installed. The options summary is started with
 * this class's simple name, then {@code buildAnnotations()} is called.
 */
public SpanishTreebankParserParams() {
  super(new SpanishTreebankLanguagePack());

  setInputEncoding(treebankLanguagePack().getEncoding());
  setHeadFinder(new SpanishHeadFinder());

  // The options summary opens with the concrete class name on its own line.
  optionsString = new StringBuilder();
  optionsString.append(getClass().getSimpleName()).append("\n");

  buildAnnotations();
}
/**
 * Creates Spanish parser parameters backed by a fresh
 * {@code SpanishTreebankLanguagePack}.
 * <p>
 * The input encoding is taken from the language pack and a
 * {@code SpanishHeadFinder} is installed. The options summary is started with
 * this class's simple name, then {@code buildAnnotations()} is called.
 */
public SpanishTreebankParserParams() {
  super(new SpanishTreebankLanguagePack());

  setInputEncoding(treebankLanguagePack().getEncoding());
  setHeadFinder(new SpanishHeadFinder());

  // The options summary opens with the concrete class name on its own line.
  optionsString = new StringBuilder();
  optionsString.append(getClass().getSimpleName()).append("\n");

  buildAnnotations();
}
/** * Creates new form ParserPanel */ public ParserPanel() { initComponents(); // create dialogs for file selection jfc = new JFileChooser(System.getProperty("user.dir")); pageDialog = new OpenPageDialog(new Frame(), true); pageDialog.setFileChooser(jfc); jfcLocation = new JFileChooserLocation(jfc); tlp = new PennTreebankLanguagePack(); encoding = tlp.getEncoding(); setFont(); // create a timer timer = new javax.swing.Timer(ONE_SECOND, new TimerListener()); // for (un)highlighting text highlightStyle = new SimpleAttributeSet(); normalStyle = new SimpleAttributeSet(); StyleConstants.setBackground(highlightStyle, Color.yellow); StyleConstants.setBackground(normalStyle, textPane.getBackground()); this.chooseJarParser = new JarFileChooser(".*\\.ser\\.gz", this); }