/**
 * Builds the {@link POIXMLTextExtractor} for the given <code>opcPackage</code>.
 * <p>
 * The work is delegated to {@link ExtractorFactory}; being <code>protected</code>,
 * this method can be overridden by subclasses.
 *
 * @param opcPackage is the {@link OPCPackage} to create the extractor for.
 * @return the {@link POIXMLTextExtractor} produced by the factory.
 * @throws Exception if the factory fails to create the extractor.
 */
protected POIXMLTextExtractor createExtractor(OPCPackage opcPackage) throws Exception {

  POIXMLTextExtractor xmlExtractor = ExtractorFactory.createExtractor(opcPackage);
  return xmlExtractor;
}
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // Only ever an OLE2 one from the root of the FS return (POIOLE2TextExtractor)createExtractor(fs.getRoot()); }
@Override public InputStream getDestinationStream(Item currentItem, InputStream source, boolean verbose) throws Exception { String text; try { // get input stream from bitstream, pass to filter, get string back POITextExtractor extractor = ExtractorFactory.createExtractor(source); text = extractor.getText(); } catch (IOException | OpenXML4JException | XmlException e) { System.err.format("Invalid File Format: %s%n", e.getMessage()); LOG.error("Unable to parse the bitstream: ", e); throw e; } // if verbose flag is set, print out extracted text to STDOUT if (verbose) { System.out.println(text); } // return the extracted text as a stream. return new ByteArrayInputStream(text.getBytes()); } }
public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // Figure out the kind of stream // If clearly doesn't do mark/reset, wrap up if(! inp.markSupported()) { inp = new PushbackInputStream(inp, 8); } if(POIFSFileSystem.hasPOIFSHeader(inp)) { return createExtractor(new POIFSFileSystem(inp)); } if(POIXMLDocument.hasOOXMLHeader(inp)) { return createExtractor(OPCPackage.open(inp)); } throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); }
/**
 * Creates a text extractor for the document stored in the given file.
 * <p>
 * The file header is checked for OLE2 first and OOXML second. An OLE2 document is
 * read through the stream opened here; an OOXML document is opened from the file's
 * path via {@link OPCPackage#open(String)}.
 * <p>
 * The stream is managed with try-with-resources, so it is always closed and a
 * failure while closing cannot mask an exception raised while reading.
 *
 * @param f the file holding the document.
 * @return the extractor created for the detected format.
 * @throws IOException if the file cannot be opened or read.
 * @throws IllegalArgumentException if the file has neither an OLE2 nor an OOXML header.
 */
public static POITextExtractor createExtractor(File f)
        throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
    // The pushback buffer lets the header checks look at the leading bytes
    // and then push them back for the real parser.
    try (InputStream inp = new PushbackInputStream(new FileInputStream(f), 8)) {
        if (POIFSFileSystem.hasPOIFSHeader(inp)) {
            return createExtractor(new POIFSFileSystem(inp));
        }
        if (POIXMLDocument.hasOOXMLHeader(inp)) {
            return createExtractor(OPCPackage.open(f.toString()));
        }
        throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
    }
}
/**
 * Command line entry point: prints the metadata text followed by the body text
 * of every file named on the command line, separated by divider lines.
 * <p>
 * When no file name is given, a usage message is written to STDERR and the JVM
 * exits with status 1.
 *
 * @param args the names of the files to extract text from.
 * @throws Exception if an extractor cannot be created or fails to produce text.
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Use:");
        System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
        System.exit(1);
    }

    for (String fileName : args) {
        System.out.println(DIVIDER);

        File inputFile = new File(fileName);
        System.out.println(inputFile);

        // Both extractors are obtained before any of their text is printed.
        POITextExtractor textExtractor = ExtractorFactory.createExtractor(inputFile);
        POITextExtractor metadataExtractor = textExtractor.getMetadataTextExtractor();

        System.out.println(" " + DIVIDER);
        String metadataText = metadataExtractor.getText();
        System.out.println(metadataText);

        System.out.println(" " + DIVIDER);
        String bodyText = textExtractor.getText();
        System.out.println(bodyText);

        System.out.println(DIVIDER);
    }
}
}
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source); if (theExtractor instanceof ExcelExtractor) {
new ExtractorFactory(); POITextExtractor pptExtractor = ExtractorFactory .createExtractor(source);
e.add( createExtractor( (DirectoryNode)dirs.get(i) ) ); e.add( createExtractor(nonPOIFS.get(i)) ); } catch(IllegalArgumentException ie) {
poiExtractor = ExtractorFactory.createExtractor(pkg);