/**
 * Parses an in-memory AWT image by rendering it to a temporary PNG file and
 * delegating to the stream-based {@code parse} overload.
 *
 * <p>The image is first painted onto an RGB {@link BufferedImage} of the same
 * dimensions; previously the buffer was written out without ever being
 * painted, so the delegate only ever saw a blank picture.
 *
 * @param image    the image to parse; its width and height are read with a
 *                 {@code null} observer, so it is expected to be fully loaded
 * @param handler  receives the SAX events produced by the delegate
 * @param metadata metadata passed through to the delegate
 * @param context  parse context passed through to the delegate
 * @throws IOException   if the temporary file cannot be written or read
 * @throws SAXException  if the delegate fails to emit SAX events
 * @throws TikaException if the delegate fails or the temporary resources cannot be disposed
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // Copy the pixels of the source image into the buffer that gets encoded.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the writer before the file is reopened for reading.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Runs after both streams are closed, so no handle on the temp file remains open here.
        tmp.dispose();
    }
}
/**
 * Wraps the incoming stream in a {@link TikaInputStream} tied to a private
 * {@link TemporaryResources} instance and passes the wrapped stream on to
 * {@code parse(...)}. The temporary resources are disposed of whether the
 * delegate returns normally or throws.
 *
 * @param stream   the raw document stream
 * @param handler  SAX handler passed through to the delegate
 * @param metadata metadata passed through to the delegate
 * @param context  parse context passed through to the delegate
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(stream, resources);
        parse(wrapped, handler, metadata, context);
    } finally {
        resources.dispose();
    }
}
/**
 * Digests the stream (when a digester is configured) and then hands the same
 * wrapped stream to the superclass parser.
 *
 * <p>The stream is wrapped in a {@link TikaInputStream} before the guarded
 * region starts; the associated {@link TemporaryResources} are disposed of
 * once digesting and parsing have finished or failed.
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    final TikaInputStream wrapped = TikaInputStream.get(stream, resources);
    try {
        if (digester != null) {
            digester.digest(wrapped, metadata, context);
        }
        super.parse(wrapped, handler, metadata, context);
    } finally {
        resources.dispose();
    }
}
}
/**
 * Feeds one shared {@link TikaInputStream} to every configured digester in
 * iteration order.
 *
 * <p>A {@link TikaException} raised while disposing of the temporary
 * resources is rethrown wrapped in an {@code IOExceptionWithCause}, since this
 * method only declares {@link IOException}.
 *
 * @param is           the stream to digest
 * @param m            metadata passed to each digester
 * @param parseContext parse context passed to each digester
 * @throws IOException if a digester fails or the temporary resources cannot be disposed
 */
@Override
public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
    final TemporaryResources resources = new TemporaryResources();
    final TikaInputStream shared = TikaInputStream.get(is, resources);
    try {
        for (DigestingParser.Digester each : digesters) {
            each.digest(shared, m, parseContext);
        }
    } finally {
        try {
            resources.dispose();
        } catch (TikaException disposeFailure) {
            throw new IOExceptionWithCause(disposeFailure);
        }
    }
}
}
/**
 * Executes the configured external command and passes the given document
 * stream as a simple XHTML document to the given SAX content handler.
 * Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
 * has been called to set patterns.
 *
 * <p>The handler is wrapped in an {@link XHTMLContentHandler} and the stream
 * in a {@link TikaInputStream}; the actual work is done by the internal
 * {@code parse} overload, which also receives the temporary resources that
 * are disposed of here afterwards.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(stream, resources);
        parse(wrapped, xhtml, metadata, resources);
    } finally {
        resources.dispose();
    }
}
} finally { if (tmp != null) { tmp.dispose();
/** * Use this to parse content without starting a new document. * This appends SAX events to xhtml without re-adding the metadata, body start, etc. * * @param stream inputstream * @param xhtml handler * @param config TesseractOCRConfig to use for this parse * @throws IOException * @throws SAXException * @throws TikaException * */ public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException { // If Tesseract is not on the path with the current config, do not try to run OCR // getSupportedTypes shouldn't have listed us as handling it, so this should only // occur if someone directly calls this parser, not via DefaultParser or similar if (! hasTesseract(config)) return; TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tikaStream = TikaInputStream.get(stream, tmp); File tmpImgFile = tmp.createTemporaryFile(); parse(tikaStream, tmpImgFile, parseContext, xhtml, config); } finally { tmp.dispose(); } }
} finally { if (tmp != null) { tmp.dispose();
/**
 * Extracts WebP image metadata from the stream's backing file and then emits
 * an empty XHTML document to the handler.
 *
 * <p>The temporary resources created for the file-backed stream are disposed
 * of before the XHTML document is written.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(stream, resources);
        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseWebP(wrapped.getFile());
    } finally {
        resources.dispose();
    }

    // No textual content is produced: just open and close the document.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
}
/**
 * Extracts TIFF image metadata from the stream's backing file, runs the
 * {@code JempboxExtractor} over the same stream, and then emits an empty
 * XHTML document to the handler.
 *
 * <p>The temporary resources created for the file-backed stream are disposed
 * of before the XHTML document is written.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(stream, resources);

        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(wrapped.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(wrapped);
    } finally {
        resources.dispose();
    }

    // No textual content is produced: just open and close the document.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
xhtml.endDocument(); } finally { tmp.dispose();
/**
 * Extracts JPEG image metadata from the stream's backing file, runs the
 * {@code JempboxExtractor} over the same stream, and then emits an empty
 * XHTML document to the handler.
 *
 * <p>The temporary resources created for the file-backed stream are disposed
 * of before the XHTML document is written.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(stream, resources);

        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(wrapped.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(wrapped);
    } finally {
        resources.dispose();
    }

    // No textual content is produced: just open and close the document.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
public MediaType detect(InputStream input, Metadata metadata) throws IOException { // Check if we have access to the document if (input == null) { return MediaType.OCTET_STREAM; } TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(input, tmp); byte[] prefix = new byte[1024]; // enough for all known formats int length = tis.peek(prefix); MediaType type = detectArchiveFormat(prefix, length); if (type == TIFF) { return TIFF; } else if (PackageParser.isZipArchive(type) && TikaInputStream.isTikaInputStream(input)) { return detectZipFormat(tis); } else if (!type.equals(MediaType.OCTET_STREAM)) { return type; } else { return detectCompressorFormat(prefix, length); } } finally { try { tmp.dispose(); } catch (TikaException e) { // ignore } } }
public void parse( InputStream stream, ContentHandler ignored, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(stream, tmp); // Figure out what we have to process String filename = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY); MediaType type = detector.detect(tis, metadata); if (extractor == null) { // Let the handler process the embedded resource handler.handle(filename, type, tis); } else { // Use a temporary file to process the stream twice File file = tis.getFile(); // Let the handler process the embedded resource try (InputStream input = TikaInputStream.get(file)) { handler.handle(filename, type, input); } // Recurse extractor.extract(tis, extractor, handler); } } finally { tmp.dispose(); } }
/**
 * Opens the document as a geographic {@link DataStore}, copies its metadata
 * into a {@link DefaultMetadata} and passes that to {@code extract}.
 *
 * <p>The content type is set to {@code geoInfoType} before anything else is
 * attempted. The data store is opened in a try-with-resources statement so it
 * is closed again once extraction finishes or fails; previously it was left
 * open. Temporary resources are only created, and only disposed of, when the
 * caller did not already supply a {@link TikaInputStream}.
 *
 * @param inputStream    the document stream
 * @param contentHandler receives the XHTML output
 * @param metadata       receives the content type and the extracted values
 * @param parseContext   not read by this method
 * @throws IOException   if the stream cannot be spooled to a file
 * @throws SAXException  if writing to the content handler fails
 * @throws TikaException if the storage format is unsupported, or the data
 *                       store fails while being opened, read or closed
 */
@Override
public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext)
        throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, geoInfoType);
    XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(contentHandler, metadata);

    TemporaryResources tmp = TikaInputStream.isTikaInputStream(inputStream) ? null : new TemporaryResources();
    try {
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, tmp);
        File file = tikaInputStream.getFile();

        // Close the store before the temporary file behind it is disposed of.
        try (DataStore dataStore = DataStores.open(file)) {
            DefaultMetadata defaultMetadata = new DefaultMetadata(dataStore.getMetadata());
            extract(xhtmlContentHandler, metadata, defaultMetadata);
        }
    } catch (UnsupportedStorageException e) {
        throw new TikaException("UnsupportedStorageException", e);
    } catch (DataStoreException e) {
        throw new TikaException("DataStoreException", e);
    } finally {
        if (tmp != null) {
            tmp.dispose();
        }
    }
}
tmp.dispose();
extractor.parseEmbedded(tis, xhtml, entrydata, true); } finally { tmp.dispose();
throw new IOExceptionWithCause("error writing OCR content from PDF", e); } finally { tmp.dispose();
tmp.dispose();
} finally { try { tmp.dispose(); } catch (TikaException e) { throw new IOExceptionWithCause(e);