static boolean hasPython() { // check if python is installed and it has the required dependencies for the rotation program to run boolean hasPython = false; TemporaryResources tmp = null; try { tmp = new TemporaryResources(); File importCheck = tmp.createTemporaryFile(); String prg = "import numpy, matplotlib, skimage, _tkinter"; OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(importCheck), Charset.forName("UTF-8")); out.write(prg); out.close(); Process p = Runtime.getRuntime().exec("python " + importCheck.getAbsolutePath()); if (p.waitFor() == 0) { hasPython = true; } } catch (Exception e) { } finally { IOUtils.closeQuietly(tmp); } return hasPython; }
/**
 * Parses an in-memory image by rendering it to a temporary PNG file and delegating
 * to the stream-based {@code parse} overload.
 *
 * <p>The image is drawn with a {@code null} observer, so it is expected to be fully
 * loaded already. NOTE(review): for an image whose dimensions are not yet known,
 * {@code getWidth(null)}/{@code getHeight(null)} return -1 and the
 * {@code BufferedImage} constructor will reject them - confirm callers only pass
 * loaded images.
 *
 * @param image    the image to parse
 * @param handler  handler passed through to the stream-based parse
 * @param metadata metadata passed through to the stream-based parse
 * @param context  parse context passed through to the stream-based parse
 * @throws IOException   if the temporary file cannot be written or read
 * @throws SAXException  if thrown by the delegated parse
 * @throws TikaException if thrown by the delegated parse or while disposing temporary resources
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // Copy the source pixels into the buffer; without this the PNG written
        // below would be an empty (all-black) image of the right size.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the output stream before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Streams are already closed here, so the temporary file can be removed
        // and a failure in dispose() can no longer leak them.
        tmp.dispose();
    }
}
output = tmp.createTemporaryFile(); outputFromStdOut = false; cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
tempOutputFile = tmp.createTemporaryFile(); commandSegment = commandSegment.replace( ExternalParser.OUTPUT_FILE_TOKEN,
File rotationScript = tmp.createTemporaryFile(); Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
File tmpFile = tmp.createTemporaryFile(); FileUtils.copyFile(input, tmpFile); processImage(tmpFile, config);
/** * Use this to parse content without starting a new document. * This appends SAX events to xhtml without re-adding the metadata, body start, etc. * * @param stream inputstream * @param xhtml handler * @param config TesseractOCRConfig to use for this parse * @throws IOException * @throws SAXException * @throws TikaException * */ public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException { // If Tesseract is not on the path with the current config, do not try to run OCR // getSupportedTypes shouldn't have listed us as handling it, so this should only // occur if someone directly calls this parser, not via DefaultParser or similar if (! hasTesseract(config)) return; TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tikaStream = TikaInputStream.get(stream, tmp); File tmpImgFile = tmp.createTemporaryFile(); parse(tikaStream, tmpImgFile, parseContext, xhtml, config); } finally { tmp.dispose(); } }
File tmpOCROutputFile = tmp.createTemporaryFile();
static boolean hasPython() { // check if python is installed and it has the required dependencies for the rotation program to run boolean hasPython = false; TemporaryResources tmp = null; try { tmp = new TemporaryResources(); File importCheck = tmp.createTemporaryFile(); String prg = "import numpy, matplotlib, skimage, _tkinter"; OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(importCheck), Charset.forName("UTF-8")); out.write(prg); out.close(); Process p = Runtime.getRuntime().exec("python " + importCheck.getAbsolutePath()); if (p.waitFor() == 0) { hasPython = true; } } catch (Exception e) { } finally { IOUtils.closeQuietly(tmp); } return hasPython; }
/**
 * Parses an in-memory image by rendering it to a temporary PNG file and delegating
 * to the stream-based {@code parse} overload.
 *
 * <p>The image is drawn with a {@code null} observer, so it is expected to be fully
 * loaded already. NOTE(review): for an image whose dimensions are not yet known,
 * {@code getWidth(null)}/{@code getHeight(null)} return -1 and the
 * {@code BufferedImage} constructor will reject them - confirm callers only pass
 * loaded images.
 *
 * @param image    the image to parse
 * @param handler  handler passed through to the stream-based parse
 * @param metadata metadata passed through to the stream-based parse
 * @param context  parse context passed through to the stream-based parse
 * @throws IOException   if the temporary file cannot be written or read
 * @throws SAXException  if thrown by the delegated parse
 * @throws TikaException if thrown by the delegated parse or while disposing temporary resources
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // Copy the source pixels into the buffer; without this the PNG written
        // below would be an empty (all-black) image of the right size.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the output stream before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Streams are already closed here, so the temporary file can be removed
        // and a failure in dispose() can no longer leak them.
        tmp.dispose();
    }
}
/**
 * Parses an in-memory image by rendering it to a temporary PNG file and delegating
 * to the stream-based {@code parse} overload.
 *
 * <p>The image is drawn with a {@code null} observer, so it is expected to be fully
 * loaded already. NOTE(review): for an image whose dimensions are not yet known,
 * {@code getWidth(null)}/{@code getHeight(null)} return -1 and the
 * {@code BufferedImage} constructor will reject them - confirm callers only pass
 * loaded images.
 *
 * @param image    the image to parse
 * @param handler  handler passed through to the stream-based parse
 * @param metadata metadata passed through to the stream-based parse
 * @param context  parse context passed through to the stream-based parse
 * @throws IOException   if the temporary file cannot be written or read
 * @throws SAXException  if thrown by the delegated parse
 * @throws TikaException if thrown by the delegated parse or while disposing temporary resources
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // Copy the source pixels into the buffer; without this the PNG written
        // below would be an empty (all-black) image of the right size.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the output stream before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Streams are already closed here, so the temporary file can be removed
        // and a failure in dispose() can no longer leak them.
        tmp.dispose();
    }
}
private EmbeddedTikaDocument saveEmbedded(final String name, final InputStream input, final Metadata metadata) throws IOException { final Path path = tmp.createTemporaryFile().toPath(); // Add the embedded document to the parent with a key (which is the temporary path) so that it can be looked // up later. final EmbeddedTikaDocument embed = parent.addEmbed(path.toString(), new PathIdentifier(), path, metadata); if ((input instanceof TikaInputStream) && ((TikaInputStream) input).getOpenContainer() != null && ( (TikaInputStream) input).getOpenContainer() instanceof DirectoryEntry) { final POIFSFileSystem fs = new POIFSFileSystem(); saveEntries((DirectoryEntry) ((TikaInputStream) input).getOpenContainer(), fs.getRoot()); try (final OutputStream output = Files.newOutputStream(path)) { fs.writeFilesystem(output); } return embed; } final long copied; try { copied = Files.copy(input, path, StandardCopyOption.REPLACE_EXISTING); } finally { input.close(); } if (copied > 0) { logger.info("Copied {} bytes from embedded document \"{}\" in \"{}\" to file.", copied, name, parent); } else { logger.warn("No bytes copied for embedded document \"{}\" in \"{}\". " + "This could indicate a downstream error.", name, parent); } return embed; }
File rotationScript = tmp.createTemporaryFile(); Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
File rotationScript = tmp.createTemporaryFile(); Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
/** * Use this to parse content without starting a new document. * This appends SAX events to xhtml without re-adding the metadata, body start, etc. * * @param stream inputstream * @param xhtml handler * @param config TesseractOCRConfig to use for this parse * @throws IOException * @throws SAXException * @throws TikaException * */ public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException { // If Tesseract is not on the path with the current config, do not try to run OCR // getSupportedTypes shouldn't have listed us as handling it, so this should only // occur if someone directly calls this parser, not via DefaultParser or similar if (! hasTesseract(config)) return; TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tikaStream = TikaInputStream.get(stream, tmp); File tmpImgFile = tmp.createTemporaryFile(); parse(tikaStream, tmpImgFile, parseContext, xhtml, config); } finally { tmp.dispose(); } }
File tmpFile = tmp.createTemporaryFile(); FileUtils.copyFile(input, tmpFile); processImage(tmpFile, config);
/** * Use this to parse content without starting a new document. * This appends SAX events to xhtml without re-adding the metadata, body start, etc. * * @param stream inputstream * @param xhtml handler * @param config TesseractOCRConfig to use for this parse * @throws IOException * @throws SAXException * @throws TikaException * */ public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException { // If Tesseract is not on the path with the current config, do not try to run OCR // getSupportedTypes shouldn't have listed us as handling it, so this should only // occur if someone directly calls this parser, not via DefaultParser or similar if (! hasTesseract(config)) return; TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tikaStream = TikaInputStream.get(stream, tmp); File tmpImgFile = tmp.createTemporaryFile(); parse(tikaStream, tmpImgFile, parseContext, xhtml, config); } finally { tmp.dispose(); } }
File tmpFile = tmp.createTemporaryFile(); FileUtils.copyFile(input, tmpFile); processImage(tmpFile, config);
File tmpOCROutputFile = tmp.createTemporaryFile();
File tmpOCROutputFile = tmp.createTemporaryFile();