/**
 * Parses the response of the remote endpoint identified by the {@code uri} field.
 *
 * <p>For a {@code telnet} URI a plain {@link Socket} is opened to the URI's host and port;
 * for every other scheme a {@link URLConnection} with output enabled is used. In both cases
 * a {@code ParsingTask} is created from {@code stream} and the endpoint's output stream, and
 * that task parses the endpoint's input stream.
 *
 * @param stream the document stream handed to the {@code ParsingTask}
 * @param handler content handler passed through to the task
 * @param metadata metadata passed through to the task
 * @param context parse context passed through to the task
 * @throws IOException if the connection or the task fails with an I/O error
 * @throws SAXException propagated from the task
 * @throws TikaException propagated from the task
 */
private void parse(
        TikaInputStream stream,
        ContentHandler handler,
        Metadata metadata,
        ParseContext context)
        throws IOException, SAXException, TikaException {
    if (!"telnet".equals(uri.getScheme())) {
        URLConnection remote = uri.toURL().openConnection();
        remote.setDoOutput(true);
        remote.connect();
        try (InputStream response = remote.getInputStream()) {
            ParsingTask task = new ParsingTask(stream, remote.getOutputStream());
            // The task only sees a close-shielded view of the response; the
            // try-with-resources statement is what actually closes it.
            task.parse(new CloseShieldInputStream(response), handler, metadata, context);
        }
        return;
    }

    try (Socket telnet = new Socket(uri.getHost(), uri.getPort())) {
        ParsingTask task =
                new ParsingTask(
                        stream,
                        new FilterOutputStream(telnet.getOutputStream()) {
                            // Closing this wrapper only shuts down the socket's output
                            // side; the socket itself is closed by try-with-resources.
                            @Override
                            public void close() throws IOException {
                                telnet.shutdownOutput();
                            }
                        });
        task.parse(telnet.getInputStream(), handler, metadata, context);
    }
}
/**
 * Attempts a SAX parse of the given stream and returns the root element recorded by an
 * {@code ExtractorHandler}.
 *
 * <p>The parser receives a {@code CloseShieldInputStream} wrapper around {@code stream}.
 * Any exception thrown during the parse attempt is swallowed, so the result is whatever
 * the handler's {@code rootElement} field holds afterwards.
 *
 * @param stream the content to inspect
 * @return the handler's {@code rootElement} after the parse attempt
 * @since Apache Tika 0.9
 */
public QName extractRootElement(InputStream stream) {
    ExtractorHandler rootCapture = new ExtractorHandler();
    try {
        InputStream shielded = new CloseShieldInputStream(stream);
        OfflineContentHandler offline = new OfflineContentHandler(rootCapture);
        XMLReaderUtils.parseSAX(shielded, offline, EMPTY_CONTEXT);
    } catch (Exception ignored) {
        // Intentionally ignored: a failed parse simply leaves the handler with
        // whatever it captured before the failure.
    }
    return rootCapture.rootElement;
}
final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp); if (stream instanceof TikaInputStream) { final Object container = ((TikaInputStream) stream).getOpenContainer();
poifs = new POIFSFileSystem(new CloseShieldInputStream(stream)); } catch (RuntimeException e) { throw new IOExceptionWithCause(e);
/** * @since Apache Tika 0.9 */ public QName extractRootElement(InputStream stream) { ExtractorHandler handler = new ExtractorHandler(); try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); factory.setValidating(false); try { factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (SAXNotRecognizedException e) { // TIKA-271 and TIKA-1000: Some XML parsers do not support the secure-processing // feature, even though it's required by JAXP in Java 5. Ignoring // the exception is fine here, deployments without this feature // are inherently vulnerable to XML denial-of-service attacks. } factory.newSAXParser().parse( new CloseShieldInputStream(stream), new OfflineContentHandler(handler)); } catch (Exception ignore) { } return handler.rootElement; }
/**
 * Parses the response of the remote endpoint identified by the {@code uri} field.
 *
 * <p>For a {@code telnet} URI a plain {@link Socket} is opened to the URI's host and port;
 * for every other scheme a {@link URLConnection} with output enabled is used. In both cases
 * a {@code ParsingTask} is created from {@code stream} and the endpoint's output stream, and
 * that task parses the endpoint's input stream.
 *
 * @param stream the document stream handed to the {@code ParsingTask}
 * @param handler content handler passed through to the task
 * @param metadata metadata passed through to the task
 * @param context parse context passed through to the task
 * @throws IOException if the connection or the task fails with an I/O error
 * @throws SAXException propagated from the task
 * @throws TikaException propagated from the task
 */
private void parse(
        TikaInputStream stream,
        ContentHandler handler,
        Metadata metadata,
        ParseContext context)
        throws IOException, SAXException, TikaException {
    if (!"telnet".equals(uri.getScheme())) {
        URLConnection remote = uri.toURL().openConnection();
        remote.setDoOutput(true);
        remote.connect();
        try (InputStream response = remote.getInputStream()) {
            ParsingTask task = new ParsingTask(stream, remote.getOutputStream());
            // The task only sees a close-shielded view of the response; the
            // try-with-resources statement is what actually closes it.
            task.parse(new CloseShieldInputStream(response), handler, metadata, context);
        }
        return;
    }

    try (Socket telnet = new Socket(uri.getHost(), uri.getPort())) {
        ParsingTask task =
                new ParsingTask(
                        stream,
                        new FilterOutputStream(telnet.getOutputStream()) {
                            // Closing this wrapper only shuts down the socket's output
                            // side; the socket itself is closed by try-with-resources.
                            @Override
                            public void close() throws IOException {
                                telnet.shutdownOutput();
                            }
                        });
        task.parse(telnet.getInputStream(), handler, metadata, context);
    }
}
/**
 * Parses the response of the remote endpoint identified by the {@code uri} field.
 *
 * <p>For a {@code telnet} URI a plain {@link Socket} is opened to the URI's host and port;
 * for every other scheme a {@link URLConnection} with output enabled is used. In both cases
 * a {@code ParsingTask} is created from {@code stream} and the endpoint's output stream, and
 * that task parses the endpoint's input stream.
 *
 * @param stream the document stream handed to the {@code ParsingTask}
 * @param handler content handler passed through to the task
 * @param metadata metadata passed through to the task
 * @param context parse context passed through to the task
 * @throws IOException if the connection or the task fails with an I/O error
 * @throws SAXException propagated from the task
 * @throws TikaException propagated from the task
 */
private void parse(
        TikaInputStream stream,
        ContentHandler handler,
        Metadata metadata,
        ParseContext context)
        throws IOException, SAXException, TikaException {
    if (!"telnet".equals(uri.getScheme())) {
        URLConnection remote = uri.toURL().openConnection();
        remote.setDoOutput(true);
        remote.connect();
        try (InputStream response = remote.getInputStream()) {
            ParsingTask task = new ParsingTask(stream, remote.getOutputStream());
            // The task only sees a close-shielded view of the response; the
            // try-with-resources statement is what actually closes it.
            task.parse(new CloseShieldInputStream(response), handler, metadata, context);
        }
        return;
    }

    try (Socket telnet = new Socket(uri.getHost(), uri.getPort())) {
        ParsingTask task =
                new ParsingTask(
                        stream,
                        new FilterOutputStream(telnet.getOutputStream()) {
                            // Closing this wrapper only shuts down the socket's output
                            // side; the socket itself is closed by try-with-resources.
                            @Override
                            public void close() throws IOException {
                                telnet.shutdownOutput();
                            }
                        });
        task.parse(telnet.getInputStream(), handler, metadata, context);
    }
}
/**
 * Extracts the archive resource into a temporary directory and then runs the batch-import
 * process on the extracted files.
 *
 * <p>Every non-directory entry is copied to a file below the temporary directory. An entry
 * whose name would resolve outside that directory (for example via {@code ../} segments)
 * is rejected. The temporary directory is deleted once the import finishes or fails.
 *
 * @param resource the archive's origin; used only in the failure message
 * @param resourceStream the archive stream whose entries are extracted
 * @param options passed through to {@code importDataDirectory}
 * @throws RuntimeException wrapping the {@link IOException} when extraction fails, including
 *     when an entry would be written outside the temporary directory
 */
private void importDataArchive(
        final Resource resource,
        final ArchiveInputStream resourceStream,
        BatchImportOptions options) {

    final File tempDir = Files.createTempDir();
    try {
        // Canonical extraction root with a trailing separator, so that a sibling directory
        // sharing the same name prefix cannot pass the containment check below.
        final String extractionRoot = tempDir.getCanonicalPath() + File.separator;

        ArchiveEntry archiveEntry;
        while ((archiveEntry = resourceStream.getNextEntry()) != null) {
            final File entryFile = new File(tempDir, archiveEntry.getName());
            if (!archiveEntry.isDirectory()) {
                // Guard against path traversal: the entry name comes from the archive
                // and must not escape the extraction directory.
                if (!entryFile.getCanonicalPath().startsWith(extractionRoot)) {
                    throw new IOException(
                            "Archive entry '"
                                    + archiveEntry.getName()
                                    + "' resolves outside of the extraction directory");
                }

                entryFile.getParentFile().mkdirs();

                // Close the per-entry output stream deterministically. The archive stream
                // is shielded so copying one entry cannot close it for the next one.
                try (FileOutputStream entryOut = new FileOutputStream(entryFile)) {
                    IOUtils.copy(new CloseShieldInputStream(resourceStream), entryOut);
                }
            }
        }

        importDataDirectory(tempDir, null, options);
    } catch (IOException e) {
        throw new RuntimeException(
                "Failed to extract data from '"
                        + resource
                        + "' to '"
                        + tempDir
                        + "' for batch import.",
                e);
    } finally {
        FileUtils.deleteQuietly(tempDir);
    }
}
/**
 * Extracts the archive resource into a temporary directory and then runs the batch-import
 * process on the extracted files.
 *
 * <p>Every non-directory entry is copied to a file below the temporary directory. An entry
 * whose name would resolve outside that directory (for example via {@code ../} segments)
 * is rejected. The temporary directory is deleted once the import finishes or fails.
 *
 * @param resource the archive's origin; used only in the failure message
 * @param resourceStream the archive stream whose entries are extracted
 * @param options passed through to {@code importDataDirectory}
 * @throws RuntimeException wrapping the {@link IOException} when extraction fails, including
 *     when an entry would be written outside the temporary directory
 */
private void importDataArchive(
        final Resource resource,
        final ArchiveInputStream resourceStream,
        BatchImportOptions options) {

    final File tempDir = Files.createTempDir();
    try {
        // Canonical extraction root with a trailing separator, so that a sibling directory
        // sharing the same name prefix cannot pass the containment check below.
        final String extractionRoot = tempDir.getCanonicalPath() + File.separator;

        ArchiveEntry archiveEntry;
        while ((archiveEntry = resourceStream.getNextEntry()) != null) {
            final File entryFile = new File(tempDir, archiveEntry.getName());
            if (!archiveEntry.isDirectory()) {
                // Guard against path traversal: the entry name comes from the archive
                // and must not escape the extraction directory.
                if (!entryFile.getCanonicalPath().startsWith(extractionRoot)) {
                    throw new IOException(
                            "Archive entry '"
                                    + archiveEntry.getName()
                                    + "' resolves outside of the extraction directory");
                }

                entryFile.getParentFile().mkdirs();

                // Close the per-entry output stream deterministically. The archive stream
                // is shielded so copying one entry cannot close it for the next one.
                try (FileOutputStream entryOut = new FileOutputStream(entryFile)) {
                    IOUtils.copy(new CloseShieldInputStream(resourceStream), entryOut);
                }
            }
        }

        importDataDirectory(tempDir, null, options);
    } catch (IOException e) {
        throw new RuntimeException(
                "Failed to extract data from '"
                        + resource
                        + "' to '"
                        + tempDir
                        + "' for batch import.",
                e);
    } finally {
        FileUtils.deleteQuietly(tempDir);
    }
}
private MediaType getMediaType(BufferedInputStream inputStream, String fileName) throws IOException { final TikaInputStream tikaInputStreamStream = TikaInputStream.get(new CloseShieldInputStream(inputStream)); try { final Detector detector = new DefaultDetector(); final Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); final MediaType type = detector.detect(tikaInputStreamStream, metadata); logger.debug("Determined '{}' for '{}'", type, fileName); return type; } catch (IOException e) { logger.warn("Failed to determine media type for '" + fileName + "' assuming XML", e); return null; } finally { IOUtils.closeQuietly(tikaInputStreamStream); // Reset the buffered stream to make up for anything read by the detector inputStream.reset(); } }
/**
 * Attempts a SAX parse of the given stream and returns the root element recorded by an
 * {@code ExtractorHandler}.
 *
 * <p>The parser receives a {@code CloseShieldInputStream} wrapper around {@code stream}.
 * Any exception thrown during the parse attempt is swallowed, so the result is whatever
 * the handler's {@code rootElement} field holds afterwards.
 *
 * @param stream the content to inspect
 * @return the handler's {@code rootElement} after the parse attempt
 * @since Apache Tika 0.9
 */
public QName extractRootElement(InputStream stream) {
    ExtractorHandler rootCapture = new ExtractorHandler();
    try {
        InputStream shielded = new CloseShieldInputStream(stream);
        OfflineContentHandler offline = new OfflineContentHandler(rootCapture);
        XMLReaderUtils.parseSAX(shielded, offline, EMPTY_CONTEXT);
    } catch (Exception ignored) {
        // Intentionally ignored: a failed parse simply leaves the handler with
        // whatever it captured before the failure.
    }
    return rootCapture.rootElement;
}
private MediaType getMediaType(BufferedInputStream inputStream, String fileName) throws IOException { final TikaInputStream tikaInputStreamStream = TikaInputStream.get(new CloseShieldInputStream(inputStream)); try { final Detector detector = new DefaultDetector(); final Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); final MediaType type = detector.detect(tikaInputStreamStream, metadata); logger.debug("Determined '{}' for '{}'", type, fileName); return type; } catch (IOException e) { logger.warn("Failed to determine media type for '" + fileName + "' assuming XML", e); return null; } finally { IOUtils.closeQuietly(tikaInputStreamStream); // Reset the buffered stream to make up for anything read by the detector inputStream.reset(); } }
// Rebind `stream` to a CloseShieldInputStream wrapper around the original stream, so all
// code after this line operates on the wrapper.
// NOTE(review): presumably this keeps a downstream close() from closing the caller's
// stream — confirm against the CloseShieldInputStream implementation in use.
stream = new CloseShieldInputStream(stream);
final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp); if (stream instanceof TikaInputStream) { final Object container = ((TikaInputStream) stream).getOpenContainer();
final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp); if (stream instanceof TikaInputStream) { final Object container = ((TikaInputStream) stream).getOpenContainer();
void delegateParsing(final InputStream input, final ContentHandler handler, final Metadata metadata) throws IOException, SAXException { try (final TikaInputStream tis = TikaInputStream.get(new CloseShieldInputStream(input))) { if (input instanceof TikaInputStream) { final Object container = ((TikaInputStream) input).getOpenContainer(); if (container != null) { tis.setOpenContainer(container); } } // Use the delegate parser to parse this entry. DELEGATING_PARSER.parse(tis, handler, metadata, context); } catch (final EncryptedDocumentException e) { logger.error("Unable to decrypt encrypted document embedded in document: \"{}\" ({}) (in \"{}\").", metadata.get(Metadata.RESOURCE_NAME_KEY), metadata.get(Metadata.CONTENT_TYPE), root, e); metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM, ExceptionUtils.getFilteredStackTrace(e)); } catch (final TikaException e) { logger.error("Unable to parse embedded document: \"{}\" ({}) (in \"{}\").", metadata.get(Metadata.RESOURCE_NAME_KEY), metadata.get(Metadata.CONTENT_TYPE), root, e); metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM, ExceptionUtils.getFilteredStackTrace(e)); } }
// Rebind `stream` to a CloseShieldInputStream wrapper around the original stream, so all
// code after this line operates on the wrapper.
// NOTE(review): presumably this keeps a downstream close() from closing the caller's
// stream — confirm against the CloseShieldInputStream implementation in use.
stream = new CloseShieldInputStream(stream);
pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), MemoryUsageSetting.setupMixed(100 * 1024 * 1024)); new CloseShieldInputStream(stream), "", MemoryUsageSetting.setupMixed(100 * 1024 * 1024)); } catch (Exception e) {
final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp); if (stream instanceof TikaInputStream) { final Object container = ((TikaInputStream) stream).getOpenContainer();
// Load the PDF from a CloseShieldInputStream wrapper around `stream`, passing the
// MemoryUsageSetting returned by setupMixed(100 * 1024 * 1024), i.e. an argument of 100 MiB.
// NOTE(review): in PDFBox that argument is presumably the main-memory budget before
// buffering falls back to temporary files — confirm for the PDFBox version in use.
pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), MemoryUsageSetting.setupMixed(100 * 1024 * 1024));