/**
 * Extracts the zip data supplied by {@code byteSource} into {@code outDir}.
 * When {@code cacheLocally} is true, the data is cached to local disk before unzipping, which may behave more
 * predictably than unzipping a large file directly off a network stream.
 *
 * This overload simply forwards to the four-argument {@code unzip}, passing {@code FileUtils.IS_EXCEPTION}
 * as the third argument.
 *
 * @param byteSource   The ByteSource which supplies the zip data
 * @param outDir       The directory that receives the contents of the zip
 * @param cacheLocally Whether the data should be cached locally before unzipping
 *
 * @return A FileCopyResult describing the zip entries written to disk
 *
 * @throws IOException propagated from the delegated {@code unzip} call
 */
public static FileUtils.FileCopyResult unzip(
    final ByteSource byteSource,
    final File outDir,
    boolean cacheLocally
) throws IOException
{
  final FileUtils.FileCopyResult copyResult = unzip(byteSource, outDir, FileUtils.IS_EXCEPTION, cacheLocally);
  return copyResult;
}
/**
 * Zips the contents of {@code directory} into {@code outputZipFile}. Sub directories are skipped.
 * Forwards to the three-argument {@code zip} with its boolean argument set to {@code false}.
 *
 * @param directory     The directory whose contents should be added to the zip.
 * @param outputZipFile The file that receives the zipped data
 *
 * @return The number of bytes (uncompressed) read from the input directory.
 *
 * @throws IOException propagated from the delegated {@code zip} call
 */
public static long zip(File directory, File outputZipFile) throws IOException
{
  final boolean fsync = false;
  return zip(directory, outputZipFile, fsync);
}
/**
 * Gunzips the data supplied by {@code in} into {@code outFile}.
 * Forwards to the three-argument {@code gunzip}, passing {@code FileUtils.IS_EXCEPTION} as the third argument.
 *
 * @param in      The ByteSource supplying the compressed data
 * @param outFile The file that receives the uncompressed results
 *
 * @return A FileCopyResult of the file written
 */
public static FileUtils.FileCopyResult gunzip(final ByteSource in, File outFile)
{
  final FileUtils.FileCopyResult written = gunzip(in, outFile, FileUtils.IS_EXCEPTION);
  return written;
}
/** * Uncompress using a gzip uncompress algorithm from the `pulledFile` to the `outDir`. * Unlike `org.apache.druid.java.util.common.CompressionUtils.gunzip`, this function takes an output *DIRECTORY* and tries to guess the file name. * It is recommended that the caller use `org.apache.druid.java.util.common.CompressionUtils.gunzip` and specify the output file themselves to ensure names are as expected * * @param pulledFile The source file * @param outDir The destination directory to put the resulting file * * @throws IOException on propagated IO exception, IAE if it cannot determine the proper new name for `pulledFile` */ @Deprecated // See description for alternative public static void gunzip(File pulledFile, File outDir) { final File outFile = new File(outDir, org.apache.druid.java.util.common.CompressionUtils.getGzBaseName(pulledFile.getName())); org.apache.druid.java.util.common.CompressionUtils.gunzip(pulledFile, outFile); if (!pulledFile.delete()) { log.error("Could not delete tmpFile[%s].", pulledFile); } }
if (!isZip(outputZipFile.getName())) { log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory); return FileUtils.writeAtomically(outputZipFile, out -> zip(directory, out)); } else { try ( final OutputStream out = Channels.newOutputStream(fileChannel) ) { return zip(directory, out);
if (CompressionUtils.isZip(s3Coords.path)) { final FileUtils.FileCopyResult result = CompressionUtils.unzip( byteSource, outDir, return result; if (CompressionUtils.isGz(s3Coords.path)) { final String fname = Files.getNameWithoutExtension(uri.getPath()); final File outFile = new File(outDir, fname); final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY); log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath()); return result;
/**
 * Zips {@code testDir} through an output stream, unzips it again through an input stream, and checks that the
 * extracted copy of {@code testFile} exists and passes {@code assertGoodDataStream}.
 */
@Test
public void testGoodZipStream() throws IOException
{
  final File tmpDir = temporaryFolder.newFolder("testGoodZipStream");
  final File zipFile = new File(tmpDir, "compressionUtilTest.zip");
  // The test opens this stream, so the test closes it; closing a file stream a second time is harmless
  // should the utility have closed it already.
  try (final FileOutputStream zipOut = new FileOutputStream(zipFile)) {
    CompressionUtils.zip(testDir, zipOut);
  }

  final File newDir = new File(tmpDir, "newDir");
  // Fail here with a clear message instead of later inside unzip if the directory cannot be created.
  Assert.assertTrue("Unable to create directory " + newDir.getAbsolutePath(), newDir.mkdir());
  try (final FileInputStream zipIn = new FileInputStream(zipFile)) {
    CompressionUtils.unzip(zipIn, newDir);
  }

  final Path newPath = Paths.get(newDir.getAbsolutePath(), testFile.getName());
  Assert.assertTrue(newPath.toFile().exists());
  try (final FileInputStream inputStream = new FileInputStream(newPath.toFile())) {
    assertGoodDataStream(inputStream);
  }
}
/**
 * Gzips {@code testFile} via the ByteSource/ByteSink API, checks the compressed file can be read back through
 * {@code CompressionUtils.decompress}, then deletes {@code testFile} and restores it with {@code gunzip}.
 */
@Test
public void testGoodGzipByteSource() throws IOException
{
  final File workDir = temporaryFolder.newFolder("testGoodGzipByteSource");
  final String gzName = testFile.getName() + ".gz";
  final File compressed = new File(workDir, gzName);
  Assert.assertFalse(compressed.exists());

  // Compress and verify the compressed form decodes to the expected data.
  CompressionUtils.gzip(Files.asByteSource(testFile), Files.asByteSink(compressed), Predicates.alwaysTrue());
  Assert.assertTrue(compressed.exists());
  try (
      final InputStream inputStream = CompressionUtils.decompress(
          new FileInputStream(compressed),
          compressed.getName()
      )
  ) {
    assertGoodDataStream(inputStream);
  }

  // Remove the original so the gunzip below has to recreate it.
  final boolean originalRemoved = testFile.delete();
  if (!originalRemoved) {
    throw new IOE("Unable to delete file [%s]", testFile.getAbsolutePath());
  }
  Assert.assertFalse(testFile.exists());

  CompressionUtils.gunzip(Files.asByteSource(compressed), testFile);
  Assert.assertTrue(testFile.exists());
  try (final InputStream inputStream = new FileInputStream(testFile)) {
    assertGoodDataStream(inputStream);
  }
}
/**
 * Wraps the raw stream of {@code object} by handing it to {@code CompressionUtils.decompress} together with
 * the object's path.
 * NOTE(review): presumably the path lets decompress pick a format from the file name -- confirm.
 *
 * @param object The file the stream was opened for
 * @param stream The raw stream to wrap
 *
 * @return the stream returned by {@code CompressionUtils.decompress}
 *
 * @throws IOException propagated from {@code CompressionUtils.decompress}
 */
@Override
protected InputStream wrapObjectStream(File object, InputStream stream) throws IOException
{
  final InputStream wrapped = CompressionUtils.decompress(stream, object.getPath());
  return wrapped;
}
/**
 * Builds a malicious zip with {@code CompressionUtils.makeEvilZip} and verifies that the file-based
 * {@code unzip} rejects it with an ISE and that /tmp/evil.txt is not written.
 */
@Test
public void testEvilZip() throws IOException
{
  final File tmpDir = temporaryFolder.newFolder("testEvilZip");

  // Start from a clean slate so a leftover file cannot be mistaken for a triggered exploit.
  final File exploitTarget = new File("/tmp/evil.txt");
  java.nio.file.Files.deleteIfExists(exploitTarget.toPath());

  final File maliciousZip = new File(tmpDir, "evil.zip");
  java.nio.file.Files.deleteIfExists(maliciousZip.toPath());
  CompressionUtils.makeEvilZip(maliciousZip);

  ISE rejection = null;
  try {
    CompressionUtils.unzip(maliciousZip, tmpDir);
  }
  catch (ISE ise) {
    rejection = ise;
  }

  if (rejection != null) {
    Assert.assertTrue(rejection.getMessage().contains("does not start with outDir"));
    Assert.assertFalse("Zip exploit triggered, /tmp/evil.txt was written.", exploitTarget.exists());
    return;
  }

  Assert.fail("Exception was not thrown for malicious zip file");
}
/**
 * A name consisting only of the ".gz" suffix must make {@code getGzBaseName} throw IAE.
 */
@Test(expected = IAE.class)
public void testBadShortGzName()
{
  final String suffixOnlyName = ".gz";
  CompressionUtils.getGzBaseName(suffixOnlyName);
}
/**
 * Gzips {@code testFile} through the stream API, checks the result with a plain {@link GZIPInputStream},
 * then deletes {@code testFile} and restores it with the stream-based {@code gunzip}.
 */
@Test
public void testGoodGZStream() throws IOException
{
  final File tmpDir = temporaryFolder.newFolder("testGoodGZStream");
  final File gzFile = new File(tmpDir, testFile.getName() + ".gz");
  Assert.assertFalse(gzFile.exists());

  // The test opens these streams, so the test closes them; a second close of a file stream is harmless
  // should the utility have closed them already.
  try (
      final FileInputStream rawIn = new FileInputStream(testFile);
      final FileOutputStream gzOut = new FileOutputStream(gzFile)
  ) {
    CompressionUtils.gzip(rawIn, gzOut);
  }
  Assert.assertTrue(gzFile.exists());

  // Declared as two resources so the file stream is closed even if the GZIPInputStream constructor throws.
  try (
      final FileInputStream gzFileIn = new FileInputStream(gzFile);
      final InputStream inputStream = new GZIPInputStream(gzFileIn)
  ) {
    assertGoodDataStream(inputStream);
  }

  if (!testFile.delete()) {
    throw new IOE("Unable to delete file [%s]", testFile.getAbsolutePath());
  }
  Assert.assertFalse(testFile.exists());

  try (final FileInputStream gzIn = new FileInputStream(gzFile)) {
    CompressionUtils.gunzip(gzIn, testFile);
  }
  Assert.assertTrue(testFile.exists());
  try (final InputStream inputStream = new FileInputStream(testFile)) {
    assertGoodDataStream(inputStream);
  }
}
/**
 * Strips the extension from a gzip file name.
 *
 * @param fname The name of the gzip file
 *
 * @return the result of {@code Files.getNameWithoutExtension(fname)}, provided {@code isGz(fname)} holds and
 * that result is not empty
 *
 * @throws IAE if fname is not a valid "*.gz" file name
 */
public static String getGzBaseName(String fname)
{
  final String baseName = Files.getNameWithoutExtension(fname);
  // Reject names that are not gz names, and names such as ".gz" that leave nothing once the extension is gone.
  if (!isGz(fname) || baseName.isEmpty()) {
    throw new IAE("[%s] is not a valid gz file name", fname);
  }
  return baseName;
}
return result; if (CompressionUtils.isZip(sourceFile.getName())) { try { final FileUtils.FileCopyResult result = CompressionUtils.unzip( Files.asByteSource(sourceFile), dir, if (CompressionUtils.isGz(sourceFile.getName())) { final File outFile = new File(dir, CompressionUtils.getGzBaseName(sourceFile.getName())); final FileUtils.FileCopyResult result = CompressionUtils.gunzip( Files.asByteSource(sourceFile), outFile,
/**
 * Zips {@code testDir} to a file, unzips that file into a fresh directory, and checks that the extracted copy
 * of {@code testFile} exists and passes {@code assertGoodDataStream}.
 */
@Test
public void testGoodZipCompressUncompress() throws IOException
{
  final File workDir = temporaryFolder.newFolder("testGoodZipCompressUncompress");
  final File archive = new File(workDir, "compressionUtilTest.zip");
  try {
    CompressionUtils.zip(testDir, archive);

    final File extractDir = new File(workDir, "newDir");
    extractDir.mkdir();
    CompressionUtils.unzip(archive, extractDir);

    final File extracted = Paths.get(extractDir.getAbsolutePath(), testFile.getName()).toFile();
    Assert.assertTrue(extracted.exists());
    try (final FileInputStream inputStream = new FileInputStream(extracted)) {
      assertGoodDataStream(inputStream);
    }
  }
  finally {
    // Best-effort cleanup: the results of delete() are intentionally not checked.
    if (archive.exists()) {
      archive.delete();
    }
    if (workDir.exists()) {
      workDir.delete();
    }
  }
}
final File gzFile = new File(tmpDir, testFile.getName() + ".gz"); Assert.assertFalse(gzFile.exists()); CompressionUtils.gzip(Files.asByteSource(testFile), Files.asByteSink(gzFile), Predicates.alwaysTrue()); Assert.assertTrue(gzFile.exists()); try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(gzFile), "file.gz")) { assertGoodDataStream(inputStream); CompressionUtils.gunzip( new FileInputStream(gzFile), new FilterOutputStream( new FileOutputStream(testFile)
/** * Zip the contents of directory into the file indicated by outputZipFile. Sub directories are skipped * * @param directory The directory whose contents should be added to the zip in the output stream. * @param outputZipFile The output file to write the zipped data to * @param fsync True if the output file should be fsynced to disk * * @return The number of bytes (uncompressed) read from the input directory. * * @throws IOException */ public static long zip(File directory, File outputZipFile, boolean fsync) throws IOException { if (!isZip(outputZipFile.getName())) { log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory); } try (final FileOutputStream out = new FileOutputStream(outputZipFile)) { long bytes = zip(directory, out); // For explanation of why fsyncing here is a good practice: // https://github.com/apache/incubator-druid/pull/5187#pullrequestreview-85188984 if (fsync) { out.getChannel().force(true); } return bytes; } }
/**
 * Wraps the raw stream of {@code object} by handing it to {@code CompressionUtils.decompress} together with
 * the path component of the URI.
 * NOTE(review): presumably the path lets decompress pick a format from the file name -- confirm.
 *
 * @param object The URI the stream was opened for
 * @param stream The raw stream to wrap
 *
 * @return the stream returned by {@code CompressionUtils.decompress}
 *
 * @throws IOException propagated from {@code CompressionUtils.decompress}
 */
@Override
protected InputStream wrapObjectStream(URI object, InputStream stream) throws IOException
{
  final InputStream wrapped = CompressionUtils.decompress(stream, object.getPath());
  return wrapped;
}
/**
 * Builds a malicious zip with {@code CompressionUtils.makeEvilZip} and verifies that the stream-based
 * {@code unzip} rejects it with an ISE and that /tmp/evil.txt is not written.
 */
@Test
public void testEvilZipInputStream() throws IOException
{
  // Folder named after this test, like the other tests here, rather than reusing "testEvilZip".
  final File tmpDir = temporaryFolder.newFolder("testEvilZipInputStream");

  final File evilResult = new File("/tmp/evil.txt");
  java.nio.file.Files.deleteIfExists(evilResult.toPath());

  File evilZip = new File(tmpDir, "evil.zip");
  java.nio.file.Files.deleteIfExists(evilZip.toPath());
  CompressionUtils.makeEvilZip(evilZip);

  // The test opens this stream, so the test closes it; the resource is released before the catch block runs.
  try (final FileInputStream evilIn = new FileInputStream(evilZip)) {
    CompressionUtils.unzip(evilIn, tmpDir);
  }
  catch (ISE ise) {
    Assert.assertTrue(ise.getMessage().contains("does not start with outDir"));
    Assert.assertFalse("Zip exploit triggered, /tmp/evil.txt was written.", evilResult.exists());
    return;
  }

  Assert.fail("Exception was not thrown for malicious zip file");
}
/** * Uncompress using a gzip uncompress algorithm from the `pulledFile` to the `outDir`. * Unlike `org.apache.druid.java.util.common.CompressionUtils.gunzip`, this function takes an output *DIRECTORY* and tries to guess the file name. * It is recommended that the caller use `org.apache.druid.java.util.common.CompressionUtils.gunzip` and specify the output file themselves to ensure names are as expected * * @param pulledFile The source file * @param outDir The destination directory to put the resulting file * * @throws IOException on propagated IO exception, IAE if it cannot determine the proper new name for `pulledFile` */ @Deprecated // See description for alternative public static void gunzip(File pulledFile, File outDir) { final File outFile = new File(outDir, org.apache.druid.java.util.common.CompressionUtils.getGzBaseName(pulledFile.getName())); org.apache.druid.java.util.common.CompressionUtils.gunzip(pulledFile, outFile); if (!pulledFile.delete()) { log.error("Could not delete tmpFile[%s].", pulledFile); } }