/**
 * Verifies that the given index file fits within the addressable size limit.
 *
 * @param indexFile the file whose size is validated
 * @throws IOException never directly; declared for callers — an {@code IOE} (unchecked in spirit
 *                     here, thrown as declared) is raised if the file exceeds {@link Integer#MAX_VALUE} bytes
 */
public static void checkFileSize(File indexFile) throws IOException
{
  final long length = indexFile.length();
  if (length > Integer.MAX_VALUE) {
    throw new IOE("File[%s] too large[%d]", indexFile, length);
  }
}
/**
 * Determines the segment format version for a segment directory.
 * <p>
 * Prefers {@code version.bin}; falls back to reading the first byte of the legacy
 * {@code index.drd} file when {@code version.bin} is absent.
 *
 * @param inDir segment directory to inspect
 * @return the detected version number
 * @throws IOException if neither version.bin nor index.drd exists in {@code inDir}
 */
public static int getVersionFromDir(File inDir) throws IOException
{
  final File versionFile = new File(inDir, "version.bin");
  if (versionFile.exists()) {
    return Ints.fromByteArray(Files.toByteArray(versionFile));
  }

  final File indexFile = new File(inDir, "index.drd");
  if (indexFile.exists()) {
    // Legacy layout: the version is encoded as the first byte of index.drd.
    try (InputStream in = new FileInputStream(indexFile)) {
      return in.read();
    }
  }

  throw new IOE("Invalid segment dir [%s]. Can't find either of version.bin or index.drd.", inDir);
}
}
/**
 * Moves a temp file into its final segment location, tolerating a rename that a replica
 * task may have already performed.
 *
 * @throws IOException if the rename fails and the destination does not already exist
 */
private void copyFilesWithChecks(final FileSystem fs, final Path from, final Path to) throws IOException
{
  if (HadoopFsWrapper.rename(fs, from, to)) {
    return;
  }

  if (fs.exists(to)) {
    // Benign: a replica task likely pushed the same segment first.
    log.info(
        "Unable to rename temp file [%s] to segment path [%s], it may have already been pushed by a replica task.",
        from,
        to
    );
  } else {
    throw new IOE("Failed to rename temp file [%s] and final segment path [%s] is not present.", from, to);
  }
}
/**
 * Returns the currently known leader host.
 *
 * @param cached when true, reuse the cached leader if one is set; when false, always re-pick a host
 * @return the leader host string
 * @throws IOException if no server is known
 */
private String getCurrentKnownLeader(final boolean cached) throws IOException
{
  final String leader = currentKnownLeader.accumulateAndGet(
      null,
      (current, given) -> (current != null && cached) ? current : pickOneHost()
  );

  if (leader == null) {
    throw new IOE("No known server");
  }
  return leader;
}
/**
 * Opens an input stream over the object identified by the URI, translating AWS service
 * failures into IOException so callers can treat this as ordinary I/O.
 */
@Override
public InputStream getInputStream(URI uri) throws IOException
{
  try {
    return buildFileObject(uri).openInputStream();
  }
  catch (AmazonServiceException e) {
    throw new IOE(e, "Could not load URI [%s]", uri);
  }
}
throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
/**
 * Writes the segment descriptor JSON to {@code descriptorPath}, first removing any stale
 * copy left by a previous attempt. Failures are logged and rethrown so the surrounding
 * retry loop can try again.
 *
 * @return -1 (descriptor pushes do not report a size)
 */
@Override
public long push() throws IOException
{
  try {
    progressable.progress();

    // Clear out any descriptor left behind by an earlier attempt.
    if (outputFS.exists(descriptorPath)) {
      if (!outputFS.delete(descriptorPath, false)) {
        throw new IOE("Failed to delete descriptor at [%s]", descriptorPath);
      }
    }

    try (final OutputStream descriptorOut = outputFS.create(
        descriptorPath,
        true,
        DEFAULT_FS_BUFFER_SIZE,
        progressable
    )) {
      HadoopDruidIndexerConfig.JSON_MAPPER.writeValue(descriptorOut, segment);
    }
  }
  catch (RuntimeException | IOException ex) {
    log.info(ex, "Exception in descriptor pusher retry loop");
    throw ex;
  }
  return -1;
}
},
/**
 * Copies the given task log file into the configured log directory.
 *
 * @throws IOException if the log directory does not exist and cannot be created
 */
@Override
public void pushTaskLog(final String taskid, File file) throws IOException
{
  final File logDir = config.getDirectory();
  if (!logDir.exists() && !logDir.mkdirs()) {
    throw new IOE("Unable to create task log dir[%s]", logDir);
  }

  final File outputFile = fileForTask(taskid, file.getName());
  Files.copy(file, outputFile);
  log.info("Wrote task log to: %s", outputFile);
}
/**
 * Copies the given task report file into the configured report directory.
 *
 * @throws IOException if the report directory does not exist and cannot be created
 */
@Override
public void pushTaskReports(String taskid, File reportFile) throws IOException
{
  final File reportDir = config.getDirectory();
  if (!reportDir.exists() && !reportDir.mkdirs()) {
    throw new IOE("Unable to create task report dir[%s]", reportDir);
  }

  final File outputFile = fileForTask(taskid, reportFile.getName());
  Files.copy(reportFile, outputFile);
  log.info("Wrote task report to: %s", outputFile);
}
throw new IOE(e, "Failed to stream logs from: %s", taskKey);
/**
 * Deletes HDFS task log files whose modification time is older than the given timestamp.
 * If the thread is interrupted mid-scan, stops and surfaces the interruption as an
 * IOException (matching the method's checked signature).
 *
 * @param timestamp epoch millis; files modified strictly before this are removed
 * @throws IOException if the log path exists but is not a directory, or on interruption
 */
@Override
public void killOlderThan(long timestamp) throws IOException
{
  final Path taskLogDir = new Path(config.getDirectory());
  final FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);

  if (!fs.exists(taskLogDir)) {
    return;
  }
  if (!fs.isDirectory(taskLogDir)) {
    throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
  }

  final RemoteIterator<LocatedFileStatus> files = fs.listLocatedStatus(taskLogDir);
  while (files.hasNext()) {
    final LocatedFileStatus status = files.next();
    if (status.getModificationTime() < timestamp) {
      final Path logPath = status.getPath();
      log.info("Deleting hdfs task log [%s].", logPath.toUri().toString());
      fs.delete(logPath, true);
    }
    // Check after each file so a cancellation request is honored promptly.
    if (Thread.currentThread().isInterrupted()) {
      throw new IOException(
          new InterruptedException("Thread interrupted. Couldn't delete all tasklogs.")
      );
    }
  }
}
}
@Override
public void close() throws IOException
{
  // Mark this writer closed and record the file's final extent in the smoosh metadata
  // before validating, so the metadata reflects exactly what was written.
  open = false;
  internalFiles.put(name, new Metadata(currOut.getFileNum(), startOffset, currOut.getCurrOffset()));
  writerCurrentlyInUse = false;

  // Sanity check: bytes tracked by this writer must equal the offset delta in the underlying file;
  // a mismatch suggests concurrent writes to the shared output.
  if (bytesWritten != currOut.getCurrOffset() - startOffset) {
    throw new ISE("WTF? Perhaps there is some concurrent modification going on?");
  }
  // The caller declared `size` bytes up front; writing any other amount indicates corruption.
  if (bytesWritten != size) {
    throw new IOE("Expected [%,d] bytes, only saw [%,d], potential corruption?", size, bytesWritten);
  }

  // Merge temporary files on to the main smoosh file.
  mergeWithSmoosher();
}
};
throw new IOE(e, "Failed to stream logs from: %s", taskKey);
/**
 * Thrown when a task cannot be run.
 */
public static class TaskNotRunnableException extends RuntimeException
{
  public TaskNotRunnableException(String message)
  {
    super(message);
  }

  /**
   * Preserves the underlying cause so diagnostics retain the full stack chain
   * (preferred over wrapping only {@code cause.getMessage()}).
   *
   * @param message description of why the task is not runnable
   * @param cause   the underlying failure
   */
  public TaskNotRunnableException(String message, Throwable cause)
  {
    super(message, cause);
  }
}
/** * Returns an input stream for a s3 object. The returned input stream is not thread-safe. */ @Override public InputStream openInputStream() throws IOException { try { if (s3Object == null) { // lazily promote to full GET s3Object = s3Client.getObject(objectSummary.getBucketName(), objectSummary.getKey()); } final InputStream in = s3Object.getObjectContent(); final Closer closer = Closer.create(); closer.register(in); closer.register(s3Object); return new FilterInputStream(in) { @Override public void close() throws IOException { closer.close(); } }; } catch (AmazonServiceException e) { throw new IOE(e, "Could not load S3 URI [%s]", uri); } }
@Test
public void testGoodGZStream() throws IOException
{
  final File workDir = temporaryFolder.newFolder("testGoodGZStream");
  final File zipped = new File(workDir, testFile.getName() + ".gz");
  Assert.assertFalse(zipped.exists());

  // Compress the fixture, then verify the gzip'd copy decodes to the expected data.
  CompressionUtils.gzip(new FileInputStream(testFile), new FileOutputStream(zipped));
  Assert.assertTrue(zipped.exists());
  try (final InputStream decoded = new GZIPInputStream(new FileInputStream(zipped))) {
    assertGoodDataStream(decoded);
  }

  // Delete the original, restore it via gunzip, and verify the contents again.
  if (!testFile.delete()) {
    throw new IOE("Unable to delete file [%s]", testFile.getAbsolutePath());
  }
  Assert.assertFalse(testFile.exists());
  CompressionUtils.gunzip(new FileInputStream(zipped), testFile);
  Assert.assertTrue(testFile.exists());
  try (final InputStream restored = new FileInputStream(testFile)) {
    assertGoodDataStream(restored);
  }
}
/** * Returns the "version" (aka last modified timestamp) of the URI * * @param uri The URI to check the last timestamp * * @return The time in ms of the last modification of the URI in String format * * @throws IOException */ @Override public String getVersion(URI uri) throws IOException { try { final S3Coords coords = new S3Coords(checkURI(uri)); final S3ObjectSummary objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.bucket, coords.path); return StringUtils.format("%d", objectSummary.getLastModified().getTime()); } catch (AmazonServiceException e) { if (S3Utils.isServiceExceptionRecoverable(e)) { // The recoverable logic is always true for IOException, so we want to only pass IOException if it is recoverable throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri); } else { throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri); } } }
@Nullable private Map<String, LookupExtractorFactoryContainer> tryGetLookupListFromCoordinator(String tier) throws Exception { final FullResponseHolder response = fetchLookupsForTier(tier); if (response.getStatus().equals(HttpResponseStatus.NOT_FOUND)) { LOG.warn("No lookups found for tier [%s], response [%s]", tier, response); return null; } else if (!response.getStatus().equals(HttpResponseStatus.OK)) { throw new IOE( "Error while fetching lookup code from Coordinator with status[%s] and content[%s]", response.getStatus(), response.getContent() ); } // Older version of getSpecificTier returns a list of lookup names. // Lookup loading is performed via snapshot if older version is present. // This check is only for backward compatibility and should be removed in a future release if (response.getContent().startsWith("[")) { LOG.info( "Failed to retrieve lookup information from coordinator, " + "because coordinator appears to be running on older Druid version. " + "Attempting to load lookups using snapshot instead" ); return null; } else { return jsonMapper.readValue(response.getContent(), LOOKUPS_ALL_REFERENCE); } }
@Test
public void testGoodGzipByteSource() throws IOException
{
  final File workDir = temporaryFolder.newFolder("testGoodGzipByteSource");
  final File zipped = new File(workDir, testFile.getName() + ".gz");
  Assert.assertFalse(zipped.exists());

  // Compress via the ByteSource/ByteSink overload, then verify decompression by filename sniffing.
  CompressionUtils.gzip(Files.asByteSource(testFile), Files.asByteSink(zipped), Predicates.alwaysTrue());
  Assert.assertTrue(zipped.exists());
  try (final InputStream decoded = CompressionUtils.decompress(new FileInputStream(zipped), zipped.getName())) {
    assertGoodDataStream(decoded);
  }

  // Delete the original, restore it via the ByteSource gunzip overload, and re-verify.
  if (!testFile.delete()) {
    throw new IOE("Unable to delete file [%s]", testFile.getAbsolutePath());
  }
  Assert.assertFalse(testFile.exists());
  CompressionUtils.gunzip(Files.asByteSource(zipped), testFile);
  Assert.assertTrue(testFile.exists());
  try (final InputStream restored = new FileInputStream(testFile)) {
    assertGoodDataStream(restored);
  }
}