Refine search
/**
 * Returns the total number of bytes stored under the given HDFS URL.
 *
 * @param hdfsUrl path string resolvable by the Hadoop FileSystem API
 * @return total length in bytes of all files under the path
 * @throws IOException if the filesystem cannot be reached or the path is invalid
 */
private long getFileSize(String hdfsUrl) throws IOException {
    final Configuration configuration = new Configuration();
    final Path target = new Path(hdfsUrl);
    final FileSystem fileSystem = target.getFileSystem(configuration);
    // ContentSummary#getLength() is the aggregate byte count of the subtree.
    return fileSystem.getContentSummary(target).getLength();
}
// NOTE(review): fragment — this is the opening of a conditional that chooses a
// distributed copy (distcp) over a plain copy; the distcp invocation and the
// closing braces lie outside this snippet, so it cannot be edited as a unit.
// The guard fires only when the source scheme is "hdfs" AND the file count
// exceeds HIVE_EXEC_COPYFILE_MAXNUMFILES AND the total byte length exceeds
// HIVE_EXEC_COPYFILE_MAXSIZE (both conditions must hold — note the &&).
if (srcFS.getUri().getScheme().equals("hdfs")) { ContentSummary srcContentSummary = srcFS.getContentSummary(src); if (srcContentSummary.getFileCount() > conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES) && srcContentSummary.getLength() > conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE)) { LOG.info("Source is " + srcContentSummary.getLength() + " bytes. (MAX: " + conf.getLongVar( HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE) + ")"); LOG.info("Source is " + srcContentSummary.getFileCount() + " files. (MAX: " + conf.getLongVar( HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES) + ")"); LOG.info("Launch distributed copy (distcp) job.");
@Override public ContentSummary getContentSummary(Path f) throws IOException { // HarFileSystem has a bug where this method does not work properly // if the underlying FS is HDFS. See MAPREDUCE-1877 for more // information. This method is from FileSystem. FileStatus status = getFileStatus(f); if (!status.isDir()) { // f is a file return new ContentSummary(status.getLen(), 1, 0); } // f is a directory long[] summary = {0, 0, 1}; for(FileStatus s : listStatus(f)) { ContentSummary c = s.isDir() ? getContentSummary(s.getPath()) : new ContentSummary(s.getLen(), 1, 0); summary[0] += c.getLength(); summary[1] += c.getFileCount(); summary[2] += c.getDirectoryCount(); } return new ContentSummary(summary[0], summary[1], summary[2]); } }
/**
 * Hash derived from every counter this summary exposes, mixed with the
 * superclass hash so summaries differing only in inherited state still
 * hash differently.
 */
@Override
public int hashCode() {
    // XOR-fold all long-valued counters into a single long.
    // NOTE(review): the (int) cast below keeps only the low 32 bits of the
    // fold, so the high bits of each counter never influence the result —
    // weak but contract-valid, and preserved here for compatibility.
    long folded = getLength()
        ^ getFileCount()
        ^ getDirectoryCount()
        ^ getSnapshotLength()
        ^ getSnapshotFileCount()
        ^ getSnapshotDirectoryCount()
        ^ getSnapshotSpaceConsumed()
        ^ getErasureCodingPolicy().hashCode();
    return ((int) folded) ^ super.hashCode();
}
/**
 * Returns whether the given directory is empty, preferring the cached
 * {@link ContentSummary} from the context when one is available.
 *
 * A path counts as empty when it holds no bytes, no files, and at most
 * the directory itself (directory count &lt;= 1).
 *
 * @param job     job configuration, used for the filesystem fallback probe
 * @param dirPath directory to test
 * @param ctx     optional context holding cached content summaries; may be null
 * @return true if the path is empty
 * @throws Exception if the filesystem probe fails
 */
public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx) throws Exception {
    if (ctx != null) {
        ContentSummary cs = ctx.getCS(dirPath);
        if (cs != null) {
            // Fixed log message: the original read `dirPath + "length: "`,
            // which rendered as "...dirFoolength:" with no separator.
            LOG.info("Content Summary " + dirPath + " length: " + cs.getLength()
                + " num files: " + cs.getFileCount()
                + " num directories: " + cs.getDirectoryCount());
            return (cs.getLength() == 0 && cs.getFileCount() == 0
                && cs.getDirectoryCount() <= 1);
        } else {
            LOG.info("Content Summary not cached for " + dirPath);
        }
    }
    // No cached summary available: probe the filesystem directly.
    return isEmptyPath(job, dirPath);
}
/**
 * Returns true when the path contains no files and exactly one directory
 * (the path itself) — i.e. it is an empty directory.
 *
 * @param path path to inspect
 * @return true if the path is an empty directory
 * @throws IOException   if the filesystem query fails
 * @throws MetaException if the filesystem for the path cannot be resolved
 */
public boolean isEmpty(Path path) throws IOException, MetaException {
    ContentSummary contents = getFs(path).getContentSummary(path);
    // Idiom fix: return the condition directly instead of if/return true/false.
    return contents != null
        && contents.getFileCount() == 0
        && contents.getDirectoryCount() == 1;
}
// NOTE(review): fragment — loose statements from an execution-history cleanup
// routine; the surrounding method, the loop(s) enclosing the two
// "removing ..." sections, and the `stat` loop variable are all outside this
// snippet, so the code is left untouched.
// Visible logic: derive a history-size limit as a fraction (default 0.5,
// configurable via ETL_EXECUTION_HISTORY_MAX_OF_QUOTA) of the base path's
// quota, defaulting to 50000 when the computed limit is 0, then subtract each
// deleted execution's file+directory counts from the running total before
// recursively deleting it.
// NOTE(review): `content.getQuota()` returns -1 when no quota is set, which
// would make `limit` negative rather than 0 — presumably handled by the
// enclosing logic; confirm against the full method.
DOMConfigurator.configure("log4j.xml"); FileSystem fs = FileSystem.get(job.getConfiguration()); Path execBasePath = new Path(props.getProperty(ETL_EXECUTION_BASE_PATH)); Path execHistory = new Path(props.getProperty(ETL_EXECUTION_HISTORY_PATH)); if (!fs.exists(execHistory)) { ContentSummary content = fs.getContentSummary(execBasePath); long limit = (long) (content.getQuota() * job.getConfiguration().getFloat(ETL_EXECUTION_HISTORY_MAX_OF_QUOTA, (float) .5)); limit = limit == 0 ? 50000 : limit; long currentCount = content.getFileCount() + content.getDirectoryCount(); FileStatus[] executions = fs.listStatus(execHistory); log.info("removing old execution: " + stat.getPath().getName()); ContentSummary execContent = fs.getContentSummary(stat.getPath()); currentCount -= execContent.getFileCount() + execContent.getDirectoryCount(); fs.delete(stat.getPath(), true); log.info("removing failed execution: " + stat.getPath().getName()); ContentSummary execContent = fs.getContentSummary(stat.getPath()); currentCount -= execContent.getFileCount() + execContent.getDirectoryCount(); fs.delete(stat.getPath(), true);
// NOTE(review): fragment — assertion arguments from a copy-verification test;
// the assertEquals calls these argument pairs belong to are truncated at the
// left margin, so nothing here can be safely rewritten.
// Visible intent: compare source vs destination existence, file-ness,
// directory/file counts, space consumed, and byte length, then (for a
// single-file source) verify the destination is the matching single file and
// enqueue a further random-file config for recursive checking.
exists = c.srcFs.exists(c.src), c.destFs.exists(c.dest)); c.srcFs.isFile(c.src), c.destFs.isFile(c.dest)); srcSummary.getDirectoryCount(), dstSummary.getDirectoryCount()); srcSummary.getFileCount(), dstSummary.getFileCount()); srcSummary.getSpaceConsumed(), dstSummary.getSpaceConsumed()); srcSummary.getLength(), dstSummary.getLength()); if (srcSt.length == 1 && c.src.equals(srcSt[0].getPath())) { assertEquals(c.dest, destSt[0].getPath()); assertTrue("Expects file [src=" + c.src + ", srcSt[0]=" + srcSt[0] + ']', !srcSt[0].isDir()); assertTrue("Expects file [dest=" + c.dest + ", destSt[0]=" + destSt[0] + ']', !destSt[0].isDir()); queue.addFirst(new Config(c.srcFs, new Path(c.src, rndFile), c.destFs, new Path(c.dest, rndFile)));
// NOTE(review): fragment — the test method is truncated (no closing brace, and
// `fileStatuses`, `mockedFileStatus`, `mockedContentSummary`, and `dataSize`
// are declared outside this snippet), so only comments are added.
// Visible setup: EasyMock expectations for a sweeper job over the previous
// hour's input directory ("YYYY/MM/dd/HH" layout): input dir exists, the
// hour-specific output dir does not, a glob returns the mocked statuses, and
// the mocked ContentSummary supplies the input data size exactly once.
@Test public void testCreateSweeperJobProps() throws Exception { FileSystem mockedFs = createMock(FileSystem.class); Path inputDir = new Path("inputDir"); Path outputDir = new Path("outputDir"); DateUtils dUtils = new DateUtils(new Properties()); DateTime currentHour = dUtils.getCurrentHour(); DateTimeFormatter hourFormatter = dUtils.getDateTimeFormatter("YYYY/MM/dd/HH"); String hour = currentHour.minusHours(1).toString(hourFormatter); Path inputDirWithHour = new Path(inputDir, hour); Path outputDirWithHour = new Path(outputDir, hour); EasyMock.expect(mockedFs.exists(inputDir)).andReturn(true).once(); EasyMock.expect(mockedFs.exists(outputDirWithHour)).andReturn(false).once(); EasyMock.expect(mockedFs.globStatus((Path) EasyMock.anyObject())).andReturn(fileStatuses).once(); EasyMock.expect(mockedFileStatus.getPath()).andReturn(inputDirWithHour).anyTimes(); EasyMock.expect(mockedContentSummary.getLength()).andReturn(dataSize).once(); EasyMock.expect(mockedFs.getContentSummary(inputDirWithHour)).andReturn(mockedContentSummary).once();
/** Return the {@link ContentSummary} of a given {@link Path}. */ public ContentSummary getContentSummary(Path f) throws IOException { FileStatus status = getFileStatus(f); if (!status.isDir()) { // f is a file return new ContentSummary(status.getLen(), 1, 0); } // f is a directory long[] summary = {0, 0, 1}; for(FileStatus s : listStatus(f)) { ContentSummary c = getContentSummary(s.getPath()); summary[0] += c.getLength(); summary[1] += c.getFileCount(); summary[2] += c.getDirectoryCount(); } return new ContentSummary(summary[0], summary[1], summary[2]); }
// NOTE(review): fragment — `dataDir1`, `job`, `writeTextFile`, `cs`, and
// `dir2_file2` are defined outside this snippet, so the code is left as-is.
// Visible logic: write two text files under dataDir1, accumulate their lengths
// into symbolLinkedFileSize, then assert the ContentSummary `cs` reports that
// total length, 2 files, and 0 directories.
// NOTE(review): the second accumulation reads `dir2_file2` although the file
// just written is `dir1_file2` — possibly intentional (a symlinked twin under
// a second dir, given the test name) or a copy-paste slip; confirm against
// the full test before changing.
FileSystem fs = dataDir1.getFileSystem(job); int symbolLinkedFileSize = 0; Path dir1_file1 = new Path(dataDir1, "file1"); writeTextFile(dir1_file1, "dir1_file1_line1\n" + "dir1_file1_line2\n"); symbolLinkedFileSize += fs.getFileStatus(dir1_file1).getLen(); Path dir1_file2 = new Path(dataDir1, "file2"); writeTextFile(dir1_file2, "dir1_file2_line1\n" + "dir2_file2_line2\n"); symbolLinkedFileSize += fs.getFileStatus(dir2_file2).getLen(); assertEquals(symbolLinkedFileSize, cs.getLength()); assertEquals(2, cs.getFileCount()); assertEquals(0, cs.getDirectoryCount());
/** Return the {@link ContentSummary} of a given {@link Path}. * @param f path to use * @throws FileNotFoundException if the path does not resolve * @throws IOException IO failure */ public ContentSummary getContentSummary(Path f) throws IOException { FileStatus status = getFileStatus(f); if (status.isFile()) { // f is a file long length = status.getLen(); return new ContentSummary.Builder().length(length). fileCount(1).directoryCount(0).spaceConsumed(length).build(); } // f is a directory long[] summary = {0, 0, 1}; for(FileStatus s : listStatus(f)) { long length = s.getLen(); ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : new ContentSummary.Builder().length(length). fileCount(1).directoryCount(0).spaceConsumed(length).build(); summary[0] += c.getLength(); summary[1] += c.getFileCount(); summary[2] += c.getDirectoryCount(); } return new ContentSummary.Builder().length(summary[0]). fileCount(summary[1]).directoryCount(summary[2]). spaceConsumed(summary[0]).build(); }
@Test public void testGetContentSummary() throws IOException { // GetContentSummary of a dir fSys.mkdirs(new Path("/newDir/dirFoo")); ContentSummary cs = fSys.getContentSummary(new Path("/newDir/dirFoo")); Assert.assertEquals(-1L, cs.getQuota()); Assert.assertEquals(-1L, cs.getSpaceQuota()); }
/**
 * Returns the total byte length of everything under the given location.
 *
 * @param location - path to a partition (or table if not partitioned) dir
 * @param conf Hive configuration used to resolve the filesystem
 * @throws IOException if the filesystem query fails
 */
private static long getDataSize(Path location, HiveConf conf) throws IOException {
    FileSystem fileSystem = location.getFileSystem(conf);
    ContentSummary summary = fileSystem.getContentSummary(location);
    return summary.getLength();
}
/**
 * Runs the superclass post-execution hook, then — for a successful "archive"
 * run with a configured output path — logs the generated file's logical size
 * and raw space consumed on HDFS.
 *
 * @param succeed whether the execution succeeded
 * @throws IOException            if the filesystem query fails
 * @throws StorageEngineException propagated from the superclass hook
 */
@Override
protected void postExecution(boolean succeed) throws IOException, StorageEngineException {
    super.postExecution(succeed);
    // Guard clause: only successful archive runs with an output path are logged.
    if (!succeed || !input.equalsIgnoreCase("archive") || !StringUtils.isNotEmpty(outputPath)) {
        return;
    }
    FileSystem fs = FileSystem.get(getConf());
    ContentSummary summary = fs.getContentSummary(new Path(outputPath));
    logger.info("Generated file " + outputPath);
    logger.info(" - Size (HDFS) : " + humanReadableByteCount(summary.getLength(), false));
    logger.info(" - SpaceConsumed (raw) : " + humanReadableByteCount(summary.getSpaceConsumed(), false));
}
/**
 * Sums the total byte length of every path matching the given glob pattern.
 *
 * @param fs   filesystem to query
 * @param path glob pattern to expand
 * @return combined length in bytes, or -1L when nothing matches
 * @throws IOException if the filesystem query fails
 */
public static long getPathSize(FileSystem fs, Path path) throws IOException {
    FileStatus[] matches = fs.globStatus(path);
    if (matches == null || matches.length == 0) {
        // Sentinel: distinguishes "no match" from a genuinely empty match set.
        return -1L;
    }
    long total = 0;
    for (FileStatus match : matches) {
        total += fs.getContentSummary(match.getPath()).getLength();
    }
    return total;
}
}
/**
 * Asserts that the file exists, carries the expected replication factor,
 * and is exactly FILE_SIZE bytes long.
 *
 * @param fileSys filesystem holding the file
 * @param name    path of the file to check
 * @param repl    expected replication factor
 * @throws IOException if the filesystem query fails
 */
private void checkFile(FileSystem fileSys, Path name, int repl) throws IOException {
    assertTrue(fileSys.exists(name));
    int replication = fileSys.getFileStatus(name).getReplication();
    assertEquals("replication for " + name, repl, replication);
    long size = fileSys.getContentSummary(name).getLength();
    // Fixed argument order: JUnit assertEquals is (message, expected, actual).
    // The original passed the measured size as "expected" and FILE_SIZE as
    // "actual", producing backwards failure messages.
    assertEquals("file size for " + name, (long) FILE_SIZE, size);
}
/**
 * Returns the space consumed by the given entry, memoized per FileStatus.
 * Directories are measured via ContentSummary#getSpaceConsumed(); plain
 * files use their own length. On failure, -1 is returned and NOT cached,
 * so a later call may retry.
 */
private long getSpace(FileStatus f) {
    // Serve repeated lookups from the memoization map.
    Long memoized = directorySpaces.get(f);
    if (memoized != null) {
        return memoized;
    }
    try {
        long consumed;
        if (f.isDirectory()) {
            consumed = fs.getContentSummary(f.getPath()).getSpaceConsumed();
        } else {
            consumed = f.getLen();
        }
        directorySpaces.put(f, consumed);
        return consumed;
    } catch (IOException e) {
        LOG.trace("Failed to get space consumed by path={}", f, e);
        return -1;
    }
}
});
/** {@inheritDoc} */ @Override public long usedSpaceSize() { try { // We don't use FileSystem#getUsed() since it counts only the files // in the filesystem root, not all the files recursively. return fileSystemForUser().getContentSummary(new Path("/")).getSpaceConsumed(); } catch (IOException e) { throw handleSecondaryFsError(e, "Failed to get used space size of file system."); } }
/**
 * Computes the compression-adjusted size of the input data and stores it
 * in {@code inputPathSize}. Entries whose names start with "_" or "."
 * (bookkeeping files such as _SUCCESS, hidden files) are skipped.
 *
 * NOTE(review): the {@code inputPath} parameter is never read here — the
 * sizes come from {@code this.fsArray}, which is presumably populated from
 * the input path elsewhere; confirm before removing the parameter.
 *
 * @param inputPath input path (currently unused in this method)
 * @throws IOException if the filesystem query fails
 */
public void setInputPathSize(String inputPath) throws IOException {
    long totalRawSize = 0;
    for (FileStatus fileStatus : this.fsArray) {
        String entryName = fileStatus.getPath().getName();
        // Skip bookkeeping/hidden entries.
        if (entryName.startsWith("_") || entryName.startsWith(".")) {
            continue;
        }
        totalRawSize += this.fs.getContentSummary(fileStatus.getPath()).getSpaceConsumed();
    }
    this.inputPathSize = (long) (totalRawSize * this.inputCompressionRatio);
}