/**
 * @param location - path to a partition (or table if not partitioned) dir
 */
private static long getDataSize(Path location, Configuration conf) throws IOException {
  FileSystem fs = location.getFileSystem(conf);
  ContentSummary cs = fs.getContentSummary(location);
  return cs.getLength();
}
@Override
public Long call() throws Exception {
  try {
    LOG.debug("Partition path : " + path);
    FileSystem fs = path.getFileSystem(conf);
    return fs.getContentSummary(path).getLength();
  } catch (IOException e) {
    // Treat an unreadable partition as contributing zero bytes
    return 0L;
  }
}
}));
/**
 * @param location - path to a partition (or table if not partitioned) dir
 */
private static long getDataSize(Path location, HiveConf conf) throws IOException {
  FileSystem fs = location.getFileSystem(conf);
  ContentSummary cs = fs.getContentSummary(location);
  return cs.getLength();
}
private long getFileSize(String hdfsUrl) throws IOException {
  Configuration configuration = new Configuration();
  Path path = new Path(hdfsUrl);
  FileSystem fs = path.getFileSystem(configuration);
  ContentSummary contentSummary = fs.getContentSummary(path);
  return contentSummary.getLength();
}
private long getFileSizeForPath(Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  return fs.getContentSummary(path).getLength();
}
/**
 * Return the total size of all files from a specified path.
 * @throws IOException IO failure
 */
public long getUsed(Path path) throws IOException {
  return getContentSummary(path).getLength();
}
private long getFileLength() throws IOException {
  if (fileLen == -1L) {
    // Compute lazily and cache on first access
    fileLen = fs.getContentSummary(file).getLength();
  }
  return fileLen;
}
private long getSize(HiveConf conf, String size, Path path) {
  // If the size is present in the metastore, use it
  if (size != null) {
    try {
      return Long.parseLong(size);
    } catch (NumberFormatException e) {
      return -1;
    }
  }
  // Otherwise fall back to asking the filesystem directly
  try {
    FileSystem fs = path.getFileSystem(conf);
    return fs.getContentSummary(path).getLength();
  } catch (Exception e) {
    return -1;
  }
}
private long getFileSize(Path currentPath) throws IOException {
  long fileSize;
  try {
    fileSize = fs.getContentSummary(currentPath).getLength();
  } catch (FileNotFoundException e) {
    // The file may have been moved to the archived-log location; retry there
    currentPath = getArchivedLogPath(currentPath, conf);
    fileSize = fs.getContentSummary(currentPath).getLength();
  }
  return fileSize;
}
private void calculateEntrySize(CacheEntry entry, FetchWork fetchWork) throws IOException {
  Path queryResultsPath = fetchWork.getTblDir();
  FileSystem resultsFs = queryResultsPath.getFileSystem(conf);
  ContentSummary cs = resultsFs.getContentSummary(queryResultsPath);
  entry.size = cs.getLength();
}
private long getInputSize() throws IOException {
  long inputSize = 0;
  for (Path inputPath : this.getInputPaths()) {
    inputSize += this.fs.getContentSummary(inputPath).getLength();
  }
  return inputSize;
}
public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx) throws Exception {
  if (ctx != null) {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Content Summary cached for {} length: {} num files: {} num directories: {}",
            dirPath, cs.getLength(), cs.getFileCount(), cs.getDirectoryCount());
      }
      // Empty means no bytes, no files, and at most the directory itself
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.debug("Content Summary not cached for {}", dirPath);
    }
  }
  return isEmptyPath(job, dirPath);
}
public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx) throws Exception {
  if (ctx != null) {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      LOG.info("Content Summary " + dirPath + " length: " + cs.getLength()
          + " num files: " + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.info("Content Summary not cached for " + dirPath);
    }
  }
  return isEmptyPath(job, dirPath);
}
@Test
public void testCopyWithDistcp() throws IOException {
  Path copySrc = new Path("copySrc");
  Path copyDst = new Path("copyDst");
  HiveConf conf = new HiveConf(TestFileUtils.class);

  FileSystem mockFs = mock(FileSystem.class);
  when(mockFs.getUri()).thenReturn(URI.create("hdfs:///"));

  ContentSummary mockContentSummary = mock(ContentSummary.class);
  when(mockContentSummary.getFileCount()).thenReturn(Long.MAX_VALUE);
  when(mockContentSummary.getLength()).thenReturn(Long.MAX_VALUE);
  when(mockFs.getContentSummary(any(Path.class))).thenReturn(mockContentSummary);

  HadoopShims shims = mock(HadoopShims.class);
  when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf)).thenReturn(true);

  Assert.assertTrue(FileUtils.copy(mockFs, copySrc, mockFs, copyDst, false, false, conf, shims));
  verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf);
}
@Override
public Model load(Path f) throws IOException {
  FSDataInputStream is = _hfs.open(f);
  // Allocate a buffer sized to the file's total length, then read it fully
  byte[] buf = MemoryManager.malloc1((int) _hfs.getContentSummary(f).getLength());
  try {
    is.readFully(buf);
  } finally {
    is.close();
  }
  AutoBuffer ab = ab4read(buf);
  Model m = loadHeader(ab);
  m.getModelSerializer().load(m, ab);
  if (m._key != null) {
    DKV.put(m._key, m);
  }
  return m;
}
@Override
public int hashCode() {
  long result = getLength() ^ getFileCount() ^ getDirectoryCount()
      ^ getSnapshotLength() ^ getSnapshotFileCount() ^ getSnapshotDirectoryCount()
      ^ getSnapshotSpaceConsumed() ^ getErasureCodingPolicy().hashCode();
  return ((int) result) ^ super.hashCode();
}
@Override
protected void processPath(PathData item) throws IOException {
  ContentSummary contentSummary = item.fs.getContentSummary(item.path);
  long length = contentSummary.getLength();
  long spaceConsumed = contentSummary.getSpaceConsumed();
  if (excludeSnapshots) {
    // Exclude bytes that exist only in snapshots from the reported usage
    length -= contentSummary.getSnapshotLength();
    spaceConsumed -= contentSummary.getSnapshotSpaceConsumed();
  }
  getUsagesTable().addRow(formatSize(length), formatSize(spaceConsumed), item);
}
}
@Test
public void testGetInputSummaryWithASingleThread() throws IOException {
  final int NUM_PARTITIONS = 5;
  final int BYTES_PER_FILE = 5;

  JobConf jobConf = new JobConf();
  Properties properties = new Properties();

  // A max-thread count of 0 exercises the single-threaded listing path
  jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 0);

  ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS,
      BYTES_PER_FILE, HiveInputFormat.class);
  assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
  assertEquals(NUM_PARTITIONS, summary.getFileCount());
  assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
@Test
public void testGetInputSummaryWithContentSummaryInputFormat() throws IOException {
  final int NUM_PARTITIONS = 5;
  final int BYTES_PER_FILE = 10;

  JobConf jobConf = new JobConf();
  Properties properties = new Properties();

  jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
  ContentSummaryInputFormatTestClass.setContentSummary(
      new ContentSummary.Builder().length(BYTES_PER_FILE).fileCount(2).directoryCount(1).build());

  /* Write twice as many bytes to each file to verify that the size reported by
     ContentSummaryInputFormat is used, rather than the size on the filesystem. */
  ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS,
      BYTES_PER_FILE * 2, ContentSummaryInputFormatTestClass.class);
  assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
  assertEquals(NUM_PARTITIONS * 2, summary.getFileCount());
  assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
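Every snippet above reduces to the same core pattern: resolve the FileSystem for a Path, call getContentSummary(), and read getLength() for the total logical byte count under that path. Below is a minimal, self-contained sketch of that pattern; the class name, the main method, and the "/tmp/example" default path are illustrative assumptions, not taken from any of the projects above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {

  // Total logical bytes under location, summed recursively by the filesystem
  static long getDataSize(Path location, Configuration conf) throws IOException {
    FileSystem fs = location.getFileSystem(conf);
    ContentSummary cs = fs.getContentSummary(location);
    return cs.getLength();
  }

  public static void main(String[] args) throws IOException {
    // "/tmp/example" is a hypothetical default path, for illustration only
    Path path = new Path(args.length > 0 ? args[0] : "/tmp/example");
    long bytes = getDataSize(path, new Configuration());
    System.out.println(path + ": " + bytes + " bytes");
  }
}

Note that getLength() reports logical file bytes; getSpaceConsumed() on the same ContentSummary accounts for replication, which is why the du-style command above reports both values separately.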