/**
 * Get raw data size of a given table.
 *
 * @param table the table whose recorded basic stats are consulted
 * @return raw data size stored under {@code StatsSetupConst.RAW_DATA_SIZE}
 */
public static long getRawDataSize(Table table) {
  return getBasicStatForTable(table, StatsSetupConst.RAW_DATA_SIZE);
}
/**
 * Pre-execution hook: when re-optimization is being explained, switches the
 * stats reader hook to collect stats on successful runs. Otherwise a no-op.
 */
@Override
public void beforeExecute(int executionIndex, boolean explainReOptimization) {
  if (!explainReOptimization) {
    return;
  }
  statsReaderHook.setCollectOnSuccess(true);
}
/**
 * Collect statistics for a table over the given pruned partition list.
 * Convenience overload that delegates to the full variant with its final
 * boolean argument fixed to {@code false}.
 *
 * @param neededColumns columns whose stats are required
 * @param colStatsCache previously fetched column stats, to avoid refetching
 * @param fetchColStats whether column-level statistics should be fetched
 * @throws HiveException propagated from the delegated overload
 */
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache, List<String> referencedColumns, boolean fetchColStats) throws HiveException {
  return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns, fetchColStats, false);
}
/**
 * The deserialization factor must scale the partition's total size:
 * totalSize 10 * factor 13.0 == data size 130.
 */
@Test
public void testDataSizeEstimator() {
  HiveConf hiveConf = new HiveConf();
  hiveConf.setFloatVar(ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR, 13.0f);

  Partish partition = new LocalPartishBuilder().totalSize(10).buildPartition();
  BasicStats.Factory statsFactory =
      new BasicStats.Factory(new BasicStats.DataSizeEstimator(hiveConf));

  BasicStats stats = statsFactory.build(partition);

  assertEquals(130, stats.getDataSize());
}
/**
 * Obtains a stats publisher for the given job configuration.
 *
 * @return the factory's publisher, or {@code null} when no factory could be
 *         created for this configuration
 */
public static StatsPublisher getStatsPublisher(JobConf jc) {
  StatsFactory statsFactory = StatsFactory.newFactory(jc);
  if (statsFactory == null) {
    return null;
  }
  return statsFactory.getStatsPublisher();
}
@Override public void apply(BasicStats stats) { long ds = stats.getRawDataSize(); if (ds <= 0) { ds = stats.getTotalSize(); // if data size is still 0 then get file size if (ds <= 0) { Path path = stats.partish.getPath(); try { ds = getFileSizeForPath(path); } catch (IOException e) { ds = 0L; } } ds = (long) (ds * deserFactor); stats.setDataSize(ds); } }
/**
 * Clamps a missing row count to a minimum of one row. Both 0 ("no rows
 * recorded") and -1 ("unknown") are treated as missing; other negative
 * values are left as-is. Presumably this keeps downstream estimates from
 * working with an empty row count — TODO confirm against callers.
 */
@Override
public void apply(BasicStats stats) {
  long rows = stats.getNumRows();
  if (rows == 0 || rows == -1) {
    stats.setNumRows(1);
  }
}
}
/**
 * Builds the fully qualified name for a table from its database and table
 * names. Thin wrapper that delegates to {@code getFullyQualifiedName}.
 *
 * @param dbName database name
 * @param tabName table name
 * @return the fully qualified name produced by {@code getFullyQualifiedName}
 */
public static String getFullyQualifiedTableName(String dbName, String tabName) {
  return getFullyQualifiedName(dbName, tabName);
}
/**
 * Builds a {@link BasicStats} for the given partish and runs every
 * configured enhancer over it, in order.
 */
public BasicStats build(Partish p) {
  BasicStats stats = new BasicStats(p);
  for (IStatsEnhancer e : enhancers) {
    stats.apply(e);
  }
  return stats;
}
@Override public BasicStats call() throws Exception { return build(part); } }));
@Override public Void call() throws Exception { bsp.collectFileStatus(wh, conf); return null; } }));
/**
 * Reads the recorded basic stats (row count, raw data size, total size) for
 * the given partish. The stats are marked {@code COMPLETE} only when a
 * positive row count is present; otherwise the state is {@code NONE}.
 */
public BasicStats(Partish p) {
  partish = p;

  rowCount = parseLong(StatsSetupConst.ROW_COUNT);
  rawDataSize = parseLong(StatsSetupConst.RAW_DATA_SIZE);
  totalSize = parseLong(StatsSetupConst.TOTAL_SIZE);

  currentNumRows = rowCount;
  currentDataSize = rawDataSize;

  // A positive row count is what distinguishes usable stats from absent ones.
  state = (currentNumRows > 0) ? State.COMPLETE : State.NONE;
}
/**
 * Processes the given table by persisting its column statistics.
 *
 * @return the result of {@code persistColumnStats} (semantics defined by
 *         the callee)
 * @throws Exception propagated from {@code persistColumnStats}
 */
@Override
public int process(Hive db, Table tbl) throws Exception {
  return persistColumnStats(db, tbl);
}
/**
 * Creates a {@code Partish} view over a single partition of the given table.
 */
public static Partish buildFor(Table table, Partition part) {
  return new PPart(table, part);
}
/**
 * Creates a {@code Partish} view over an unpartitioned (whole) table.
 */
public static Partish buildFor(Table table) {
  return new PTable(table);
}
/**
 * Applies the given enhancer to this stats object (visitor-style dispatch:
 * the enhancer mutates this instance via its {@code apply} method).
 */
void apply(IStatsEnhancer estimator) {
  estimator.apply(this);
}
/**
 * Builds an aggregate {@code BasicStats} from a list of per-partition stats.
 */
public static BasicStats buildFrom(List<BasicStats> partStats) {
  return new BasicStats(partStats);
}
/**
 * Get total size of a given table.
 *
 * @param table the table whose recorded basic stats are consulted
 * @return total size stored under {@code StatsSetupConst.TOTAL_SIZE}
 */
public static long getTotalSize(Table table) {
  return getBasicStatForTable(table, StatsSetupConst.TOTAL_SIZE);
}
/**
 * Clamps a zero row count to a minimum of one row. Only an exact zero is
 * adjusted; negative values (e.g. the -1 "unknown" marker) are deliberately
 * left untouched here.
 */
@Override
public void apply(BasicStats stats) {
  long rows = stats.getNumRows();
  if (rows == 0) {
    stats.setNumRows(1);
  }
}
}
/**
 * Get number of Erasure Coded files for a table.
 *
 * @param table the table whose recorded basic stats are consulted
 * @return count of EC files stored under
 *         {@code StatsSetupConst.NUM_ERASURE_CODED_FILES}
 */
public static long getErasureCodedFiles(Table table) {
  return getBasicStatForTable(table, StatsSetupConst.NUM_ERASURE_CODED_FILES);
}