/**
 * Collects statistics using a caller-supplied column-statistics flag.
 *
 * <p>Forwards every argument unchanged to the nine-argument overload and fixes that
 * overload's final boolean argument to {@code false}.
 *
 * @param conf hive configuration
 * @param partList pruned partition list
 * @param table table
 * @param schema column infos forwarded to the delegate
 * @param neededColumns needed column names
 * @param colStatsCache column statistics cache forwarded to the delegate
 * @param referencedColumns referenced column names
 * @param fetchColStats column-statistics flag forwarded to the delegate
 * @return statistics object produced by the delegate
 * @throws HiveException if the delegate fails
 */
public static Statistics collectStatistics(
    HiveConf conf,
    PrunedPartitionList partList,
    Table table,
    List<ColumnInfo> schema,
    List<String> neededColumns,
    ColumnStatsList colStatsCache,
    List<String> referencedColumns,
    boolean fetchColStats) throws HiveException {
  // This overload never enables the delegate's trailing flag.
  final boolean trailingFlag = false;
  return collectStatistics(
      conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns,
      fetchColStats, trailingFlag);
}
/**
 * Collects statistics, taking both boolean switches from the configuration.
 *
 * <p>Reads {@code HIVE_STATS_FETCH_COLUMN_STATS} and {@code HIVE_IN_TEST} from {@code conf}
 * and passes them, in that order, as the last two arguments of the nine-argument overload.
 *
 * @param conf hive configuration the two flags are read from
 * @param partList pruned partition list
 * @param table table
 * @param schema column infos forwarded to the delegate
 * @param neededColumns needed column names
 * @param colStatsCache column statistics cache forwarded to the delegate
 * @param referencedColumns referenced column names
 * @return statistics object produced by the delegate
 * @throws HiveException if the delegate fails
 */
private static Statistics collectStatistics(
    HiveConf conf,
    PrunedPartitionList partList,
    Table table,
    List<ColumnInfo> schema,
    List<String> neededColumns,
    ColumnStatsList colStatsCache,
    List<String> referencedColumns) throws HiveException {
  return collectStatistics(
      conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns,
      // fetch-column-stats switch
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS),
      // test-mode switch
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST));
}
/**
 * Collects statistics, taking both fetch switches from the configuration.
 *
 * <p>Reads {@code HIVE_STATS_FETCH_COLUMN_STATS} and {@code HIVE_STATS_FETCH_PARTITION_STATS}
 * from {@code conf} and passes them, in that order, as the last two arguments of the
 * eight-argument overload.
 *
 * @param conf hive configuration the two flags are read from
 * @param partList pruned partition list
 * @param table table
 * @param schema column infos forwarded to the delegate
 * @param neededColumns needed column names
 * @param referencedColumns referenced column names
 * @return statistics object produced by the delegate
 * @throws HiveException if the delegate fails
 */
private static Statistics collectStatistics(
    HiveConf conf,
    PrunedPartitionList partList,
    Table table,
    List<ColumnInfo> schema,
    List<String> neededColumns,
    List<String> referencedColumns) throws HiveException {
  return collectStatistics(
      conf, partList, table, schema, neededColumns, referencedColumns,
      // fetch-column-stats switch
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS),
      // fetch-partition-stats switch
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS));
}
/** * Collect table, partition and column level statistics * @param conf * - hive configuration * @param partList * - partition list * @param table * - table * @param tableScanOperator * - table scan operator * @return statistics object * @throws HiveException */ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, ColumnStatsList colStatsCache, Table table, TableScanOperator tableScanOperator) throws HiveException { // column level statistics are required only for the columns that are needed List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature(); List<String> neededColumns = tableScanOperator.getNeededColumns(); List<String> referencedColumns = tableScanOperator.getReferencedColumns(); return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns); }
// Collect statistics for the non-partition columns. The same list
// (nonPartColNamesThatRqrStats) is passed in two consecutive argument positions and both
// trailing boolean flags are true.
// NOTE(review): by position these look like (neededColumns, referencedColumns,
// fetchColStats, fetchPartStats) - confirm against the eight-argument overload.
Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, nonPartColNamesThatRqrStats, true, true);
// Two alternative collectStatistics calls that differ only in the partition-list argument:
// the first branch passes null and then marks colStatsCached as State.COMPLETE, the else
// branch passes partitionList. Both pass colStatsCached and a trailing true flag.
// NOTE(review): the enclosing if-condition and the rest of the else body are outside this
// fragment - confirm what decides between the two branches.
Statistics stats = StatsUtils.collectStatistics(hiveConf, null, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true); colStatsCached.updateState(State.COMPLETE); } else { Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true);
/** * Collect table, partition and column level statistics * @param conf * - hive configuration * @param partList * - partition list * @param table * - table * @param tableScanOperator * - table scan operator * @return statistics object * @throws HiveException */ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, TableScanOperator tableScanOperator) throws HiveException { // column level statistics are required only for the columns that are needed List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature(); List<String> neededColumns = tableScanOperator.getNeededColumns(); List<String> referencedColumns = tableScanOperator.getReferencedColumns(); return collectStatistics(conf, partList, table, schema, neededColumns, referencedColumns); }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsop = (TableScanOperator) nd; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop); ColumnStatsList colStatsCached = aspCtx.getParseContext().getColStatsCached(partList); Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, colStatsCached, table, tsop); stats = applyRuntimeStats(aspCtx.getParseContext().getContext(), stats, tsop); tsop.setStatistics(stats); if (LOG.isDebugEnabled()) { LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } } catch (HiveException e) { LOG.debug("Failed to retrieve stats ", e); throw new SemanticException(e); } return null; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsop = (TableScanOperator) nd; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop); Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop); tsop.setStatistics(stats.clone()); if (isDebugEnabled) { LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } catch (HiveException e) { LOG.debug("Failed to retrieve stats ",e); throw new SemanticException(e); } return null; } }
/**
 * Collects statistics, taking both fetch switches from the configuration.
 *
 * <p>Reads {@code HIVE_STATS_FETCH_COLUMN_STATS} and {@code HIVE_STATS_FETCH_PARTITION_STATS}
 * from {@code conf} and forwards them to the eight-argument overload.
 *
 * @param conf hive configuration the two flags are read from
 * @param partList pruned partition list
 * @param table table
 * @param schema column infos forwarded to the delegate
 * @param neededColumns needed column names
 * @param referencedColumns referenced column names
 * @return statistics object produced by the delegate
 * @throws HiveException if the delegate fails
 */
private static Statistics collectStatistics(
    HiveConf conf,
    PrunedPartitionList partList,
    Table table,
    List<ColumnInfo> schema,
    List<String> neededColumns,
    List<String> referencedColumns) throws HiveException {
  final boolean wantColumnStats =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
  final boolean wantPartitionStats =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS);

  return collectStatistics(
      conf, partList, table, schema, neededColumns, referencedColumns,
      wantColumnStats, wantPartitionStats);
}
// Collect statistics for the non-partition columns. The same list
// (nonPartColNamesThatRqrStats) is passed in two consecutive argument positions and both
// trailing boolean flags are true.
// NOTE(review): by position these look like (neededColumns, referencedColumns,
// fetchColStats, fetchPartStats) - confirm against the eight-argument overload.
Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, nonPartColNamesThatRqrStats, true, true);
/** * Collect table, partition and column level statistics * @param conf * - hive configuration * @param partList * - partition list * @param table * - table * @param tableScanOperator * - table scan operator * @return statistics object * @throws HiveException */ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, TableScanOperator tableScanOperator) throws HiveException { // column level statistics are required only for the columns that are needed List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature(); List<String> neededColumns = tableScanOperator.getNeededColumns(); List<String> referencedColumns = tableScanOperator.getReferencedColumns(); return collectStatistics(conf, partList, table, schema, neededColumns, referencedColumns); }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsop = (TableScanOperator) nd; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop); Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop); tsop.setStatistics(stats.clone()); if (isDebugEnabled) { LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } catch (HiveException e) { LOG.debug(e); throw new SemanticException(e); } return null; } }