/**
 * Checks whether this task type can execute the given table scan.
 *
 * @param op    the table scan operator under validation
 * @param mWork the map work the operator belongs to (unused here, kept for signature parity)
 * @return true when the scan is supported; false when it gathers stats, which is unsupported
 */
private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
  boolean gatherStats = op.getConf().isGatherStats();
  if (gatherStats) {
    // ANALYZE-style stats gathering cannot run in this execution mode.
    setOperatorIssue("gather stats not supported");
  }
  return !gatherStats;
}
/**
 * Logical equality for plan comparison: two TableScanDescs are the same when they
 * scan the same qualified table with the same filter, row limit, and stats-gathering
 * flag.
 *
 * @param other the descriptor to compare against; may be null
 * @return true when both descriptors describe an identical scan
 */
@Override
public boolean isSame(OperatorDesc other) {
  // Guard against null before touching other.getClass(); the original threw NPE here.
  // Class-name comparison (rather than getClass() identity) is kept as-is.
  if (other == null || !getClass().getName().equals(other.getClass().getName())) {
    return false;
  }
  TableScanDesc otherDesc = (TableScanDesc) other;
  return Objects.equals(getQualifiedTable(), otherDesc.getQualifiedTable())
      && ExprNodeDescUtils.isSame(getFilterExpr(), otherDesc.getFilterExpr())
      && getRowLimit() == otherDesc.getRowLimit()
      && isGatherStats() == otherDesc.isGatherStats();
}
/**
 * Publishes any statistics gathered for ANALYZE before delegating shutdown
 * to the superclass.
 *
 * @param abort whether the operator is being closed due to an aborted query
 * @throws HiveException if publishing stats or the superclass close fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stats is only initialized when gather-stats is on, so short-circuit order matters.
  boolean pendingStats = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (pendingStats) {
    publishStats();
  }
  super.closeOp(abort);
}
/**
 * Validates a table scan for this execution path.
 *
 * @param op    table scan operator to validate
 * @param mWork enclosing map work (unused, retained for a uniform validator signature)
 * @return false when the scan gathers statistics (unsupported), true otherwise
 */
private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
  if (op.getConf().isGatherStats()) {
    // Stats gathering requires the legacy path; reject with a descriptive issue.
    setOperatorIssue("gather stats not supported");
    return false;
  }
  return true;
}
@Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); inputFileChanged = false; if (conf == null) { return; } rowLimit = conf.getRowLimit(); if (!conf.isGatherStats()) { return; } if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf); } defaultPartitionName = HiveConf.getVar(hconf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); currentStat = null; stats = new HashMap<String, Stat>(); }
/**
 * Column-pruner hook for table scans: records the set of columns actually needed
 * downstream so the scan can skip the rest.
 *
 * @param nd          the TableScanOperator node being visited
 * @param stack       the walker's node stack (unused)
 * @param ctx         the pruning context carrying per-operator column lists
 * @param nodeOutputs child results (unused)
 * @return always null; results are communicated through the context
 * @throws SemanticException propagated from column-list generation
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator scanOp = (TableScanOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
  List<FieldNode> cols = cppCtx.genColLists(operator);
  // null means "all columns needed"; only stats gathering forces an explicit list.
  if (cols == null && !scanOp.getConf().isGatherStats()) {
    scanOp.setNeededColumnIDs(null);
    return null;
  }
  if (cols == null) {
    cols = new ArrayList<FieldNode>();
  }
  cppCtx.getPrunedColLists().put(operator, cols);
  setupNeededColumns(scanOp, scanOp.getSchema(), cols);
  return null;
}
}
/**
 * Finalizes the operator: resolves a missing file id from the execution context,
 * publishes gathered statistics if any, then performs the superclass close.
 *
 * @param abort whether the close is due to query abort
 * @throws HiveException on failure publishing stats or in the superclass close
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // Late-bind the file id when the execution context never recorded one.
  if (getExecContext() != null && getExecContext().getFileId() == null) {
    updateFileId();
  }
  // stats exists only when gather-stats is enabled; keep the short-circuit order.
  boolean pendingStats = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (pendingStats) {
    publishStats();
  }
  super.closeOp(abort);
}
/**
 * Visits a TableScanOperator during column pruning and registers the columns that
 * downstream operators actually consume.
 *
 * @param nd          the table scan node
 * @param stack       walker stack (unused)
 * @param ctx         pruning context holding the pruned-column map
 * @param nodeOutputs outputs of already-processed children (unused)
 * @return null; all effects go through the context and the scan operator
 * @throws SemanticException if generating the needed-column list fails
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  final TableScanOperator scanOp = (TableScanOperator) nd;
  final ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  final Operator<? extends OperatorDesc> asOperator = (Operator<? extends OperatorDesc>) nd;
  List<FieldNode> neededCols = cppCtx.genColLists(asOperator);
  if (neededCols == null) {
    if (!scanOp.getConf().isGatherStats()) {
      // "All columns" case: signal with a null id list and stop.
      scanOp.setNeededColumnIDs(null);
      return null;
    }
    neededCols = new ArrayList<FieldNode>();
  }
  cppCtx.getPrunedColLists().put(asOperator, neededCols);
  RowSchema schema = scanOp.getSchema();
  setupNeededColumns(scanOp, schema, neededCols);
  return null;
}
}
/**
 * Forwards each incoming row (or vectorized batch) to the children. The table data
 * itself is read by the map-reduce framework's mapper, so apart from optionally
 * gathering statistics for ANALYZE and enforcing the configured row limit, this
 * operator is a pass-through. Should that assumption ever change (table data no
 * longer read solely by the mapper), this operator would take over the read.
 *
 * @param row a single row object, or a VectorizedRowBatch in vectorized mode
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  // Row limit of -1 (or any negative) means unlimited.
  if (rowLimit >= 0 && checkSetDone(row, tag)) {
    return;
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  if (vectorized) {
    vectorForward((VectorizedRowBatch) row);
  } else {
    forward(row, inputObjInspectors[tag]);
  }
}
if(scanOp.getConf().isGatherStats()) { cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
if(scanOp.getConf().isGatherStats()) { cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
/**
 * Forwards rows to the children, enforcing the configured row limit for both
 * single-row and vectorized (batch) inputs and optionally gathering statistics
 * for ANALYZE. The table data is read by the map-reduce framework's mapper; this
 * operator only post-processes what the mapper delivers.
 *
 * @param row a row object, or a VectorizedRowBatch whose size may be trimmed
 *            in place so the limit is never exceeded
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (row instanceof VectorizedRowBatch) {
      VectorizedRowBatch batch = (VectorizedRowBatch) row;
      if (currCount >= rowLimit) {
        setDone(true);
        return;
      }
      // Trim the batch in place so the forwarded total never passes the limit.
      int remaining = rowLimit - currCount;
      if (batch.size > remaining) {
        batch.size = remaining;
      }
      currCount += batch.size;
    } else if (currCount++ >= rowLimit) {
      setDone(true);
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
/**
 * Compares two table scan operators by their descriptors: alias, filter
 * expression, row limit, and stats-gathering flag must all match.
 *
 * @param op1 first operator; must not be null
 * @param op2 second operator; must not be null
 * @return true when both operators describe an equivalent scan
 */
@Override
public boolean equals(TableScanOperator op1, TableScanOperator op2) {
  Preconditions.checkNotNull(op1);
  Preconditions.checkNotNull(op2);
  TableScanDesc conf1 = op1.getConf();
  TableScanDesc conf2 = op2.getConf();
  return compareString(conf1.getAlias(), conf2.getAlias())
      && compareExprNodeDesc(conf1.getFilterExpr(), conf2.getFilterExpr())
      && conf1.getRowLimit() == conf2.getRowLimit()
      && conf1.isGatherStats() == conf2.isGatherStats();
}
}
/**
 * Publishes the gathered statistic, if any, when the operator closes.
 * Note: this variant does not delegate to super.closeOp.
 *
 * @param abort whether the close is due to an aborted query
 * @throws HiveException if publishing the statistics fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stat is only populated when gather-stats is enabled; check flag first.
  boolean shouldPublish = conf != null && conf.isGatherStats() && stat != null;
  if (shouldPublish) {
    publishStats();
  }
}
/**
 * Publishes gathered statistics, if any, on close.
 * Note: this variant does not delegate to super.closeOp.
 *
 * @param abort whether the close is due to an aborted query
 * @throws HiveException if publishing the statistics fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stats is only initialized when gather-stats is on; keep short-circuit order.
  boolean shouldPublish = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (shouldPublish) {
    publishStats();
  }
}
/**
 * Forwards the row to the children, optionally gathering statistics for the
 * ANALYZE command first. The table data is read by the map-reduce framework's
 * mapper, so this operator is otherwise a pass-through; if that assumption ever
 * stops holding, this operator would be enhanced to read the table itself.
 *
 * @param row the incoming row object
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void processOp(Object row, int tag) throws HiveException {
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
@Override protected void initializeOp(Configuration hconf) throws HiveException { initializeChildren(hconf); inputFileChanged = false; if (conf == null) { return; } if (!conf.isGatherStats()) { return; } this.hconf = hconf; if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf, ExecDriver.class); } stat = null; partitionSpecs = null; if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) { // NON PARTITIONED table return; } }
/**
 * Forwards each row to the children, enforcing the configured row limit and
 * optionally gathering statistics for ANALYZE. The table data itself is read by
 * the map-reduce framework's mapper; this operator only post-processes it, and
 * would be enhanced to read the table should that assumption ever change.
 *
 * @param row the incoming row object
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  // A negative rowLimit disables limiting; currCount only advances when enabled.
  if (rowLimit >= 0 && currCount++ >= rowLimit) {
    setDone(true);
    return;
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
@Override protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException { Collection<Future<?>> result = super.initializeOp(hconf); inputFileChanged = false; if (conf == null) { return result; } rowLimit = conf.getRowLimit(); if (!conf.isGatherStats()) { return result; } this.hconf = hconf; if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf); } defaultPartitionName = HiveConf.getVar(hconf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); currentStat = null; stats = new HashMap<String, Stat>(); if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) { // NON PARTITIONED table return result; } return result; }
/**
 * Column-pruner visit for a table scan: computes and records which column names
 * downstream operators need, so the scan can avoid reading the rest.
 *
 * @param nd          the TableScanOperator node
 * @param stack       walker stack (unused)
 * @param ctx         pruning context holding per-operator pruned-column lists
 * @param nodeOutputs child outputs (unused)
 * @return null; effects are applied to the context and the scan operator
 * @throws SemanticException if generating the column list fails
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  final TableScanOperator scanOp = (TableScanOperator) nd;
  final ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  final Operator<? extends OperatorDesc> asOperator = (Operator<? extends OperatorDesc>) nd;
  List<String> neededCols = cppCtx.genColLists(asOperator);
  if (neededCols == null) {
    if (!scanOp.getConf().isGatherStats()) {
      // null list means "all columns"; signal that and stop.
      scanOp.setNeededColumnIDs(null);
      return null;
    }
    neededCols = new ArrayList<String>();
  }
  cppCtx.getPrunedColLists().put(asOperator, neededCols);
  RowSchema schema = scanOp.getSchema();
  setupNeededColumns(scanOp, schema, neededCols);
  return null;
}
}