/**
 * Checks whether this task type can execute the given table scan.
 *
 * @param op    the table scan operator under validation
 * @param mWork the map work the operator belongs to (unused here, kept for signature parity)
 * @return true when the scan is supported; false when it gathers stats, which is unsupported
 */
private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
  boolean gatherStats = op.getConf().isGatherStats();
  if (gatherStats) {
    // ANALYZE-style stats gathering cannot run in this execution mode.
    setOperatorIssue("gather stats not supported");
  }
  return !gatherStats;
}
/**
 * Logical equality for plan comparison: two TableScanDescs are the same when they
 * scan the same qualified table with the same filter, row limit, and stats-gathering
 * flag.
 *
 * @param other the descriptor to compare against; may be null
 * @return true when both descriptors describe an identical scan
 */
@Override
public boolean isSame(OperatorDesc other) {
  // Guard against null before touching other.getClass(); the original threw NPE here.
  // Class-name comparison (rather than getClass() identity) is kept as-is.
  if (other == null || !getClass().getName().equals(other.getClass().getName())) {
    return false;
  }
  TableScanDesc otherDesc = (TableScanDesc) other;
  return Objects.equals(getQualifiedTable(), otherDesc.getQualifiedTable())
      && ExprNodeDescUtils.isSame(getFilterExpr(), otherDesc.getFilterExpr())
      && getRowLimit() == otherDesc.getRowLimit()
      && isGatherStats() == otherDesc.isGatherStats();
}
/**
 * Publishes any statistics gathered for ANALYZE before delegating shutdown
 * to the superclass.
 *
 * @param abort whether the operator is being closed due to an aborted query
 * @throws HiveException if publishing stats or the superclass close fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stats is only initialized when gather-stats is on, so short-circuit order matters.
  boolean pendingStats = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (pendingStats) {
    publishStats();
  }
  super.closeOp(abort);
}
/**
 * Validates a table scan for this execution path.
 *
 * @param op    table scan operator to validate
 * @param mWork enclosing map work (unused, retained for a uniform validator signature)
 * @return false when the scan gathers statistics (unsupported), true otherwise
 */
private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
  if (op.getConf().isGatherStats()) {
    // Stats gathering requires the legacy path; reject with a descriptive issue.
    setOperatorIssue("gather stats not supported");
    return false;
  }
  return true;
}
@Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); inputFileChanged = false; if (conf == null) { return; } rowLimit = conf.getRowLimit(); if (!conf.isGatherStats()) { return; } if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf); } defaultPartitionName = HiveConf.getVar(hconf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); currentStat = null; stats = new HashMap<String, Stat>(); }
/**
 * Column-pruner hook for table scans: records the set of columns actually needed
 * downstream so the scan can skip the rest.
 *
 * @param nd          the TableScanOperator node being visited
 * @param stack       the walker's node stack (unused)
 * @param ctx         the pruning context carrying per-operator column lists
 * @param nodeOutputs child results (unused)
 * @return always null; results are communicated through the context
 * @throws SemanticException propagated from column-list generation
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator scanOp = (TableScanOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
  List<FieldNode> cols = cppCtx.genColLists(operator);
  // null means "all columns needed"; only stats gathering forces an explicit list.
  if (cols == null && !scanOp.getConf().isGatherStats()) {
    scanOp.setNeededColumnIDs(null);
    return null;
  }
  if (cols == null) {
    cols = new ArrayList<FieldNode>();
  }
  cppCtx.getPrunedColLists().put(operator, cols);
  setupNeededColumns(scanOp, scanOp.getSchema(), cols);
  return null;
}
}
/**
 * Finalizes the operator: resolves a missing file id from the execution context,
 * publishes gathered statistics if any, then performs the superclass close.
 *
 * @param abort whether the close is due to query abort
 * @throws HiveException on failure publishing stats or in the superclass close
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // Late-bind the file id when the execution context never recorded one.
  if (getExecContext() != null && getExecContext().getFileId() == null) {
    updateFileId();
  }
  // stats exists only when gather-stats is enabled; keep the short-circuit order.
  boolean pendingStats = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (pendingStats) {
    publishStats();
  }
  super.closeOp(abort);
}
/**
 * Visits a TableScanOperator during column pruning and registers the columns that
 * downstream operators actually consume.
 *
 * @param nd          the table scan node
 * @param stack       walker stack (unused)
 * @param ctx         pruning context holding the pruned-column map
 * @param nodeOutputs outputs of already-processed children (unused)
 * @return null; all effects go through the context and the scan operator
 * @throws SemanticException if generating the needed-column list fails
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  final TableScanOperator scanOp = (TableScanOperator) nd;
  final ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  final Operator<? extends OperatorDesc> asOperator = (Operator<? extends OperatorDesc>) nd;
  List<FieldNode> neededCols = cppCtx.genColLists(asOperator);
  if (neededCols == null) {
    if (!scanOp.getConf().isGatherStats()) {
      // "All columns" case: signal with a null id list and stop.
      scanOp.setNeededColumnIDs(null);
      return null;
    }
    neededCols = new ArrayList<FieldNode>();
  }
  cppCtx.getPrunedColLists().put(asOperator, neededCols);
  RowSchema schema = scanOp.getSchema();
  setupNeededColumns(scanOp, schema, neededCols);
  return null;
}
}
/**
 * Forwards each incoming row (or vectorized batch) to the children. The table data
 * itself is read by the map-reduce framework's mapper, so apart from optionally
 * gathering statistics for ANALYZE and enforcing the configured row limit, this
 * operator is a pass-through. Should that assumption ever change (table data no
 * longer read solely by the mapper), this operator would take over the read.
 *
 * @param row a single row object, or a VectorizedRowBatch in vectorized mode
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  // Row limit of -1 (or any negative) means unlimited.
  if (rowLimit >= 0 && checkSetDone(row, tag)) {
    return;
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  if (vectorized) {
    vectorForward((VectorizedRowBatch) row);
  } else {
    forward(row, inputObjInspectors[tag]);
  }
}
if(scanOp.getConf().isGatherStats()) { cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
if(scanOp.getConf().isGatherStats()) { cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
/**
 * Forwards rows to the children, enforcing the configured row limit for both
 * single-row and vectorized (batch) inputs and optionally gathering statistics
 * for ANALYZE. The table data is read by the map-reduce framework's mapper; this
 * operator only post-processes what the mapper delivers.
 *
 * @param row a row object, or a VectorizedRowBatch whose size may be trimmed
 *            in place so the limit is never exceeded
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (row instanceof VectorizedRowBatch) {
      VectorizedRowBatch batch = (VectorizedRowBatch) row;
      if (currCount >= rowLimit) {
        setDone(true);
        return;
      }
      // Trim the batch in place so the forwarded total never passes the limit.
      int remaining = rowLimit - currCount;
      if (batch.size > remaining) {
        batch.size = remaining;
      }
      currCount += batch.size;
    } else if (currCount++ >= rowLimit) {
      setDone(true);
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
/**
 * Compares two table scan operators by their descriptors: alias, filter
 * expression, row limit, and stats-gathering flag must all match.
 *
 * @param op1 first operator; must not be null
 * @param op2 second operator; must not be null
 * @return true when both operators describe an equivalent scan
 */
@Override
public boolean equals(TableScanOperator op1, TableScanOperator op2) {
  Preconditions.checkNotNull(op1);
  Preconditions.checkNotNull(op2);
  TableScanDesc conf1 = op1.getConf();
  TableScanDesc conf2 = op2.getConf();
  return compareString(conf1.getAlias(), conf2.getAlias())
      && compareExprNodeDesc(conf1.getFilterExpr(), conf2.getFilterExpr())
      && conf1.getRowLimit() == conf2.getRowLimit()
      && conf1.isGatherStats() == conf2.isGatherStats();
}
}
/**
 * Publishes the gathered statistic, if any, when the operator closes.
 * Note: this variant does not delegate to super.closeOp.
 *
 * @param abort whether the close is due to an aborted query
 * @throws HiveException if publishing the statistics fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stat is only populated when gather-stats is enabled; check flag first.
  boolean shouldPublish = conf != null && conf.isGatherStats() && stat != null;
  if (shouldPublish) {
    publishStats();
  }
}
/**
 * Publishes gathered statistics, if any, on close.
 * Note: this variant does not delegate to super.closeOp.
 *
 * @param abort whether the close is due to an aborted query
 * @throws HiveException if publishing the statistics fails
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  // stats is only initialized when gather-stats is on; keep short-circuit order.
  boolean shouldPublish = conf != null && conf.isGatherStats() && !stats.isEmpty();
  if (shouldPublish) {
    publishStats();
  }
}
/**
 * Forwards the row to the children, optionally gathering statistics for the
 * ANALYZE command first. The table data is read by the map-reduce framework's
 * mapper, so this operator is otherwise a pass-through; if that assumption ever
 * stops holding, this operator would be enhanced to read the table itself.
 *
 * @param row the incoming row object
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void processOp(Object row, int tag) throws HiveException {
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
@Override protected void initializeOp(Configuration hconf) throws HiveException { initializeChildren(hconf); inputFileChanged = false; if (conf == null) { return; } if (!conf.isGatherStats()) { return; } this.hconf = hconf; if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf, ExecDriver.class); } stat = null; partitionSpecs = null; if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) { // NON PARTITIONED table return; } }
/**
 * Forwards each row to the children, enforcing the configured row limit and
 * optionally gathering statistics for ANALYZE. The table data itself is read by
 * the map-reduce framework's mapper; this operator only post-processes it, and
 * would be enhanced to read the table should that assumption ever change.
 *
 * @param row the incoming row object
 * @param tag input tag selecting the object inspector
 * @throws HiveException propagated from stats gathering or forwarding
 */
@Override
public void process(Object row, int tag) throws HiveException {
  // A negative rowLimit disables limiting; currCount only advances when enabled.
  if (rowLimit >= 0 && currCount++ >= rowLimit) {
    setDone(true);
    return;
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
@Override protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException { Collection<Future<?>> result = super.initializeOp(hconf); inputFileChanged = false; if (conf == null) { return result; } rowLimit = conf.getRowLimit(); if (!conf.isGatherStats()) { return result; } this.hconf = hconf; if (hconf instanceof JobConf) { jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf); } defaultPartitionName = HiveConf.getVar(hconf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); currentStat = null; stats = new HashMap<String, Stat>(); if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) { // NON PARTITIONED table return result; } return result; }
/**
 * Column-pruner visit for a table scan: computes and records which column names
 * downstream operators need, so the scan can avoid reading the rest.
 *
 * @param nd          the TableScanOperator node
 * @param stack       walker stack (unused)
 * @param ctx         pruning context holding per-operator pruned-column lists
 * @param nodeOutputs child outputs (unused)
 * @return null; effects are applied to the context and the scan operator
 * @throws SemanticException if generating the column list fails
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  final TableScanOperator scanOp = (TableScanOperator) nd;
  final ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  final Operator<? extends OperatorDesc> asOperator = (Operator<? extends OperatorDesc>) nd;
  List<String> neededCols = cppCtx.genColLists(asOperator);
  if (neededCols == null) {
    if (!scanOp.getConf().isGatherStats()) {
      // null list means "all columns"; signal that and stop.
      scanOp.setNeededColumnIDs(null);
      return null;
    }
    neededCols = new ArrayList<String>();
  }
  cppCtx.getPrunedColLists().put(asOperator, neededCols);
  RowSchema schema = scanOp.getSchema();
  setupNeededColumns(scanOp, schema, neededCols);
  return null;
}
}