/**
 * Operator-plan equivalence check: two descriptors are the same when they
 * are of the same concrete class and agree on input name, vertex name and
 * target table.
 */
@Override
public boolean isSame(OperatorDesc other) {
  // Compare concrete runtime classes by name so subclasses are not conflated.
  if (!getClass().getName().equals(other.getClass().getName())) {
    return false;
  }
  AppMasterEventDesc that = (AppMasterEventDesc) other;
  return Objects.equals(getInputName(), that.getInputName())
      && Objects.equals(getVertexName(), that.getVertexName())
      && Objects.equals(getTable(), that.getTable());
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException { OptimizeTezProcContext context = (OptimizeTezProcContext) procContext; AppMasterEventOperator event = (AppMasterEventOperator) nd; AppMasterEventDesc desc = event.getConf(); if (desc.getStatistics().getDataSize() > context.conf .getLongVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE) && (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(event))) { context.pruningOpsRemovedByPriorOpt.add(event); GenTezUtils.removeBranch(event); // at this point we've found the fork in the op pipeline that has the pruning as a child plan. LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) desc).getTableScan().getName() + ". Expected data size is too big: " + desc.getStatistics().getDataSize()); } return false; } }
// Serializes this event's header: the superclass header fields first,
// followed by the target column name this pruning event applies to.
@Override public void writeEventHeader(DataOutputBuffer buffer) throws IOException { super.writeEventHeader(buffer); buffer.writeUTF(targetColumnName); }
/**
 * On successful close, sends the accumulated payload buffer as a single
 * InputInitializerEvent to the configured vertex/input through the Tez
 * processor context. Nothing is sent when the operator aborts.
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  if (abort) {
    return;
  }
  TezContext context = (TezContext) TezContext.get();
  String vertexName = getConf().getVertexName();
  String inputName = getConf().getInputName();
  if (hasReachedMaxSize) {
    // Payload overflowed: rewrite the buffer as a header that says to skip pruning.
    initDataBuffer(true);
  }
  // Copy exactly the logical length; the backing array may be larger.
  byte[] payload = new byte[buffer.getLength()];
  System.arraycopy(buffer.getData(), 0, payload, 0, buffer.getLength());
  Event event = InputInitializerEvent.create(vertexName, inputName,
      ByteBuffer.wrap(payload, 0, payload.length));
  if (LOG.isInfoEnabled()) {
    LOG.info("Sending Tez event to vertex = " + vertexName + ", input = " + inputName
        + ". Payload size = " + payload.length);
  }
  context.getTezProcessorContext().sendEvents(Collections.singletonList(event));
}
// Initializes the operator: reads the maximum event payload size from the
// configuration, instantiates the serializer from the target table's
// deserializer class, and primes the payload buffer with its header
// (pruning enabled, i.e. skipPruning=false).
@Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); MAX_SIZE = HiveConf.getLongVar(hconf, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE); serializer = (Serializer) ReflectionUtils.newInstance(conf.getTable().getDeserializerClass(), null); initDataBuffer(false); }
/**
 * Explain-plan hook: returns the vectorization details for this operator,
 * or null when no vector descriptor is attached (operator not vectorized).
 */
@Explain(vectorization = Vectorization.OPERATOR, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() {
  VectorAppMasterEventDesc vectorDesc = (VectorAppMasterEventDesc) getVectorDesc();
  return vectorDesc == null
      ? null
      : new AppMasterEventOperatorExplainVectorization(this, vectorDesc);
}
/**
 * Extends the base equivalence check with the pruning-specific fields:
 * target column name, target column type, and partition key string.
 */
@Override
public boolean isSame(OperatorDesc other) {
  if (!super.isSame(other)) {
    return false;
  }
  DynamicPruningEventDesc that = (DynamicPruningEventDesc) other;
  return Objects.equals(getTargetColumnName(), that.getTargetColumnName())
      && Objects.equals(getTargetColumnType(), that.getTargetColumnType())
      && Objects.equals(getPartKeyString(), that.getPartKeyString());
}
// Attach the vectorized descriptor to the event desc, then build the
// vectorized operator that replaces the row-mode one in the plan.
eventDesc.setVectorDesc(vectorEventDesc); vectorOp = OperatorFactory.getVectorOperator( op.getCompilationOpContext(), eventDesc, vContext, op);
/**
 * On successful close, sends the accumulated payload buffer as a single
 * InputInitializerEvent to the configured vertex/input through the Tez
 * processor context. Nothing is sent when the operator aborts.
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  if (!abort) {
    TezContext context = (TezContext) TezContext.get();
    String vertexName = getConf().getVertexName();
    String inputName = getConf().getInputName();
    if (hasReachedMaxSize) {
      // Payload overflowed: rewrite the buffer as a header that says to skip pruning.
      initDataBuffer(true);
    }
    // Copy exactly the logical length; the backing array may be larger.
    byte[] payload = new byte[buffer.getLength()];
    System.arraycopy(buffer.getData(), 0, payload, 0, buffer.getLength());
    Event event = InputInitializerEvent.create(vertexName, inputName,
        ByteBuffer.wrap(payload, 0, payload.length));
    // Query the logger directly rather than going through a cached
    // isLogInfoEnabled flag (matches LOG.isInfoEnabled() usage elsewhere).
    if (LOG.isInfoEnabled()) {
      LOG.info("Sending Tez event to vertex = " + vertexName + ", input = " + inputName
          + ". Payload size = " + payload.length);
    }
    context.getTezProcessorContext().sendEvents(Collections.singletonList(event));
  }
}
// Initializes the operator: reads the maximum event payload size from the
// configuration, instantiates the serializer from the target table's
// deserializer class, and primes the payload buffer with its header
// (pruning enabled, i.e. skipPruning=false).
@Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); MAX_SIZE = HiveConf.getLongVar(hconf, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE); serializer = (Serializer) ReflectionUtils.newInstance(conf.getTable().getDeserializerClass(), null); initDataBuffer(false); }
/**
 * On successful close, sends the accumulated payload buffer as a single
 * InputInitializerEvent to the configured vertex/input through the Tez
 * processor context. Nothing is sent when the operator aborts.
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  if (!abort) {
    TezContext context = (TezContext) TezContext.get();
    String vertexName = getConf().getVertexName();
    String inputName = getConf().getInputName();
    if (hasReachedMaxSize) {
      // Payload overflowed: rewrite the buffer as a header that says to skip pruning.
      initDataBuffer(true);
    }
    // Copy exactly the logical length; the backing array may be larger.
    byte[] payload = new byte[buffer.getLength()];
    System.arraycopy(buffer.getData(), 0, payload, 0, buffer.getLength());
    Event event = InputInitializerEvent.create(vertexName, inputName,
        ByteBuffer.wrap(payload, 0, payload.length));
    // Query the logger directly rather than going through a cached
    // isLogInfoEnabled flag (matches LOG.isInfoEnabled() usage elsewhere).
    if (LOG.isInfoEnabled()) {
      LOG.info("Sending Tez event to vertex = " + vertexName + ", input = " + inputName
          + ". Payload size = " + payload.length);
    }
    context.getTezProcessorContext().sendEvents(Collections.singletonList(event));
  }
}
// Collect the table referenced by this pruning event's descriptor.
tables.add(event.getConf().getTable());
// Serializes this event's header: the superclass header fields first,
// followed by the target column name this pruning event applies to.
@Override public void writeEventHeader(DataOutputBuffer buffer) throws IOException { super.writeEventHeader(buffer); buffer.writeUTF(targetColumnName); }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException { OptimizeTezProcContext context = (OptimizeTezProcContext) procContext; AppMasterEventOperator event = (AppMasterEventOperator) nd; AppMasterEventDesc desc = event.getConf(); if (desc.getStatistics().getDataSize() > context.conf .getLongVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE) && (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(event))) { context.pruningOpsRemovedByPriorOpt.add(event); GenTezUtils.removeBranch(event); // at this point we've found the fork in the op pipeline that has the pruning as a child plan. LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) desc).getTableScan().getName() + ". Expected data size is too big: " + desc.getStatistics().getDataSize()); } return false; } }
// Collect the table referenced by this pruning event's descriptor.
tables.add(event.getConf().getTable());
// (Re)creates the event payload buffer and writes its header: the
// descriptor's event-specific header fields followed by a boolean that
// tells the reader whether to skip pruning entirely (set to true when the
// payload previously overflowed the maximum event size).
protected void initDataBuffer(boolean skipPruning) throws HiveException { buffer = new DataOutputBuffer(); try { // add any other header info getConf().writeEventHeader(buffer); // write byte to say whether to skip pruning or not buffer.writeBoolean(skipPruning); } catch (IOException e) { throw new HiveException(e); } }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException { OptimizeTezProcContext context = (OptimizeTezProcContext) procContext; AppMasterEventOperator event = (AppMasterEventOperator) nd; AppMasterEventDesc desc = event.getConf(); if (desc.getStatistics().getDataSize() > context.conf .getLongVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE) && (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(event))) { context.pruningOpsRemovedByPriorOpt.add(event); GenTezUtils.getUtils().removeBranch(event); // at this point we've found the fork in the op pipeline that has the pruning as a child plan. LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) desc).getTableScan().getName() + ". Expected data size is too big: " + desc.getStatistics().getDataSize()); } return false; } }
// Initializes the operator, propagating the parent's async-init futures.
// Reads the maximum event payload size from the configuration, instantiates
// the serializer from the target table's deserializer class, and primes the
// payload buffer with its header (pruning enabled, i.e. skipPruning=false).
@Override public Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException { Collection<Future<?>> result = super.initializeOp(hconf); MAX_SIZE = HiveConf.getLongVar(hconf, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE); serializer = (Serializer) ReflectionUtils.newInstance(conf.getTable().getDeserializerClass(), null); initDataBuffer(false); return result; }
// (Re)creates the event payload buffer and writes its header: the
// descriptor's event-specific header fields followed by a boolean that
// tells the reader whether to skip pruning entirely (set to true when the
// payload previously overflowed the maximum event size).
protected void initDataBuffer(boolean skipPruning) throws HiveException { buffer = new DataOutputBuffer(); try { // add any other header info getConf().writeEventHeader(buffer); // write byte to say whether to skip pruning or not buffer.writeBoolean(skipPruning); } catch (IOException e) { throw new HiveException(e); } }
private void removeEventOperator(Set<Operator<?>> component, OptimizeTezProcContext context) { AppMasterEventOperator victim = null; for (Operator<?> o : component) { if (o instanceof AppMasterEventOperator) { if (victim == null || o.getConf().getStatistics().getDataSize() < victim.getConf().getStatistics() .getDataSize()) { victim = (AppMasterEventOperator) o; } } } if (victim == null || (!context.pruningOpsRemovedByPriorOpt.isEmpty() && context.pruningOpsRemovedByPriorOpt.contains(victim))) { return; } GenTezUtils.getUtils().removeBranch(victim); // at this point we've found the fork in the op pipeline that has the pruning as a child plan. LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) victim.getConf()).getTableScan().toString() + ". Needed to break cyclic dependency"); }