@Override
public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
  setReducer(replacementMap.get(getReducer()));
}
public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root,
    SparkWork sparkWork) throws SemanticException {
  Preconditions.checkArgument(!root.getParentOperators().isEmpty(),
      "AssertionError: expected root.getParentOperators() to be non-empty");

  ReduceWork reduceWork = new ReduceWork("Reducer " + (++sequenceNumber));
  LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
  reduceWork.setReducer(root);
  reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));

  // Pick the maximum # reducers across all parents as the # of reduce tasks.
  int maxExecutors = -1;
  for (Operator<? extends OperatorDesc> parentOfRoot : root.getParentOperators()) {
    Preconditions.checkArgument(parentOfRoot instanceof ReduceSinkOperator,
        "AssertionError: expected parentOfRoot to be an "
        + "instance of ReduceSinkOperator, but was " + parentOfRoot.getClass().getName());
    ReduceSinkOperator reduceSink = (ReduceSinkOperator) parentOfRoot;
    maxExecutors = Math.max(maxExecutors, reduceSink.getConf().getNumReducers());
  }
  reduceWork.setNumReduceTasks(maxExecutors);

  ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
  setupReduceSink(context, reduceWork, reduceSink);

  sparkWork.add(reduceWork);

  SparkEdgeProperty edgeProp = getEdgeProperty(context.conf, reduceSink, reduceWork);
  sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);

  return reduceWork;
}
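A minimal, self-contained sketch (not from the Hive source) of the reducer-count rule the loop above implements: the new ReduceWork takes the maximum numReducers across all parent ReduceSinkOperators. The counts 4, 8, and 2 are hypothetical sample values.

// Hypothetical parent reducer counts; the loop above would pick max(4, 8, 2) = 8.
int maxExecutors = -1;
for (int numReducers : new int[] {4, 8, 2}) {
  maxExecutors = Math.max(maxExecutors, numReducers);
}
assert maxExecutors == 8;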
private void vectorizeReduceWork(ReduceWork reduceWork,
    VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException {
  LOG.info("Vectorizing ReduceWork...");
  reduceWork.setVectorMode(true);

  // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as
  // expected. We need to descend down, otherwise it breaks our algorithm that determines
  // VectorizationContext... So, we use PreOrderWalker instead of DefaultGraphWalker.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  ReduceWorkVectorizationNodeProcessor vnp =
      new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo);
  addReduceWorkRules(opRules, vnp);
  Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
  GraphWalker ogw = new PreOrderWalker(disp);

  // Iterate over the reduce operator tree.
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(reduceWork.getReducer());
  LOG.info("vectorizeReduceWork reducer Operator: " + reduceWork.getReducer().getName() + "...");
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  ogw.startWalking(topNodes, nodeOutput);

  // Necessary since we are vectorizing the root operator in reduce.
  reduceWork.setReducer(vnp.getRootVectorOp());

  vectorTaskColumnInfo.setScratchTypeNameArray(vnp.getVectorScratchColumnTypeNames());
  vectorTaskColumnInfo.transferToBaseWork(reduceWork);

  if (LOG.isDebugEnabled()) {
    debugDisplayAllMaps(reduceWork);
  }
}
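For context, a minimal sketch of the NodeProcessor shape that DefaultRuleDispatcher and PreOrderWalker expect. This is illustrative only; it is not the ReduceWorkVectorizationNodeProcessor used above.

// Illustrative processor: fires on every node the walker visits.
NodeProcessor traceProc = new NodeProcessor() {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    // PreOrderWalker visits a parent before its children, which is what the
    // VectorizationContext propagation above depends on.
    return null;
  }
};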
/**
 * Initialize the current union plan.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
    GenMRProcContext opProcCtx, Task<? extends Serializable> unionTask)
    throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);

  MapredWork plan = (MapredWork) unionTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  opTaskMap.put(reducer, unionTask);

  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
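A note on the needsTagging check (my gloss, not from the source): tagging is generally required when the reducer consumes rows from more than one parent, such as a join, so that each row carries a tag identifying which ReduceSinkOperator produced it.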
/**
 * Met cRS in pOP(parentTask with RS)-cRS-cOP(noTask) case.
 * Create a new child task for cRS-cOP and link the two tasks by a temporary file:
 * pOP-FS / TS-cRS-cOP.
 *
 * @param cRS
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx)
    throws SemanticException {
  // Generate a new task.
  ParseContext parseCtx = opProcCtx.getParseCtx();
  Task<? extends Serializable> parentTask = opProcCtx.getCurrTask();

  MapredWork childPlan = getMapRedWork(parseCtx);
  Task<? extends Serializable> childTask = TaskFactory.get(childPlan);
  Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0);

  // Add the reducer.
  ReduceWork rWork = new ReduceWork();
  childPlan.setReduceWork(rWork);
  rWork.setReducer(reducer);
  ReduceSinkDesc desc = cRS.getConf();
  rWork.setNumReduceTasks(desc.getNumReducers()); // autoboxed; avoids deprecated new Integer(...)

  opProcCtx.getOpTaskMap().put(reducer, childTask);

  splitTasks(cRS, parentTask, childTask, opProcCtx);
}
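Per the Javadoc above, splitTasks links the parent and child tasks through a temporary file: the parent plan ends in a FileSinkOperator (pOP-FS), and the child plan begins with a TableScanOperator reading that file back (TS-cRS-cOP).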
mws[1].setPathToAliases(pathMap);
rws[0].setReducer(op);
rws[1].setReducer(op);
reduceWork.setReducer(newVectorReducer);
plan.getReduceWork().setReducer(reducer);
ReduceSinkDesc desc = op.getConf();
@SuppressWarnings("unchecked") private void populateMapRedPlan1(Table src) throws SemanticException { ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } // map-side work Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); addMapWork(mr, src, "a", op1); ReduceWork rWork = new ReduceWork(); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); mr.setReduceWork(rWork); // reduce side work Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"), Utilities.defaultTd, false)); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); List<String> colNames = new ArrayList<String>(); colNames.add(HiveConf.getColumnInternalName(2)); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3); rWork.setReducer(op2); }
@SuppressWarnings("unchecked") private void populateMapRedPlan2(Table src) throws Exception { ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } // map-side work Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities .makeList(getStringColumn("key"), getStringColumn("value")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); addMapWork(mr, src, "a", op1); ReduceWork rWork = new ReduceWork(); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); mr.setReduceWork(rWork); // reduce side work Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan2.out"), Utilities.defaultTd, false)); Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0)); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3); rWork.setReducer(op2); }
reduceWork.setReducer(root);
reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
@SuppressWarnings("unchecked") private void populateMapRedPlan5(Table src) throws SemanticException { // map-side work ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("0")), Utilities .makeList(getStringColumn("0"), getStringColumn("1")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities .makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0); addMapWork(mr, src, "a", op4); ReduceWork rWork = new ReduceWork(); mr.setReduceWork(rWork); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op0.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo()); // reduce side work Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan5.out"), Utilities.defaultTd, false)); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0)); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3); rWork.setReducer(op2); }
Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
rWork.setReducer(op5);
    Utilities.makeList(outputColumns.get(0))), op4);
rWork.setReducer(op5);