/**
 * Configures the split-size knobs on a MapredWork and, when the work already
 * has a reduce phase, recomputes the reducer count from the data volume.
 *
 * @param conf the Hive configuration (consulted for the reducer cap)
 * @param work the MapredWork to configure
 * @param targetSize desired bytes per split / per reducer
 * @param totalSize total input size in bytes
 */
private void setupMapRedWork(HiveConf conf, MapredWork work, long targetSize, long totalSize) {
  if (work.getNumReduceTasks() > 0) {
    int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
    // ceil(totalSize / targetSize), clamped into [1, maxReducers].
    int numReducers = (int) ((totalSize + targetSize - 1) / targetSize);
    numReducers = Math.min(maxReducers, Math.max(1, numReducers));
    work.setNumReduceTasks(numReducers);
  }
  // Pin min and max split sizes (overall, per node, per rack) to the same
  // target so splits come out at roughly targetSize bytes.
  work.setMaxSplitSize(targetSize);
  work.setMinSplitSize(targetSize);
  work.setMinSplitSizePerNode(targetSize);
  work.setMinSplitSizePerRack(targetSize);
}
/** * Create a MapredWork based on input path, the top operator and the input * table descriptor. * @param conf * @param topOp the table scan operator that is the root of the MapReduce task. * @param fsDesc the file sink descriptor that serves as the input to this merge task. * @param parentMR the parent MapReduce work * @param parentFS the last FileSinkOperator in the parent MapReduce work * @return the MapredWork */ private MapredWork createMergeTask(HiveConf conf, Operator<? extends Serializable> topOp, FileSinkDesc fsDesc) { ArrayList<String> aliases = new ArrayList<String>(); String inputDir = fsDesc.getDirName(); TableDesc tblDesc = fsDesc.getTableInfo(); aliases.add(inputDir); // dummy alias: just use the input path // constructing the default MapredWork MapredWork cplan = GenMapRedUtils.getMapRedWork(conf); cplan.getPathToAliases().put(inputDir, aliases); cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); cplan.setNumReduceTasks(0); cplan.getAliasToWork().put(inputDir, topOp); cplan.setMapperCannotSpanPartns(true); return cplan; } /**
/**
 * Initialize the current union plan.
 *
 * @param op the reduce sink operator encountered
 * @param opProcCtx processing context
 */
public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
  // Look up the task associated with this reduce sink's (first) parent.
  Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx ctx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<? extends Serializable> unionTask = ctx.getCurrTask();
  MapredWork plan = (MapredWork) unionTask.getWork();

  // The reduce sink's first child becomes the reducer of this plan.
  Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
  opProcCtx.getOpTaskMap().put(reducer, unionTask);

  plan.setReducer(reducer);
  plan.setNumReduceTasks(op.getConf().getNumReducers());

  // Joins need rows tagged so the reducer can tell its inputs apart.
  if (reducer.getClass() == JoinOperator.class) {
    plan.setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, unionTask, false);
}
// Propagate the reducer parallelism from the reduce sink's descriptor onto
// the plan.
// NOTE(review): fragment — the enclosing method is not visible in this chunk.
ReduceSinkDesc desc = op.getConf();
plan.setNumReduceTasks(desc.getNumReducers());
/** * Split the current plan by creating a temporary destination. * * @param op * the reduce sink operator encountered * @param opProcCtx * processing context */ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); MapredWork cplan = getMapRedWork(parseCtx.getConf()); Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx .getConf()); Operator<? extends Serializable> reducer = op.getChildOperators().get(0); // Add the reducer cplan.setReducer(reducer); ReduceSinkDesc desc = op.getConf(); cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, redTask); Task<? extends Serializable> currTask = opProcCtx.getCurrTask(); splitTasks(op, currTask, redTask, opProcCtx, true, false, 0); opProcCtx.getRootOps().add(op); }
// NOTE(review): fragment — both "not specified" printInfo calls are truncated
// mid-expression (their "+ reducers)" continuations fall outside this chunk)
// and the braces are unbalanced; the enclosing method is not visible here.
// Tokens preserved verbatim.
console
    .printInfo("Number of reduce tasks is set to 0 since there's no reduce operator");
work.setNumReduceTasks(Integer.valueOf(0));
} else {
  // An explicit (>= 0) reducer count from the work takes precedence: the
  // empty branch deliberately leaves it untouched.
  if (numReducersFromWork >= 0) {
  } else if (job.getNumReduceTasks() > 0) {
    // Fall back to the count configured in the job conf.
    int reducers = job.getNumReduceTasks();
    work.setNumReduceTasks(reducers);
    console
        .printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: "
  } else {
    // Otherwise estimate the reducer count from the input data size.
    int reducers = estimateNumberOfReducers();
    work.setNumReduceTasks(reducers);
    console
        .printInfo("Number of reduce tasks not specified. Estimated from input data size: "
// NOTE(review): fragment — unbalanced braces; the enclosing method and the
// condition guarding this else-branch are not visible in this chunk.
plan.setNumReduceTasks(desc.getNumReducers());
} else {
  // Presumably the no-reducer path: the operator itself is mapped to the
  // current task — verify against the enclosing method.
  opTaskMap.put(op, currTask);