private DummyStoreOperator getJoinParentOp(Operator<?> mergeReduceOp) {
  for (Operator<?> childOp : mergeReduceOp.getChildOperators()) {
    if ((childOp.getChildOperators() == null) || (childOp.getChildOperators().isEmpty())) {
      if (childOp instanceof DummyStoreOperator) {
        return (DummyStoreOperator) childOp;
      } else {
        throw new IllegalStateException("Was expecting dummy store operator but found: " + childOp);
      }
    } else {
      return getJoinParentOp(childOp);
    }
  }
  throw new IllegalStateException("Expecting a DummyStoreOperator found op: " + mergeReduceOp);
}
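// A minimal, self-contained sketch (not Hive code) of the same leaf-walk idea:
// descend along the first child chain until a childless node is reached, and
// require that the leaf has the expected type. The Node/Leaf names here are
// hypothetical stand-ins for Operator and DummyStoreOperator.
import java.util.Collections;
import java.util.List;

class Node {
  final List<Node> children;
  Node(List<Node> children) { this.children = children; }
}

class Leaf extends Node {
  Leaf() { super(Collections.emptyList()); }
}

class LeafWalk {
  static Leaf findLeaf(Node node) {
    for (Node child : node.children) {
      if (child.children.isEmpty()) {
        if (child instanceof Leaf) {
          return (Leaf) child;
        }
        throw new IllegalStateException("Expected a Leaf but found: " + child);
      }
      return findLeaf(child); // follow the first child, like getJoinParentOp
    }
    throw new IllegalStateException("No children under: " + node);
  }

  public static void main(String[] args) {
    Node tree = new Node(List.of(new Node(List.of(new Leaf()))));
    System.out.println(findLeaf(tree)); // prints the Leaf instance
  }
}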
private void initializeMultipleSources(ReduceWork redWork, int numTags, ObjectInspector[] ois,
    ReduceRecordSource[] sources) throws Exception {
  for (int tag = 0; tag < redWork.getTagToValueDesc().size(); tag++) {
    if (redWork.getTagToValueDesc().get(tag) == null) {
      continue;
    }
    checkAbortCondition();
    initializeSourceForTag(redWork, tag, ois, sources, redWork.getTagToValueDesc().get(tag),
        redWork.getTagToInput().get(tag));
  }
}
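// Minimal sketch of the tag-indexed initialization pattern above: tags are
// dense indices into parallel arrays, and tags with no value descriptor are
// skipped. The String-based "source" type here is a stand-in, not Hive's
// ReduceRecordSource.
import java.util.Arrays;
import java.util.List;

class TagInit {
  static String[] initSources(List<String> tagToValueDesc) {
    String[] sources = new String[tagToValueDesc.size()];
    for (int tag = 0; tag < tagToValueDesc.size(); tag++) {
      if (tagToValueDesc.get(tag) == null) {
        continue; // no input wired to this tag
      }
      sources[tag] = "source(" + tagToValueDesc.get(tag) + ")";
    }
    return sources;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(initSources(Arrays.asList("v0", null, "v2"))));
    // → [source(v0), null, source(v2)]
  }
}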
@Override
void run() throws Exception {
  for (Entry<String, LogicalOutput> outputEntry : outputs.entrySet()) {
    l4j.info("Starting Output: " + outputEntry.getKey());
    if (!isAborted()) {
      outputEntry.getValue().start();
      ((TezKVOutputCollector) outMap.get(outputEntry.getKey())).initialize();
    }
  }

  // run the operator pipeline
  startAbortChecks();
  while (sources[bigTablePosition].pushRecord()) {
    addRowAndMaybeCheckAbort();
  }
}
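// Hypothetical sketch of the record-pump shape used in run() above: start the
// outputs once, then push records until the big-table source is exhausted,
// checking an abort flag every few rows. The Iterator-backed source and the
// row interval are assumptions, not Hive's exact mechanism.
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicBoolean;

class RecordPump {
  private final AtomicBoolean aborted = new AtomicBoolean(false);
  private long rowsSinceCheck = 0;

  // Returns false once the source is drained, mirroring pushRecord().
  boolean pushRecord(Iterator<String> source) {
    if (!source.hasNext()) {
      return false;
    }
    process(source.next());
    return true;
  }

  void run(Iterator<String> bigTableSource) throws InterruptedException {
    while (pushRecord(bigTableSource)) {
      if (++rowsSinceCheck % 1000 == 0 && aborted.get()) {
        throw new InterruptedException("processing aborted");
      }
    }
  }

  private void process(String row) { /* apply the operator pipeline here */ }

  public static void main(String[] args) throws InterruptedException {
    new RecordPump().run(java.util.List.of("r1", "r2").iterator());
  }
}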
// Excerpt from init(): checkAbortCondition() is interleaved between the
// expensive setup steps so a pending abort is honored promptly.
List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
checkAbortCondition();
if (shuffleInputs != null) {
  l4j.info("Waiting for ShuffleInputs to become ready");
  // ... wait for the shuffle inputs ...
}

// Wire each merge work's leaf DummyStoreOperator to its tag.
DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);
checkAbortCondition();
// ...
checkAbortCondition();

// Initialize the dynamic value registry for this reduce work.
RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs);
registryTez.init(registryConf);
checkAbortCondition();

// Multi-tag case: one record source per tag, then initialize the reducer.
initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, mainWorkOIs);
checkAbortCondition();

// Merge-work case: initialize the single source for this tag.
initializeSourceForTag(redWork, i, mainWorkOIs, sources, redWork.getTagToValueDesc().get(0),
    redWork.getTagToInput().get(0));
reducer.initializeLocalWork(jconf);
checkAbortCondition();
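// A minimal sketch of the cooperative-abort pattern the init code relies on:
// long-running setup calls checkAbortCondition() between steps so an abort
// requested by the framework is observed without interrupting a step midway.
// The flag and exception choices here are assumptions, not Hive's exact API.
import java.util.concurrent.atomic.AtomicBoolean;

class AbortableInit {
  private final AtomicBoolean aborted = new AtomicBoolean(false);

  void abort() { aborted.set(true); }

  private void checkAbortCondition() throws InterruptedException {
    if (aborted.get()) {
      throw new InterruptedException("Processing aborted during initialization");
    }
  }

  void init() throws InterruptedException {
    stepOne();
    checkAbortCondition(); // honor an abort between expensive steps
    stepTwo();
    checkAbortCondition();
  }

  private void stepOne() { }
  private void stepTwo() { }

  public static void main(String[] args) {
    AbortableInit a = new AbortableInit();
    a.abort();
    try {
      a.init();
    } catch (InterruptedException e) {
      System.out.println("aborted: " + e.getMessage());
    }
  }
}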
// Excerpt from close(): on abort, give each source a chance to close cleanly.
if (isAborted()) {
  for (ReduceRecordSource rs : sources) {
    if (!rs.close()) {
      setAborted(false); // Preserving the old logic. Hmm...
      break;
    }
  }
}
boolean abort = isAborted();
reducer.close(abort);
if (mergeWorkList != null) {
  // ... close each merge work's reducer as well ...
}
if (!isAborted()) {
  // ... remaining close work on the non-aborted path ...
}
public ReduceRecordProcessor(final JobConf jconf, final ProcessorContext context) throws Exception {
  super(jconf, context);

  String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID);
  cache = ObjectCacheFactory.getCache(jconf, queryId, true);
  dynamicValueCache = ObjectCacheFactory.getCache(jconf, queryId, false, true);

  String cacheKey = processorContext.getTaskVertexName() + REDUCE_PLAN_KEY;
  cacheKeys = Lists.newArrayList(cacheKey);
  dynamicValueCacheKeys = new ArrayList<String>();

  reduceWork = (ReduceWork) cache.retrieve(cacheKey, new Callable<Object>() {
    @Override
    public Object call() {
      return Utilities.getReduceWork(jconf);
    }
  });

  Utilities.setReduceWork(jconf, reduceWork);
  mergeWorkList = getMergeWorkList(jconf, cacheKey, queryId, cache, cacheKeys);
}
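// Sketch of the retrieve-or-compute pattern used for the reduce plan above:
// the cache returns the value for a key, invoking the Callable only on a miss,
// so the expensive plan deserialization happens once and the result is shared.
// This toy cache is an assumption, not Hive's ObjectCacheFactory implementation.
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

class ToyObjectCache {
  private final ConcurrentMap<String, Object> store = new ConcurrentHashMap<>();

  @SuppressWarnings("unchecked")
  <T> T retrieve(String key, Callable<T> loader) throws Exception {
    Object cached = store.get(key);
    if (cached == null) {
      cached = loader.call();       // miss: compute once
      Object prior = store.putIfAbsent(key, cached);
      if (prior != null) {
        cached = prior;             // another thread won the race
      }
    }
    return (T) cached;
  }

  public static void main(String[] args) throws Exception {
    ToyObjectCache cache = new ToyObjectCache();
    // The loader runs only on the first call for a given key.
    String plan1 = cache.retrieve("vertex_REDUCE_PLAN", () -> "deserialized plan");
    String plan2 = cache.retrieve("vertex_REDUCE_PLAN", () -> "never evaluated");
    System.out.println(plan1 == plan2); // true: same cached instance
  }
}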
// Earlier variant of the init() excerpt, without the interleaved abort checks.
List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
if (shuffleInputs != null) {
  l4j.info("Waiting for ShuffleInputs to become ready");
  // ... wait for the shuffle inputs ...
}

ReduceWork mergeReduceWork = (ReduceWork) mergeWork;
reducer = mergeReduceWork.getReducer();
DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);

sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, mainWorkOIs);

redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
initializeSourceForTag(redWork, i, mainWorkOIs, sources, redWork.getTagToValueDesc().get(0),
    redWork.getTagToInput().get(0));
reducer.initializeLocalWork(jconf);

children.addAll(dummyOps);
createOutputMap();
OperatorUtils.setChildrenCollector(children, outMap);
// Earlier variant of the constructor, before the query-id-scoped caches.
public ReduceRecordProcessor(final JobConf jconf, final ProcessorContext context) throws Exception {
  super(jconf, context);

  ObjectCache cache = ObjectCacheFactory.getCache(jconf);
  String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID);

  cacheKey = queryId + REDUCE_PLAN_KEY;
  cacheKeys = new ArrayList<String>();
  cacheKeys.add(cacheKey);

  reduceWork = (ReduceWork) cache.retrieve(cacheKey, new Callable<Object>() {
    @Override
    public Object call() {
      return Utilities.getReduceWork(jconf);
    }
  });

  Utilities.setReduceWork(jconf, reduceWork);
  mergeWorkList = getMergeWorkList(jconf, cacheKey, queryId, cache, cacheKeys);
}
@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs)
    throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
  // in case of broadcast-join read the broadcast edge inputs
  // (possibly asynchronously)
  LOG.info("Running task: " + getContext().getUniqueIdentifier());
  if (isMap) {
    rproc = new MapRecordProcessor(jobConf, getContext());
  } else {
    rproc = new ReduceRecordProcessor(jobConf, getContext());
  }
  initializeAndRunProcessor(inputs, outputs);
}
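// Minimal sketch (assumed names) of the strategy selection in run() above: one
// processor class serves both vertex kinds and picks the record-processor
// implementation at run time, then drives it through a single shared path.
class ProcessorRunner {
  interface RecordProcessor { void run() throws Exception; }

  static RecordProcessor select(boolean isMap) {
    return isMap ? new MapProc() : new ReduceProc();
  }

  static class MapProc implements RecordProcessor {
    @Override public void run() { System.out.println("map-side pipeline"); }
  }

  static class ReduceProc implements RecordProcessor {
    @Override public void run() { System.out.println("reduce-side pipeline"); }
  }

  public static void main(String[] args) throws Exception {
    select(false).run(); // reduce vertex → reduce-side pipeline
  }
}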