@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks,
    ParseContext pCtx, Context ctx) throws SemanticException {
  // A reduce sink does not have any children - since the plan by now has
  // been broken up into multiple tasks, iterate over all tasks.
  // For each task, go over all operators recursively.
  for (Task<? extends Serializable> rootTask : rootTasks) {
    breakTaskTree(rootTask);
  }

  PhysicalContext physicalContext = new PhysicalContext(conf,
      getParseContext(pCtx, rootTasks), ctx, rootTasks, pCtx.getFetchTask());
  PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(
      physicalContext, conf);
  physicalOptimizer.optimize();
}
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite()
      || pctx.getQueryProperties().isCTAS()
      || pctx.getLoadFileWork().size() > 1
      || !pctx.getLoadTableWork().isEmpty()) {
    return pctx;
  }

  String GBY = GroupByOperator.getOperatorName() + "%";
  String RS = ReduceSinkOperator.getOperatorName() + "%";
  String SEL = SelectOperator.getOperatorName() + "%";
  String FS = FileSinkOperator.getOperatorName() + "%";

  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", GBY + RS + GBY + SEL + FS),
      new SingleGBYProcessor(pctx));
  opRules.put(new RuleRegExp("R2", GBY + RS + GBY + FS),
      new SingleGBYProcessor(pctx));

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
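Each processor registered above implements Hive's NodeProcessor interface; the dispatcher invokes it whenever the walker's current operator path matches a rule's regular expression. A minimal sketch of such a processor, assuming the standard org.apache.hadoop.hive.ql.lib.NodeProcessor contract (the class name and empty body are illustrative, not the actual SingleGBYProcessor):

import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical processor: invoked when a walked operator path matches a
// registered rule, e.g. GBY-RS-GBY-SEL-FS for rule "R1" above.
public class ExampleGBYProcessor implements NodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    // 'nd' is the operator that completed the match; 'stack' holds the
    // root-to-node path at the time of the match. A real processor would
    // inspect or rewrite the operator tree here.
    return null;
  }
}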
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite()
      || pctx.getQueryProperties().isCTAS()
      || pctx.getLoadFileWork().size() > 1
      || !pctx.getLoadTableWork().isEmpty()
      // If getNameToSplitSample is not empty, at least one of the source
      // tables is being sampled and we cannot optimize.
      || !pctx.getNameToSplitSample().isEmpty()) {
    return pctx;
  }

  String TS = TableScanOperator.getOperatorName() + "%";
  String GBY = GroupByOperator.getOperatorName() + "%";
  String RS = ReduceSinkOperator.getOperatorName() + "%";
  String SEL = SelectOperator.getOperatorName() + "%";
  String FS = FileSinkOperator.getOperatorName() + "%";

  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", TS + SEL + GBY + RS + GBY + SEL + FS),
      new MetaDataProcessor(pctx));
  opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS),
      new MetaDataProcessor(pctx));

  NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
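For illustration, the rule patterns above are plain concatenations of operator names with "%" wildcards; a hedged sketch of what rule "R2" expands to, assuming Hive's standard operator names (TS, SEL, GBY, RS, FS):

// Illustrative only: builds the same pattern string as rule "R2" above.
// It matches a TableScan -> Select -> GroupBy -> ReduceSink -> GroupBy
// -> FileSink operator path, i.e. the plan of a simple aggregation query.
String r2 = TableScanOperator.getOperatorName() + "%"
    + SelectOperator.getOperatorName() + "%"
    + GroupByOperator.getOperatorName() + "%"
    + ReduceSinkOperator.getOperatorName() + "%"
    + GroupByOperator.getOperatorName() + "%"
    + FileSinkOperator.getOperatorName() + "%";
System.out.println(r2); // expected: TS%SEL%GBY%RS%GBY%FS%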
public static void processFileSink(GenTezProcContext context, FileSinkOperator fileSink)
    throws SemanticException {

  ParseContext parseContext = context.parseContext;

  boolean isInsertTable = // is INSERT OVERWRITE TABLE
      GenMapRedUtils.isInsertInto(parseContext, fileSink);
  HiveConf hconf = parseContext.getConf();

  boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
      hconf, fileSink, context.currentTask, isInsertTable);

  Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir,
      fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);

  if (chDir) {
    // Merge the files in the destination table/partitions by creating a map-only merge job.
    // If the underlying data is RCFile or ORC, an RCFileBlockMerge or
    // OrcFileStripeMerge task is created instead.
    LOG.info("using CombineHiveInputformat for the merge job");
    Utilities.FILE_OP_LOGGER.debug("will generate MR work for merging files from "
        + fileSink.getConf().getDirName() + " to " + finalName);
    GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName,
        context.dependencyTask, context.moveTask, hconf, context.currentTask,
        parseContext.getQueryState().getLineageState());
  }

  FetchTask fetchTask = parseContext.getFetchTask();
  if (fetchTask != null && context.currentTask.getNumChild() == 0) {
    if (fetchTask.isFetchFrom(fileSink.getConf())) {
      context.currentTask.setFetchSource(true);
    }
  }
}
public static void processFileSink(GenTezProcContext context, FileSinkOperator fileSink)
    throws SemanticException {

  ParseContext parseContext = context.parseContext;

  boolean isInsertTable = // is INSERT OVERWRITE TABLE
      GenMapRedUtils.isInsertInto(parseContext, fileSink);
  HiveConf hconf = parseContext.getConf();

  boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
      hconf, fileSink, context.currentTask, isInsertTable);

  Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir,
      fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);

  if (chDir) {
    // Merge the files in the destination table/partitions by creating a map-only merge job.
    // If the underlying data is RCFile or ORC, an RCFileBlockMerge or
    // OrcFileStripeMerge task is created instead.
    LOG.info("using CombineHiveInputformat for the merge job");
    GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName,
        context.dependencyTask, context.moveTask, hconf, context.currentTask);
  }

  FetchTask fetchTask = parseContext.getFetchTask();
  if (fetchTask != null && context.currentTask.getNumChild() == 0) {
    if (fetchTask.isFetchFrom(fileSink.getConf())) {
      context.currentTask.setFetchSource(true);
    }
  }
}
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(),
    rootTasks, pCtx.getFetchTask());
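Each PerfLogBegin call above is meant to be paired with a PerfLogEnd on the same key once the compilation phase finishes, so the elapsed time is recorded. A minimal sketch of the pairing, assuming the surrounding compiler method and its perfLogger field from the fragment above (the elided middle is whatever optimization work the phase performs):

perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// ... build the PhysicalContext and run the physical optimizations ...
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER);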
FetchTask fetchTask = parseCtx.getFetchTask();
if (fetchTask != null && currTask.getNumChild() == 0) {
  if (fetchTask.isFetchFrom(fileSinkDesc)) {
    // Query results are read directly from this task's file sink output.
    currTask.setFetchSource(true);
  }
}
PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(),
    rootTasks, pCtx.getFetchTask());
FetchTask fetchTask = parseContext.getFetchTask();
if (fetchTask != null && context.currentTask.getNumChild() == 0) {
  if (fetchTask.isFetchFrom(fileSink.getConf())) {
    // Query results are read directly from this task's file sink output.
    context.currentTask.setFetchSource(true);
  }
}
public void initParseCtx(ParseContext pctx) {
  opToPartPruner = pctx.getOpToPartPruner();
  opToPartList = pctx.getOpToPartList();
  opToSamplePruner = pctx.getOpToSamplePruner();
  topOps = pctx.getTopOps();
  loadTableWork = pctx.getLoadTableWork();
  loadFileWork = pctx.getLoadFileWork();
  ctx = pctx.getContext();
  destTableId = pctx.getDestTableId();
  idToTableNameMap = pctx.getIdToTableNameMap();
  uCtx = pctx.getUCtx();
  listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
  prunedPartitions = pctx.getPrunedPartitions();
  tabNameToTabObject = pctx.getTabNameToTabObject();
  fetchTask = pctx.getFetchTask();
  setLineageInfo(pctx.getLineageInfo());
}
/**
 * Create a clone of the parse context.
 */
public ParseContext getParseContext(ParseContext pCtx,
    List<Task<? extends Serializable>> rootTasks) {
  ParseContext clone = new ParseContext(queryState,
      pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(),
      pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(),
      pCtx.getLoadTableWork(), pCtx.getLoadFileWork(),
      pCtx.getColumnStatsAutoGatherContexts(), pCtx.getContext(),
      pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(),
      pCtx.getListMapJoinOpsNoReducer(),
      pCtx.getPrunedPartitions(), pCtx.getTabNameToTabObject(),
      pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(),
      pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks,
      pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(),
      pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
      pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(),
      pCtx.getCreateViewDesc(), pCtx.getQueryProperties(),
      pCtx.getViewProjectToTableSchema(), pCtx.getAcidSinks());
  clone.setFetchTask(pCtx.getFetchTask());
  clone.setLineageInfo(pCtx.getLineageInfo());
  clone.setMapJoinOps(pCtx.getMapJoinOps());
  clone.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap());
  clone.setRsOpToTsOpMap(pCtx.getRsOpToTsOpMap());
  return clone;
}
/**
 * Create a clone of the parse context.
 */
public ParseContext getParseContext(ParseContext pCtx,
    List<Task<? extends Serializable>> rootTasks) {
  ParseContext clone = new ParseContext(queryState,
      pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(),
      pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(),
      pCtx.getLoadTableWork(), pCtx.getLoadFileWork(),
      pCtx.getColumnStatsAutoGatherContexts(), pCtx.getContext(),
      pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(),
      pCtx.getListMapJoinOpsNoReducer(),
      pCtx.getPrunedPartitions(), pCtx.getTabNameToTabObject(),
      pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(),
      pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks,
      pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(),
      pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
      pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(),
      pCtx.getCreateViewDesc(), pCtx.getMaterializedViewUpdateDesc(),
      pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(),
      pCtx.getAcidSinks());
  clone.setFetchTask(pCtx.getFetchTask());
  clone.setLineageInfo(pCtx.getLineageInfo());
  clone.setMapJoinOps(pCtx.getMapJoinOps());
  clone.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap());
  clone.setRsToSemiJoinBranchInfo(pCtx.getRsToSemiJoinBranchInfo());
  clone.setColExprToGBMap(pCtx.getColExprToGBMap());
  clone.setSemiJoinHints(pCtx.getSemiJoinHints());
  return clone;
}
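The clone produced here, with rootTasks swapped in, is what the physical optimizer operates on; a minimal usage sketch, repeating the pattern from optimizeTaskPlan at the top of this section:

// Usage sketch, mirroring optimizeTaskPlan above: clone the parse
// context with the current root tasks and run the physical optimizer.
PhysicalContext physicalContext = new PhysicalContext(conf,
    getParseContext(pCtx, rootTasks), ctx, rootTasks, pCtx.getFetchTask());
new PhysicalOptimizer(physicalContext, conf).optimize();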