List<Column> originalGroupingColumns = Arrays.asList(groupbyNode.getGroupingColumns()); List<AggregationFunctionCallEval> aggFunctions = groupbyNode.getAggFunctions(); for (int aggIdx = 0; aggIdx < aggFunctions.size(); aggIdx++) { AggregationFunctionCallEval aggFunction = aggFunctions.get(aggIdx); Target aggFunctionTarget = groupbyNode.getTargets().get(originalGroupingColumns.size() + aggIdx); DistinctGroupbyNodeBuildInfo buildInfo = distinctNodeBuildInfos.get(groupbyMapKey); if (buildInfo == null) { GroupbyNode distinctGroupbyNode = new GroupbyNode(context.getPlan().getLogicalPlan().newPID()); buildInfo = new DistinctGroupbyNodeBuildInfo(distinctGroupbyNode); distinctNodeBuildInfos.put(groupbyMapKey, buildInfo); distinctGroupbyNode.setGroupingColumns(groupingColumns.toArray(new Column[groupingColumns.size()])); for (Column column : eachGroupbyNode.getGroupingColumns()) { targets.add(new Target(new FieldEval(column))); eachGroupbyNode.setTargets(targets); eachGroupbyNode.setAggFunctions(groupbyAggFunctions); eachGroupbyNode.setDistinct(true); eachGroupbyNode.setInSchema(groupbyNode.getInSchema()); GroupbyNode otherGroupbyNode = new GroupbyNode(context.getPlan().getLogicalPlan().newPID()); otherGroupbyNode.setTargets(targets); otherGroupbyNode.setGroupingColumns(originalGroupingColumns.toArray(new Column[originalGroupingColumns.size()])); otherGroupbyNode.setAggFunctions(otherAggregationFunctionCallEvals); otherGroupbyNode.setInSchema(groupbyNode.getInSchema());
Column [] originalGroupingColumns = groupbyNode.getGroupingColumns(); LinkedHashSet<Column> firstStageGroupingColumns = Sets.newLinkedHashSet(Arrays.asList(groupbyNode.getGroupingColumns())); List<AggregationFunctionCallEval> firstStageAggFunctions = Lists.newArrayList(); List<AggregationFunctionCallEval> secondPhaseEvalNodes = Lists.newArrayList(); List<Target> firstPhaseEvalNodeTargets = Lists.newArrayList(); for (AggregationFunctionCallEval aggFunction : groupbyNode.getAggFunctions()) { if (aggFunction.isDistinct()) { GroupbyNode firstStageGroupby = new GroupbyNode(context.plan.getLogicalPlan().newPID()); firstStageGroupby.setGroupingColumns(TUtil.toArray(firstStageGroupingColumns, Column.class)); firstStageGroupby.setAggFunctions(TUtil.toArray(firstStageAggFunctions, AggregationFunctionCallEval.class)); firstStageGroupby.setTargets(firstStageTargets); firstStageGroupby.setChild(groupbyNode.getChild()); firstStageGroupby.setInSchema(groupbyNode.getInSchema()); GroupbyNode secondPhaseGroupby = new GroupbyNode(context.plan.getLogicalPlan().newPID()); secondPhaseGroupby.setGroupingColumns(originalGroupingColumns); secondPhaseGroupby.setAggFunctions(TUtil.toArray(secondPhaseEvalNodes, AggregationFunctionCallEval.class)); secondPhaseGroupby.setTargets(groupbyNode.getTargets()); secondStage.getEnforcer().enforceSortAggregation(secondPhaseGroupby.getPID(), sortSpecs); channel.setShuffleKeys(secondPhaseGroupby.getGroupingColumns().clone()); channel.setSchema(firstStage.getPlan().getOutSchema()); channel.setDataFormat(dataFormat); secondPhaseGroupby.setChild(scanNode);
/**
 * Places a group-by node between the given projection and {@code child} so that the
 * projection's output columns become grouping keys.
 *
 * <p>Nothing is inserted when the projection's current child is already a
 * {@code GROUP_BY} node.
 *
 * @param plan           logical plan used to create the new node
 * @param block          query block in which the new node is registered
 * @param projectionNode projection whose child and input schema are rewired
 * @param child          node that becomes the child of the inserted group-by
 * @throws TajoException declared by the signature
 */
private void insertDistinctOperator(LogicalPlan plan, LogicalPlan.QueryBlock block,
                                    ProjectionNode projectionNode, LogicalNode child)
    throws TajoException {
  if (projectionNode.getChild().getType() == NodeType.GROUP_BY) {
    return;
  }

  final Schema projectedSchema = projectionNode.getOutSchema();

  // Group on every projected column; targets mirror the same schema.
  final GroupbyNode distinctGroupby = plan.createNode(GroupbyNode.class);
  distinctGroupby.setChild(child);
  distinctGroupby.setInSchema(projectionNode.getInSchema());
  distinctGroupby.setTargets(PlannerUtil.schemaToTargets(projectedSchema));
  distinctGroupby.setGroupingColumns(projectedSchema.toArray());

  block.registerNode(distinctGroupby);
  block.setAggregationRequire();

  // The projection now reads from the inserted group-by.
  projectionNode.setChild(distinctGroupby);
  projectionNode.setInSchema(distinctGroupby.getOutSchema());
}
/**
 * Initializes the aggregation executor from a group-by plan node.
 *
 * <p>Records the number of grouping columns and the plan's aggregation functions;
 * when the plan reports no aggregation functions an empty array is stored instead.
 *
 * @param context task attempt context passed to the parent constructor
 * @param plan    group-by node supplying schemas, grouping columns and functions
 * @param child   child executor passed to the parent constructor
 * @throws IOException declared by the signature
 */
public AggregationExec(final TaskAttemptContext context, GroupbyNode plan,
                       PhysicalExec child) throws IOException {
  super(context, plan.getInSchema(), plan.getOutSchema(), child);

  groupingKeyNum = plan.getGroupingColumns().length;

  aggFunctions = plan.hasAggFunctions()
      ? plan.getAggFunctions()
      : new AggregationFunctionCallEval[0];
  aggFunctionsNum = aggFunctions.length;
}
/**
 * Visits the group-by's child first, then appends a {@code GROUP BY} clause listing
 * the node's grouping columns, comma-separated, followed by a trailing space.
 *
 * @param ctx     builder context whose {@code sb} receives the generated text
 * @param groupby group-by node being rendered
 * @param stack   node stack forwarded to the child visit
 */
public void visitGroupBy(SQLBuilderContext ctx, GroupbyNode groupby, Stack<LogicalNode> stack) {
  visit(ctx, groupby.getChild(), stack);

  ctx.sb.append("GROUP BY ");
  ctx.sb.append(StringUtils.join(groupby.getGroupingColumns(), ",", 0));
  ctx.sb.append(" ");
}
/**
 * Serializes a group-by node into a {@code PlanProto.LogicalNode} builder.
 *
 * <p>The child sequence id and distinct flag are always written. Grouping keys,
 * aggregation functions and targets are written only when the node has them.
 *
 * @param context serialization context
 * @param node    group-by node to serialize
 * @return node builder carrying the populated group-by message
 * @throws TajoException declared by the signature
 */
private PlanProto.LogicalNode.Builder buildGroupby(SerializeContext context, GroupbyNode node)
    throws TajoException {
  final int[] childSeqs = registerGetChildIds(context, node);

  final PlanProto.GroupbyNode.Builder groupbyProto = PlanProto.GroupbyNode.newBuilder();
  groupbyProto.setChildSeq(childSeqs[0]);
  groupbyProto.setDistinct(node.isDistinct());

  if (node.groupingKeyNum() > 0) {
    groupbyProto.addAllGroupingKeys(
        ProtoUtil.<CatalogProtos.ColumnProto>toProtoObjects(node.getGroupingColumns()));
  }

  if (node.hasAggFunctions()) {
    ProtoObject[] aggFunctionObjects =
        node.getAggFunctions().toArray(new ProtoObject[node.getAggFunctions().size()]);
    groupbyProto.addAllAggFunctions(
        ProtoUtil.<PlanProto.EvalNodeTree>toProtoObjects(aggFunctionObjects));
  }

  if (node.hasTargets()) {
    ProtoObject[] targetObjects =
        node.getTargets().toArray(new ProtoObject[node.getTargets().size()]);
    groupbyProto.addAllTargets(ProtoUtil.<PlanProto.Target>toProtoObjects(targetObjects));
  }

  // The generic node builder is created only after the group-by payload is complete.
  return createNodeBuilder(context, node).setGroupby(groupbyProto);
}
// Configures groupbyNode as an aggregation without grouping keys: it is attached to
// `child`, takes the child's output schema as its input schema, gets an empty
// grouping-column array, the distinct flag from includeDistinctFunction, and the
// aggregation functions collected in aggEvals. Targets are then built by
// ProjectionPushDownRule.buildGroupByTarget from the names in aggEvalNames.
// NOTE(review): setters run before buildGroupByTarget, which receives groupbyNode —
// presumably it reads the state set above; keep this order unless confirmed otherwise.
groupbyNode.setChild(child); groupbyNode.setInSchema(child.getOutSchema()); groupbyNode.setGroupingColumns(new Column[] {}); groupbyNode.setDistinct(includeDistinctFunction); groupbyNode.setAggFunctions(aggEvals.toArray(new AggregationFunctionCallEval[aggEvals.size()])); Target [] targets = ProjectionPushDownRule.buildGroupByTarget(groupbyNode, null, aggEvalNames.toArray(new String[aggEvalNames.size()])); groupbyNode.setTargets(targets);
Schema originOutputSchema = originGroupbyNode.getOutSchema(); DistinctGroupbyNode firstStageDistinctNode = PlannerUtil.clone(plan, baseDistinctNode); DistinctGroupbyNode secondStageDistinctNode = baseDistinctNode; if (firstStageGroupbyNode.isDistinct()) { firstStageGroupbyNode.setAggFunctions(PlannerUtil.EMPTY_AGG_FUNCS); for (Column column : firstStageGroupbyNode.getGroupingColumns()) { Target target = new Target(new FieldEval(column)); firstGroupbyTargets.add(target); firstStageGroupbyNode.setTargets(firstGroupbyTargets.toArray(new Target[]{})); secondStageGroupbyNode.setGroupingColumns(originGroupColumns.toArray(new Column[]{})); Target[] oldTargets = secondStageGroupbyNode.getTargets(); List<Target> secondGroupbyTargets = new ArrayList<Target>(); LinkedHashSet<Column> distinctColumns = EvalTreeUtil.findUniqueColumns(secondStageGroupbyNode.getAggFunctions()[0]); List<Column> uniqueDistinctColumn = new ArrayList<Column>(); for (int aggFuncIdx = 0; aggFuncIdx < secondStageGroupbyNode.getAggFunctions().length; aggFuncIdx++) { secondStageGroupbyNode.getAggFunctions()[aggFuncIdx].setLastPhase(); int targetIdx = originGroupColumns.size() + uniqueDistinctColumn.size() + aggFuncIdx; Target aggFuncTarget = oldTargets[targetIdx]; secondStageGroupbyNode.setTargets(secondGroupbyTargets.toArray(new Target[]{})); } else {
int groupingKeyNum = node.getGroupingColumns().length; LinkedHashSet<String> groupingKeyNames = null; String[] aggEvalNames = null; if (!node.isForDistinctBlock()) { FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]); groupingKeyNames.add(newContext.addExpr(fieldEval)); if (node.hasAggFunctions()) { final int evalNum = node.getAggFunctions().size(); aggEvalNames = new String[evalNum]; for (int evalIdx = 0, targetIdx = node.getGroupingColumns().length; targetIdx < node.getTargets().size(); evalIdx++, targetIdx++) { Target target = node.getTargets().get(targetIdx); EvalNode evalNode = node.getAggFunctions().get(evalIdx); aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName())); node.setInSchema(child.getOutSchema()); if (node.isForDistinctBlock()) { // the grouping columns should be updated according to the schema of child node. node.setGroupingColumns(child.getOutSchema().toArray()); node.setTargets(PlannerUtil.schemaToTargets(child.getOutSchema())); groupingKeyNum = node.getGroupingColumns().length; groupingKeyNames = Sets.newLinkedHashSet(); for (int i = 0; i < groupingKeyNum; i++) { FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]); groupingKeyNames.add(newContext.addExpr(fieldEval));
/**
 * Rebuilds a {@link GroupbyNode} from its protobuf form.
 *
 * <p>The child is looked up in {@code nodeMap} by the serialized child sequence id.
 * Grouping keys, aggregation functions and targets are restored only when the
 * message contains at least one entry; both schemas are always restored.
 *
 * @param context     configuration forwarded to the eval/target converters
 * @param evalContext evaluation context forwarded to the eval/target converters
 * @param nodeMap     previously converted nodes keyed by sequence id
 * @param protoNode   serialized logical node holding the group-by message
 * @return the reconstructed group-by node
 */
private static GroupbyNode convertGroupby(OverridableConf context, EvalContext evalContext,
                                          Map<Integer, LogicalNode> nodeMap,
                                          PlanProto.LogicalNode protoNode) {
  final PlanProto.GroupbyNode proto = protoNode.getGroupby();

  final GroupbyNode restored = new GroupbyNode(protoNode.getNodeId());
  restored.setChild(nodeMap.get(proto.getChildSeq()));
  restored.setDistinct(proto.getDistinct());

  if (proto.getGroupingKeysCount() > 0) {
    restored.setGroupingColumns(convertColumns(proto.getGroupingKeysList()));
  }

  if (proto.getAggFunctionsCount() > 0) {
    restored.setAggFunctions(
        convertAggFuncCallEvals(context, evalContext, proto.getAggFunctionsList()));
  }

  if (proto.getTargetsCount() > 0) {
    restored.setTargets(convertTargets(context, evalContext, proto.getTargetsList()));
  }

  restored.setInSchema(convertSchema(protoNode.getInSchema()));
  restored.setOutSchema(convertSchema(protoNode.getOutSchema()));

  return restored;
}
/**
 * Wires a two-phase group-by across two execution blocks.
 *
 * <p>{@code latestBlock} runs the first phase. A new block runs the second phase and
 * reads the first phase's output through a hash-shuffle channel keyed on the first
 * phase's grouping columns. The channel is created with 1 partition when the first
 * phase has no grouping columns and 32 otherwise.
 *
 * @param masterPlan         master plan that owns the blocks and the channel
 * @param latestBlock        block that receives the first-phase plan
 * @param firstPhaseGroupby  group-by executed in {@code latestBlock}
 * @param secondPhaseGroupby group-by executed in the newly created block
 * @return the newly created block holding the second phase
 * @throws TajoException declared by the signature
 */
private ExecutionBlock buildTwoPhaseGroupby(MasterPlan masterPlan, ExecutionBlock latestBlock,
                                            GroupbyNode firstPhaseGroupby,
                                            GroupbyNode secondPhaseGroupby) throws TajoException {
  latestBlock.setPlan(firstPhaseGroupby);
  final ExecutionBlock secondPhaseBlock = masterPlan.newExecutionBlock();

  // Only the partition count depends on whether grouping keys exist.
  final int partitionNum = firstPhaseGroupby.isEmptyGrouping() ? 1 : 32;
  final DataChannel shuffle = new DataChannel(latestBlock, secondPhaseBlock, HASH_SHUFFLE, partitionNum);
  shuffle.setShuffleKeys(firstPhaseGroupby.getGroupingColumns());
  shuffle.setSchema(firstPhaseGroupby.getOutSchema());
  shuffle.setDataFormat(dataFormat);

  // The second phase scans the shuffled output of the first phase.
  final ScanNode shuffleScan = buildInputExecutor(masterPlan.getLogicalPlan(), shuffle);
  secondPhaseGroupby.setChild(shuffleScan);
  secondPhaseGroupby.setInSchema(shuffleScan.getOutSchema());
  secondPhaseBlock.setPlan(secondPhaseGroupby);

  masterPlan.addConnect(shuffle);
  return secondPhaseBlock;
}
for (int i = 0; i < groupbyNode.getAggFunctions().size(); i++) { aggFunctions.add((AggregationFunctionCallEval) groupbyNode.getAggFunctions().get(i).clone()); aggFunctions.get(i).setFirstPhase(); if (groupbyNode.getGroupingColumns().length == 0 && aggFunctions.size() == groupbyNode.getOutSchema().getRootColumns().size()) { aggFunctions.get(i).setAlias(groupbyNode.getOutSchema().getColumn(i).getQualifiedName()); if (groupbyNode.getGroupingColumns().length == 0 && aggFunctions.size() == groupbyNode.getOutSchema().getRootColumns().size()) { groupbyNode.setAggFunctions(aggFunctions); if (!lastGroupbyNode.isDistinct()) { int index = 0; for (AggregationFunctionCallEval aggrFunction: lastGroupbyNode.getAggFunctions()) { aggrFunction.setIntermediatePhase(); aggrFunction.setArgs(new EvalNode[]{new FieldEval(lastGroupbyNode.getTargets().get(index).getNamedColumn())}); index++; if (!lastGroupbyNode.isDistinct()) { int index = 0; for (AggregationFunctionCallEval aggrFunction: lastGroupbyNode.getAggFunctions()) { aggrFunction.setFirstPhase(); aggrFunction.setArgs(new EvalNode[]{new FieldEval(lastGroupbyNode.getTargets().get(index).getNamedColumn())}); index++; secondStageDistinctNode.setOutSchema(firstStageDistinctNode.getOutSchema()); thirdStageDistinctNode.setInSchema(firstStageDistinctNode.getOutSchema()); thirdStageDistinctNode.setOutSchema(groupbyNode.getOutSchema());
GroupbyNode groupbyNode = (GroupbyNode) projectable; if (!groupbyNode.isEmptyGrouping()) { // it should be targets instead of int groupingKeyNum = groupbyNode.getGroupingColumns().length; Target target = groupbyNode.getTargets().get(i); if (groupbyNode.getTargets().get(i).getEvalTree().getType() == EvalType.FIELD) { FieldEval grpKeyEvalNode = target.getEvalTree(); if (!groupbyNode.getInSchema().contains(grpKeyEvalNode.getColumnRef())) { throwCannotEvaluateException(projectable, grpKeyEvalNode.getName()); if (groupbyNode.hasAggFunctions()) { verifyIfEvalNodesCanBeEvaluated(projectable, (List<EvalNode>)(List<?>) groupbyNode.getAggFunctions());
if (secondPhaseGroupBy.hasAggFunctions()) { int evalNum = secondPhaseGroupBy.getAggFunctions().size(); List<AggregationFunctionCallEval> secondPhaseEvals = secondPhaseGroupBy.getAggFunctions(); List<AggregationFunctionCallEval> firstPhaseEvals = new ArrayList<>(); secondPhaseGroupBy.setAggFunctions(secondPhaseEvals); firstPhaseGroupBy.setAggFunctions(firstPhaseEvals); List<Target> firstPhaseTargets = ProjectionPushDownRule.buildGroupByTarget(firstPhaseGroupBy, null, firstPhaseEvalNames); firstPhaseGroupBy.setTargets(firstPhaseTargets); secondPhaseGroupBy.setInSchema(PlannerUtil.targetToSchema(firstPhaseTargets));
.add("age", Type.INT2) .build(); GroupbyNode groupbyNode = new GroupbyNode(0); groupbyNode.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)}); ScanNode scanNode = new ScanNode(0); scanNode.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf), new Path("in"))); GroupbyNode groupbyNode2 = new GroupbyNode(0); groupbyNode2.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)}); JoinNode joinNode = new JoinNode(0); ScanNode scanNode2 = new ScanNode(0); CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf), new Path("in2"))); groupbyNode.setChild(scanNode); groupbyNode2.setChild(joinNode); joinNode.setLeftChild(scanNode); joinNode.setRightChild(scanNode2); assertTrue(groupbyNode.equals(groupbyNode2)); assertFalse(groupbyNode.deepEquals(groupbyNode2)); groupbyNode2.setChild(scanNode3); assertTrue(groupbyNode.equals(groupbyNode2)); assertTrue(groupbyNode.deepEquals(groupbyNode2));
public String getShortPlanString() { StringBuilder sb = new StringBuilder(); sb.append(getType().name() + "(" + getPID() + ")").append("("); Column [] groupingColumns = this.groupingKeys; for (int j = 0; j < groupingColumns.length; j++) { if (hasAggFunctions()) { sb.append(", exprs: ("); sb.append(", out schema:").append(getOutSchema().toString()); sb.append(", in schema:").append(getInSchema().toString());
if (eachGroupby.isDistinct()) { aggregatorList.add(new DistinctFinalAggregator(distinctSeq, inTupleIndex, outTupleIndex, eachGroupby)); distinctSeq++; Column[] distinctGroupingColumns = eachGroupby.getGroupingColumns(); inTupleIndex += distinctGroupingColumns.length; outTupleIndex += eachGroupby.getAggFunctions().size(); } else { nonDistinctAggr = new DistinctFinalAggregator(-1, inTupleIndex, outTupleIndex, eachGroupby); outTupleIndex += eachGroupby.getAggFunctions().size(); resultTupleLength += eachGroupby.getAggFunctions().size(); Collections.addAll(groupingColumnSet, eachGroupby.getGroupingColumns()); for (Target eachTarget: eachGroupby.getTargets()) { if (!groupingColumnSet.contains(eachTarget.getNamedColumn())) {
/**
 * Sets up the sort-based aggregation executor.
 *
 * <p>Allocates one function-context slot per aggregation function (none when the
 * plan's function list is {@code null}), resolves each grouping column to its
 * position in the input schema, and allocates the current-key and output tuples.
 * Qualified columns are resolved by qualified name, all others by simple name.
 *
 * @param context task attempt context passed to the parent constructor
 * @param plan    group-by node describing grouping columns and functions
 * @param child   child executor passed to the parent constructor
 * @throws IOException declared by the signature
 */
public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan, PhysicalExec child)
    throws IOException {
  super(context, plan, child);

  final int contextCount;
  if (plan.getAggFunctions() == null) {
    contextCount = 0;
  } else {
    contextCount = plan.getAggFunctions().size();
  }
  contexts = new FunctionContext[contextCount];

  final Column[] groupingColumns = plan.getGroupingColumns();
  groupingKeyIds = new int[groupingKeyNum];
  for (int i = 0; i < groupingColumns.length; i++) {
    final Column key = groupingColumns[i];
    groupingKeyIds[i] = key.hasQualifier()
        ? inSchema.getColumnId(key.getQualifiedName())
        : inSchema.getColumnIdByName(key.getSimpleName());
  }

  currentKey = new VTuple(groupingKeyNum);
  outTuple = new VTuple(outSchema.size());
}
/**
 * Registers the distinct-aggregation enforcers for a two-stage distinct group-by.
 *
 * <p>The first stage is enforced as hash aggregation with no sort specs. The second
 * stage is enforced as sort aggregation, with one {@code SortSpecArray} per sub plan:
 * the sort columns are the grouping columns of the i-th first-stage sub plan, and the
 * array is tagged with the PID of the i-th second-stage sub plan.
 *
 * <p>Previously the position counter was never advanced, so every sort-spec array was
 * tagged with the PID of the first second-stage sub plan.
 *
 * @param firstStageBlock         block executing the first stage
 * @param firstStageDistinctNode  first-stage distinct group-by; its sub plans supply the sort columns
 * @param secondStageBlock        block executing the second stage
 * @param secondStageDistinctNode second-stage distinct group-by; its sub plans supply the node ids.
 *                                Assumed to have at least as many sub plans as the first stage.
 */
private void setDistinctAggregationEnforcer(
    ExecutionBlock firstStageBlock, DistinctGroupbyNode firstStageDistinctNode,
    ExecutionBlock secondStageBlock, DistinctGroupbyNode secondStageDistinctNode) {
  firstStageBlock.getEnforcer().enforceDistinctAggregation(firstStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.HASH_AGGREGATION, null);

  List<SortSpecArray> sortSpecArrays = new ArrayList<SortSpecArray>();
  int index = 0;
  for (GroupbyNode groupbyNode : firstStageDistinctNode.getSubPlans()) {
    List<SortSpecProto> sortSpecs = new ArrayList<SortSpecProto>();
    for (Column column : groupbyNode.getGroupingColumns()) {
      sortSpecs.add(SortSpecProto.newBuilder().setColumn(column.getProto()).build());
    }
    sortSpecArrays.add(
        SortSpecArray.newBuilder()
            .setNodeId(secondStageDistinctNode.getSubPlans().get(index).getPID())
            .addAllSortSpecs(sortSpecs).build());
    // Move on to the matching second-stage sub plan for the next iteration.
    index++;
  }

  secondStageBlock.getEnforcer().enforceDistinctAggregation(secondStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.SORT_AGGREGATION, sortSpecArrays);
}
/**
 * Default group-by visit: pushes the node on the stack, visits its child, pops the
 * node again and returns whatever the child visit produced.
 *
 * @param context visitor context
 * @param plan    logical plan being traversed
 * @param block   query block being traversed
 * @param node    group-by node being visited
 * @param stack   stack of nodes on the current traversal path
 * @return result of visiting the node's child
 * @throws TajoException declared by the signature
 */
@Override
public RESULT visitGroupBy(CONTEXT context, LogicalPlan plan, LogicalPlan.QueryBlock block,
                           GroupbyNode node, Stack<LogicalNode> stack) throws TajoException {
  stack.push(node);
  final RESULT childResult = visit(context, plan, block, node.getChild(), stack);
  stack.pop();
  return childResult;
}