org.apache.tajo.plan.logical.GroupbyNode java code examples

List<Column> originalGroupingColumns = Arrays.asList(groupbyNode.getGroupingColumns());
List<AggregationFunctionCallEval> aggFunctions = groupbyNode.getAggFunctions();
for (int aggIdx = 0; aggIdx < aggFunctions.size(); aggIdx++) {
 AggregationFunctionCallEval aggFunction = aggFunctions.get(aggIdx);
 Target aggFunctionTarget = groupbyNode.getTargets().get(originalGroupingColumns.size() + aggIdx);
  DistinctGroupbyNodeBuildInfo buildInfo = distinctNodeBuildInfos.get(groupbyMapKey);
  if (buildInfo == null) {
   GroupbyNode distinctGroupbyNode = new GroupbyNode(context.getPlan().getLogicalPlan().newPID());
   buildInfo = new DistinctGroupbyNodeBuildInfo(distinctGroupbyNode);
   distinctNodeBuildInfos.put(groupbyMapKey, buildInfo);
   distinctGroupbyNode.setGroupingColumns(groupingColumns.toArray(new Column[groupingColumns.size()]));
 for (Column column : eachGroupbyNode.getGroupingColumns()) {
  targets.add(new Target(new FieldEval(column)));
 eachGroupbyNode.setTargets(targets);
 eachGroupbyNode.setAggFunctions(groupbyAggFunctions);
 eachGroupbyNode.setDistinct(true);
 eachGroupbyNode.setInSchema(groupbyNode.getInSchema());
 GroupbyNode otherGroupbyNode = new GroupbyNode(context.getPlan().getLogicalPlan().newPID());
 otherGroupbyNode.setTargets(targets);
 otherGroupbyNode.setGroupingColumns(originalGroupingColumns.toArray(new Column[originalGroupingColumns.size()]));
 otherGroupbyNode.setAggFunctions(otherAggregationFunctionCallEvals);
 otherGroupbyNode.setInSchema(groupbyNode.getInSchema());

Column [] originalGroupingColumns = groupbyNode.getGroupingColumns();
LinkedHashSet<Column> firstStageGroupingColumns =
  Sets.newLinkedHashSet(Arrays.asList(groupbyNode.getGroupingColumns()));
List<AggregationFunctionCallEval> firstStageAggFunctions = Lists.newArrayList();
List<AggregationFunctionCallEval> secondPhaseEvalNodes = Lists.newArrayList();
List<Target> firstPhaseEvalNodeTargets = Lists.newArrayList();
for (AggregationFunctionCallEval aggFunction : groupbyNode.getAggFunctions()) {
 if (aggFunction.isDistinct()) {
GroupbyNode firstStageGroupby = new GroupbyNode(context.plan.getLogicalPlan().newPID());
firstStageGroupby.setGroupingColumns(TUtil.toArray(firstStageGroupingColumns, Column.class));
firstStageGroupby.setAggFunctions(TUtil.toArray(firstStageAggFunctions, AggregationFunctionCallEval.class));
firstStageGroupby.setTargets(firstStageTargets);
firstStageGroupby.setChild(groupbyNode.getChild());
firstStageGroupby.setInSchema(groupbyNode.getInSchema());
GroupbyNode secondPhaseGroupby = new GroupbyNode(context.plan.getLogicalPlan().newPID());
secondPhaseGroupby.setGroupingColumns(originalGroupingColumns);
secondPhaseGroupby.setAggFunctions(TUtil.toArray(secondPhaseEvalNodes, AggregationFunctionCallEval.class));
secondPhaseGroupby.setTargets(groupbyNode.getTargets());
secondStage.getEnforcer().enforceSortAggregation(secondPhaseGroupby.getPID(), sortSpecs);
channel.setShuffleKeys(secondPhaseGroupby.getGroupingColumns().clone());
channel.setSchema(firstStage.getPlan().getOutSchema());
channel.setDataFormat(dataFormat);
secondPhaseGroupby.setChild(scanNode);

/**
 * Inserts a duplicate-removing {@link GroupbyNode} between {@code projectionNode} and
 * {@code child}, unless the projection's current child is already a GROUP_BY node.
 *
 * <p>The new node groups on every column of the projection's output schema and exposes
 * that same schema as its targets. It is registered with {@code block}, the block is
 * flagged as requiring aggregation, and the projection is re-pointed at the new node.
 *
 * @param plan           logical plan used to create the new node
 * @param block          query block that receives the new node
 * @param projectionNode projection whose output is to be de-duplicated
 * @param child          node that becomes the child of the new group-by
 * @throws TajoException declared by the signature; propagated from the calls made here
 */
private void insertDistinctOperator(LogicalPlan plan, LogicalPlan.QueryBlock block,
                  ProjectionNode projectionNode, LogicalNode child) throws TajoException {
  // Nothing to do when a group-by already sits directly under the projection.
  if (projectionNode.getChild().getType() == NodeType.GROUP_BY) {
    return;
  }

  final Schema projectedSchema = projectionNode.getOutSchema();
  final GroupbyNode distinctNode = plan.createNode(GroupbyNode.class);

  distinctNode.setChild(child);
  distinctNode.setInSchema(projectionNode.getInSchema());
  distinctNode.setTargets(PlannerUtil.schemaToTargets(projectedSchema));
  distinctNode.setGroupingColumns(projectedSchema.toArray());

  block.registerNode(distinctNode);
  block.setAggregationRequire();

  projectionNode.setChild(distinctNode);
  projectionNode.setInSchema(distinctNode.getOutSchema());
}

/**
 * Creates an aggregation executor from a group-by plan node.
 *
 * <p>Records the number of grouping columns and the plan's aggregation functions;
 * when the plan has none, an empty function array is used instead.
 *
 * @param context task attempt context passed to the parent executor
 * @param plan    group-by node supplying the schemas, grouping columns and functions
 * @param child   child executor passed to the parent executor
 * @throws IOException declared by the signature; propagated from the parent constructor
 */
public AggregationExec(final TaskAttemptContext context, GroupbyNode plan,
            PhysicalExec child) throws IOException {
  super(context, plan.getInSchema(), plan.getOutSchema(), child);

  groupingKeyNum = plan.getGroupingColumns().length;

  // Fall back to an empty array so the count below is 0 when there are no functions.
  aggFunctions = plan.hasAggFunctions()
      ? plan.getAggFunctions()
      : new AggregationFunctionCallEval[0];
  aggFunctionsNum = aggFunctions.length;
}

/**
 * Visits the group-by's child first, then appends a {@code GROUP BY} clause listing
 * the node's grouping columns, comma separated and followed by a single space.
 *
 * @param ctx     builder context whose {@code sb} receives the generated text
 * @param groupby group-by node being rendered
 * @param stack   visitor stack handed through to the child visit
 */
public void visitGroupBy(SQLBuilderContext ctx, GroupbyNode groupby, Stack<LogicalNode> stack) {
  visit(ctx, groupby.getChild(), stack);

  ctx.sb.append("GROUP BY ");
  ctx.sb.append(StringUtils.join(groupby.getGroupingColumns(), ",", 0));
  ctx.sb.append(" ");
}

/**
 * Serializes a {@link GroupbyNode} into a {@code PlanProto.LogicalNode} builder.
 *
 * <p>The child sequence id and distinct flag are always written. Grouping keys,
 * aggregation functions and targets are written only when the node has them.
 *
 * @param context serialization context used to register children and create the node builder
 * @param node    group-by node to serialize
 * @return the logical-node builder with its group-by payload set
 * @throws TajoException declared by the signature; propagated from the calls made here
 */
private PlanProto.LogicalNode.Builder buildGroupby(SerializeContext context, GroupbyNode node)
  throws TajoException {
  final int[] childSeqs = registerGetChildIds(context, node);

  final PlanProto.GroupbyNode.Builder payload = PlanProto.GroupbyNode.newBuilder();
  payload.setChildSeq(childSeqs[0]);
  payload.setDistinct(node.isDistinct());

  if (node.groupingKeyNum() > 0) {
    payload.addAllGroupingKeys(
        ProtoUtil.<CatalogProtos.ColumnProto>toProtoObjects(node.getGroupingColumns()));
  }

  if (node.hasAggFunctions()) {
    final ProtoObject[] functionObjects =
        node.getAggFunctions().toArray(new ProtoObject[node.getAggFunctions().size()]);
    payload.addAllAggFunctions(ProtoUtil.<PlanProto.EvalNodeTree>toProtoObjects(functionObjects));
  }

  if (node.hasTargets()) {
    final ProtoObject[] targetObjects =
        node.getTargets().toArray(new ProtoObject[node.getTargets().size()]);
    payload.addAllTargets(ProtoUtil.<PlanProto.Target>toProtoObjects(targetObjects));
  }

  final PlanProto.LogicalNode.Builder result = createNodeBuilder(context, node);
  result.setGroupby(payload);
  return result;
}

// Attach the group-by above `child` and use the child's output schema as its input schema.
groupbyNode.setChild(child);
groupbyNode.setInSchema(child.getOutSchema());
// No grouping columns are set: the node is given an empty grouping-key array.
groupbyNode.setGroupingColumns(new Column[] {});
groupbyNode.setDistinct(includeDistinctFunction);
groupbyNode.setAggFunctions(aggEvals.toArray(new AggregationFunctionCallEval[aggEvals.size()]));
// Build the node's targets from the aggregation expression names; the second argument is passed as null.
Target [] targets = ProjectionPushDownRule.buildGroupByTarget(groupbyNode, null,
  aggEvalNames.toArray(new String[aggEvalNames.size()]));
groupbyNode.setTargets(targets);

Schema originOutputSchema = originGroupbyNode.getOutSchema();
DistinctGroupbyNode firstStageDistinctNode = PlannerUtil.clone(plan, baseDistinctNode);
DistinctGroupbyNode secondStageDistinctNode = baseDistinctNode;
 if (firstStageGroupbyNode.isDistinct()) {
  firstStageGroupbyNode.setAggFunctions(PlannerUtil.EMPTY_AGG_FUNCS);
  for (Column column : firstStageGroupbyNode.getGroupingColumns()) {
   Target target = new Target(new FieldEval(column));
   firstGroupbyTargets.add(target);
  firstStageGroupbyNode.setTargets(firstGroupbyTargets.toArray(new Target[]{}));
  secondStageGroupbyNode.setGroupingColumns(originGroupColumns.toArray(new Column[]{}));
  Target[] oldTargets = secondStageGroupbyNode.getTargets();
  List<Target> secondGroupbyTargets = new ArrayList<Target>();
  LinkedHashSet<Column> distinctColumns = EvalTreeUtil.findUniqueColumns(secondStageGroupbyNode.getAggFunctions()[0]);
  List<Column> uniqueDistinctColumn = new ArrayList<Column>();
  for (int aggFuncIdx = 0; aggFuncIdx < secondStageGroupbyNode.getAggFunctions().length; aggFuncIdx++) {
   secondStageGroupbyNode.getAggFunctions()[aggFuncIdx].setLastPhase();
   int targetIdx = originGroupColumns.size() + uniqueDistinctColumn.size() + aggFuncIdx;
   Target aggFuncTarget = oldTargets[targetIdx];
  secondStageGroupbyNode.setTargets(secondGroupbyTargets.toArray(new Target[]{}));
 } else {

int groupingKeyNum = node.getGroupingColumns().length;
LinkedHashSet<String> groupingKeyNames = null;
String[] aggEvalNames = null;
if (!node.isForDistinctBlock()) {
   FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
   groupingKeyNames.add(newContext.addExpr(fieldEval));
 if (node.hasAggFunctions()) {
  final int evalNum = node.getAggFunctions().size();
  aggEvalNames = new String[evalNum];
  for (int evalIdx = 0, targetIdx = node.getGroupingColumns().length; targetIdx < node.getTargets().size();
     evalIdx++, targetIdx++) {
   Target target = node.getTargets().get(targetIdx);
   EvalNode evalNode = node.getAggFunctions().get(evalIdx);
   aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName()));
node.setInSchema(child.getOutSchema());
if (node.isForDistinctBlock()) { // the grouping columns should be updated according to the schema of child node.
 node.setGroupingColumns(child.getOutSchema().toArray());
 node.setTargets(PlannerUtil.schemaToTargets(child.getOutSchema()));
 groupingKeyNum = node.getGroupingColumns().length;
 groupingKeyNames = Sets.newLinkedHashSet();
 for (int i = 0; i < groupingKeyNum; i++) {
  FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
  groupingKeyNames.add(newContext.addExpr(fieldEval));

/**
 * Rebuilds a {@link GroupbyNode} from its protobuf form.
 *
 * <p>The child is looked up in {@code nodeMap} by the serialized child sequence id.
 * Grouping columns, aggregation functions and targets are restored only when the
 * message carries at least one of each; the input and output schemas are always
 * restored, after the targets.
 *
 * @param context     configuration handed to the eval/target converters
 * @param evalContext evaluation context handed to the eval/target converters
 * @param nodeMap     already-converted nodes keyed by sequence id
 * @param protoNode   serialized logical node carrying the group-by payload
 * @return the reconstructed group-by node
 */
private static GroupbyNode convertGroupby(OverridableConf context, EvalContext evalContext,
                     Map<Integer, LogicalNode> nodeMap, PlanProto.LogicalNode protoNode) {
  final PlanProto.GroupbyNode payload = protoNode.getGroupby();
  final GroupbyNode restored = new GroupbyNode(protoNode.getNodeId());

  restored.setChild(nodeMap.get(payload.getChildSeq()));
  restored.setDistinct(payload.getDistinct());

  if (payload.getGroupingKeysCount() > 0) {
    restored.setGroupingColumns(convertColumns(payload.getGroupingKeysList()));
  }
  if (payload.getAggFunctionsCount() > 0) {
    restored.setAggFunctions(
        convertAggFuncCallEvals(context, evalContext, payload.getAggFunctionsList()));
  }
  if (payload.getTargetsCount() > 0) {
    restored.setTargets(convertTargets(context, evalContext, payload.getTargetsList()));
  }

  // Schemas are applied last, in the same order as before: input, then output.
  restored.setInSchema(convertSchema(protoNode.getInSchema()));
  restored.setOutSchema(convertSchema(protoNode.getOutSchema()));
  return restored;
}

/**
 * Splits a group-by across two execution blocks joined by a hash-shuffle channel.
 *
 * <p>{@code latestBlock} runs the first phase. A newly created block runs the second
 * phase on top of a scan built from the channel. The channel is shuffled on the first
 * phase's grouping columns, with 1 partition when the grouping is empty and 32 otherwise.
 *
 * @param masterPlan         plan that owns the blocks and the connecting channel
 * @param latestBlock        block that will execute the first phase
 * @param firstPhaseGroupby  group-by node for the first phase
 * @param secondPhaseGroupby group-by node for the second phase
 * @return the newly created block that executes the second phase
 * @throws TajoException declared by the signature; propagated from the calls made here
 */
private ExecutionBlock buildTwoPhaseGroupby(MasterPlan masterPlan, ExecutionBlock latestBlock,
                          GroupbyNode firstPhaseGroupby, GroupbyNode secondPhaseGroupby) throws TajoException {
  latestBlock.setPlan(firstPhaseGroupby);
  final ExecutionBlock secondBlock = masterPlan.newExecutionBlock();

  // Only the partition count depends on whether there are grouping keys.
  final int partitionCount = firstPhaseGroupby.isEmptyGrouping() ? 1 : 32;
  final DataChannel shuffle = new DataChannel(latestBlock, secondBlock, HASH_SHUFFLE, partitionCount);
  shuffle.setShuffleKeys(firstPhaseGroupby.getGroupingColumns());
  shuffle.setSchema(firstPhaseGroupby.getOutSchema());
  shuffle.setDataFormat(dataFormat);

  final ScanNode shuffleInput = buildInputExecutor(masterPlan.getLogicalPlan(), shuffle);
  secondPhaseGroupby.setChild(shuffleInput);
  secondPhaseGroupby.setInSchema(shuffleInput.getOutSchema());

  secondBlock.setPlan(secondPhaseGroupby);
  masterPlan.addConnect(shuffle);
  return secondBlock;
}

for (int i = 0; i < groupbyNode.getAggFunctions().size(); i++) {
 aggFunctions.add((AggregationFunctionCallEval) groupbyNode.getAggFunctions().get(i).clone());
 aggFunctions.get(i).setFirstPhase();
 if (groupbyNode.getGroupingColumns().length == 0
   && aggFunctions.size() == groupbyNode.getOutSchema().getRootColumns().size()) {
  aggFunctions.get(i).setAlias(groupbyNode.getOutSchema().getColumn(i).getQualifiedName());
if (groupbyNode.getGroupingColumns().length == 0
  && aggFunctions.size() == groupbyNode.getOutSchema().getRootColumns().size()) {
 groupbyNode.setAggFunctions(aggFunctions);
if (!lastGroupbyNode.isDistinct()) {
 int index = 0;
 for (AggregationFunctionCallEval aggrFunction: lastGroupbyNode.getAggFunctions()) {
  aggrFunction.setIntermediatePhase();
  aggrFunction.setArgs(new EvalNode[]{new FieldEval(lastGroupbyNode.getTargets().get(index).getNamedColumn())});
  index++;
if (!lastGroupbyNode.isDistinct()) {
 int index = 0;
 for (AggregationFunctionCallEval aggrFunction: lastGroupbyNode.getAggFunctions()) {
  aggrFunction.setFirstPhase();
  aggrFunction.setArgs(new EvalNode[]{new FieldEval(lastGroupbyNode.getTargets().get(index).getNamedColumn())});
  index++;
secondStageDistinctNode.setOutSchema(firstStageDistinctNode.getOutSchema());
thirdStageDistinctNode.setInSchema(firstStageDistinctNode.getOutSchema());
thirdStageDistinctNode.setOutSchema(groupbyNode.getOutSchema());

GroupbyNode groupbyNode = (GroupbyNode) projectable;
if (!groupbyNode.isEmptyGrouping()) { // it should be targets instead of
 int groupingKeyNum = groupbyNode.getGroupingColumns().length;
  Target target = groupbyNode.getTargets().get(i);
  if (groupbyNode.getTargets().get(i).getEvalTree().getType() == EvalType.FIELD) {
   FieldEval grpKeyEvalNode = target.getEvalTree();
   if (!groupbyNode.getInSchema().contains(grpKeyEvalNode.getColumnRef())) {
    throwCannotEvaluateException(projectable, grpKeyEvalNode.getName());
if (groupbyNode.hasAggFunctions()) {
 verifyIfEvalNodesCanBeEvaluated(projectable, (List<EvalNode>)(List<?>) groupbyNode.getAggFunctions());

if (secondPhaseGroupBy.hasAggFunctions()) {
 int evalNum = secondPhaseGroupBy.getAggFunctions().size();
 List<AggregationFunctionCallEval> secondPhaseEvals = secondPhaseGroupBy.getAggFunctions();
 List<AggregationFunctionCallEval> firstPhaseEvals = new ArrayList<>();
 secondPhaseGroupBy.setAggFunctions(secondPhaseEvals);
 firstPhaseGroupBy.setAggFunctions(firstPhaseEvals);
 List<Target> firstPhaseTargets = ProjectionPushDownRule.buildGroupByTarget(firstPhaseGroupBy, null,
   firstPhaseEvalNames);
 firstPhaseGroupBy.setTargets(firstPhaseTargets);
 secondPhaseGroupBy.setInSchema(PlannerUtil.targetToSchema(firstPhaseTargets));

  .add("age", Type.INT2)
  .build();
GroupbyNode groupbyNode = new GroupbyNode(0);
groupbyNode.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)});
ScanNode scanNode = new ScanNode(0);
scanNode.init(CatalogUtil.newTableDesc("in", schema,
  CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf), new Path("in")));
GroupbyNode groupbyNode2 = new GroupbyNode(0);
groupbyNode2.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)});
JoinNode joinNode = new JoinNode(0);
ScanNode scanNode2 = new ScanNode(0);
  CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf), new Path("in2")));
groupbyNode.setChild(scanNode);
groupbyNode2.setChild(joinNode);
joinNode.setLeftChild(scanNode);
joinNode.setRightChild(scanNode2);
assertTrue(groupbyNode.equals(groupbyNode2));
assertFalse(groupbyNode.deepEquals(groupbyNode2));
groupbyNode2.setChild(scanNode3);
assertTrue(groupbyNode.equals(groupbyNode2));
assertTrue(groupbyNode.deepEquals(groupbyNode2));

public String getShortPlanString() {
 StringBuilder sb = new StringBuilder();
 sb.append(getType().name() + "(" + getPID() + ")").append("(");
 Column [] groupingColumns = this.groupingKeys;
 for (int j = 0; j < groupingColumns.length; j++) {
 if (hasAggFunctions()) {
  sb.append(", exprs: (");
 sb.append(", out schema:").append(getOutSchema().toString());
 sb.append(", in schema:").append(getInSchema().toString());

if (eachGroupby.isDistinct()) {
 aggregatorList.add(new DistinctFinalAggregator(distinctSeq, inTupleIndex, outTupleIndex, eachGroupby));
 distinctSeq++;
 Column[] distinctGroupingColumns = eachGroupby.getGroupingColumns();
 inTupleIndex += distinctGroupingColumns.length;
 outTupleIndex += eachGroupby.getAggFunctions().size();
} else {
 nonDistinctAggr = new DistinctFinalAggregator(-1, inTupleIndex, outTupleIndex, eachGroupby);
 outTupleIndex += eachGroupby.getAggFunctions().size();
resultTupleLength += eachGroupby.getAggFunctions().size();
Collections.addAll(groupingColumnSet, eachGroupby.getGroupingColumns());
for (Target eachTarget: eachGroupby.getTargets()) {
 if (!groupingColumnSet.contains(eachTarget.getNamedColumn())) {

/**
 * Creates a sort-based aggregation executor from a group-by plan node.
 *
 * <p>Allocates one function-context slot per aggregation function (none when the plan
 * returns {@code null} for its functions) and resolves each grouping column to its id
 * in the input schema: by qualified name when the column has a qualifier, otherwise by
 * simple name.
 *
 * @param context task attempt context passed to the parent executor
 * @param plan    group-by node supplying grouping columns and aggregation functions
 * @param child   child executor passed to the parent executor
 * @throws IOException declared by the signature; propagated from the parent constructor
 */
public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan, PhysicalExec child) throws IOException {
  super(context, plan, child);

  final int functionCount = plan.getAggFunctions() == null ? 0 : plan.getAggFunctions().size();
  contexts = new FunctionContext[functionCount];

  final Column[] keys = plan.getGroupingColumns();
  groupingKeyIds = new int[groupingKeyNum];
  for (int pos = 0; pos < plan.getGroupingColumns().length; pos++) {
    final Column key = keys[pos];
    groupingKeyIds[pos] = key.hasQualifier()
        ? inSchema.getColumnId(key.getQualifiedName())
        : inSchema.getColumnIdByName(key.getSimpleName());
  }

  currentKey = new VTuple(groupingKeyNum);
  outTuple = new VTuple(outSchema.size());
}

/**
 * Registers the distinct-aggregation algorithms for the two stages.
 *
 * <p>The first stage is enforced as hash aggregation with no sort specs. For the second
 * stage, one {@code SortSpecArray} is built per first-stage sub-plan from that sub-plan's
 * grouping columns and tagged with the PID of the second-stage sub-plan at the same
 * position; the second stage is then enforced as sort aggregation with those arrays.
 *
 * @param firstStageBlock         block whose enforcer receives the first-stage setting
 * @param firstStageDistinctNode  first-stage distinct group-by node
 * @param secondStageBlock        block whose enforcer receives the second-stage setting
 * @param secondStageDistinctNode second-stage distinct group-by node
 */
private void setDistinctAggregationEnforcer(
  ExecutionBlock firstStageBlock, DistinctGroupbyNode firstStageDistinctNode,
  ExecutionBlock secondStageBlock, DistinctGroupbyNode secondStageDistinctNode) {
  firstStageBlock.getEnforcer().enforceDistinctAggregation(firstStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.HASH_AGGREGATION, null);

  List<SortSpecArray> sortSpecArrays = new ArrayList<SortSpecArray>();
  int index = 0;
  for (GroupbyNode groupbyNode : firstStageDistinctNode.getSubPlans()) {
    List<SortSpecProto> sortSpecs = new ArrayList<SortSpecProto>();
    for (Column column : groupbyNode.getGroupingColumns()) {
      sortSpecs.add(SortSpecProto.newBuilder().setColumn(column.getProto()).build());
    }
    sortSpecArrays.add(SortSpecArray.newBuilder()
        .setNodeId(secondStageDistinctNode.getSubPlans().get(index).getPID())
        .addAllSortSpecs(sortSpecs).build());
    // Advance to the matching second-stage sub-plan; without this every array
    // was tagged with the PID of sub-plan 0.
    index++;
  }

  secondStageBlock.getEnforcer().enforceDistinctAggregation(secondStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.SORT_AGGREGATION, sortSpecArrays);
}

/**
 * Visits a group-by node by pushing it on the stack, visiting its child, and popping
 * it again once the child visit returns.
 *
 * @return whatever the child visit produced
 * @throws TajoException declared by the signature; propagated from the child visit
 */
@Override
public RESULT visitGroupBy(CONTEXT context, LogicalPlan plan, LogicalPlan.QueryBlock block, GroupbyNode node,
              Stack<LogicalNode> stack) throws TajoException {
  stack.push(node);
  final RESULT childResult = visit(context, plan, block, node.getChild(), stack);
  stack.pop();

  return childResult;
}

Most used methods

Popular in Java

Running tasks concurrently on multiple threads
compareTo (BigDecimal)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided supplier.
getSupportFragmentManager (FragmentActivity)
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
Cipher (javax.crypto)
This class provides access to implementations of cryptographic ciphers for encryption and decryption
LogFactory (org.apache.commons.logging)
Factory for creating Log instances, with discovery and configuration features similar to that employ
Container (java.awt)
A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
Best IntelliJ plugins

How to use GroupbyNode in org.apache.tajo.plan.logical

Best Java code snippets using org.apache.tajo.plan.logical.GroupbyNode (Showing top 20 results out of 315)

How to use
GroupbyNode
in
org.apache.tajo.plan.logical