/**
 * Optionally places an order-preserving (merging) exchange on top of {@code input}.
 *
 * <p>When {@code isExchangeRequired} is false the input is handed back untouched.
 * Otherwise a {@link SingleMergeExchangePrel} (singleton distribution) or a
 * {@link HashToMergeExchangePrel} (caller-supplied distribution) is created; in both
 * cases the collation is taken from {@code indexContext}.
 *
 * @param cluster            cluster the new exchange node is created in
 * @param isSingleton        true to merge everything into a single stream
 * @param isExchangeRequired false to skip exchange creation entirely
 * @param traits             base trait set; its distribution trait is replaced
 * @param distributionTrait  distribution used (trait and fields) for the hash-to-merge exchange
 * @param indexContext       supplies the collation the exchange must preserve
 * @param input              node to wrap
 * @return {@code input} itself, or a new exchange whose input is {@code input}
 */
public static RelNode getExchange(RelOptCluster cluster, boolean isSingleton, boolean isExchangeRequired,
    RelTraitSet traits, DrillDistributionTrait distributionTrait, IndexCallContext indexContext,
    RelNode input) {
  if (isExchangeRequired) {
    if (!isSingleton) {
      // Hash-distributed merge: one merging receiver per endpoint.
      final RelTraitSet hashTraits = traits.replace(distributionTrait);
      return new HashToMergeExchangePrel(
          cluster,
          hashTraits,
          input,
          distributionTrait.getFields(),
          indexContext.getCollation(),
          PrelUtil.getSettings(cluster).numEndPoints());
    }
    // Singleton merge: all sorted streams are merged by a single receiver.
    final RelTraitSet singletonTraits = traits.replace(DrillDistributionTrait.SINGLETON);
    return new SingleMergeExchangePrel(cluster, singletonTraits, input, indexContext.getCollation());
  }
  return input;
}
/**
 * A SingleMergeExchange processes a total of M rows coming from N
 * sorted input streams (from N senders) and merges them into a single
 * output sorted stream. For costing purposes we can assume each sender
 * is sending M/N rows to a single receiver.
 * (See DrillCostBase for symbol notations)
 * <pre>
 * C = CPU cost of SV remover for M/N rows
 *     + Network cost of sending M/N rows to 1 destination.
 * So, C = (s * M/N) + (w * M/N)
 * Cost of merging M rows coming from N senders = (M log2 N) * c
 * Total cost = N * C + (M log2 N) * c
 * </pre>
 *
 * <p>N is taken from the planner settings and is treated as at least 1, so that the
 * {@code log2 N} term can never evaluate to negative infinity (or NaN for zero rows)
 * when no endpoint count has been reported.
 *
 * @param planner planner whose cost factory is used to build the result
 * @param mq      metadata query used to obtain the input row count
 * @return the default cost scaled by 0.1 when default costing is enabled, otherwise
 *         a cost with the row count, CPU (SV remover + merge) and network components
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }
  RelNode child = this.getInput();
  double inputRows = mq.getRowCount(child);
  int rowWidth = child.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH;

  // N * C from the formula above: the per-sender M/N terms sum back to M rows.
  double svrCpuCost = DrillCostBase.SVR_CPU_COST * inputRows;
  double networkCost = DrillCostBase.BYTE_NETWORK_COST * inputRows * rowWidth;

  // Math.log(0) is -Infinity; clamp so a missing/zero endpoint count costs like a
  // single sender (log2 1 == 0) instead of poisoning the cost with -Infinity/NaN.
  int numEndPoints = Math.max(1, PrelUtil.getSettings(getCluster()).numEndPoints());
  double mergeCpuCost = DrillCostBase.COMPARE_CPU_COST * inputRows * (Math.log(numEndPoints) / Math.log(2));

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(inputRows, svrCpuCost + mergeCpuCost, 0, networkCost);
}
/** * In a BroadcastExchange, each sender is sending data to N receivers (for costing * purposes we assume it is also sending to itself). */ @Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner, mq).multiplyBy(.1); } RelNode child = this.getInput(); final int numEndPoints = PrelUtil.getSettings(getCluster()).numEndPoints(); final double broadcastFactor = PrelUtil.getSettings(getCluster()).getBroadcastFactor(); final double inputRows = mq.getRowCount(child); final int rowWidth = child.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH; final double cpuCost = broadcastFactor * DrillCostBase.SVR_CPU_COST * inputRows; // We assume localhost network cost is 1/10 of regular network cost // ( c * num_bytes * (N - 1) ) + ( c * num_bytes * 0.1) // = c * num_bytes * (N - 0.9) // TODO: a similar adjustment should be made to HashExchangePrel final double networkCost = broadcastFactor * DrillCostBase.BYTE_NETWORK_COST * inputRows * rowWidth * (numEndPoints - 0.9); return new DrillCostBase(inputRows, cpuCost, 0, networkCost); }
OperatorPhase.PHASE_1of2); int numEndPoints = PrelUtil.getSettings(phase1Agg.getCluster()).numEndPoints();