return costFactory.makeCost(totLeadRowCount, totCpuCost, totDiskCost, totNetworkCost, totMemoryCost);
return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0 /* network cost */, memCost);
/**
 * Cost model for a hash-partitioning exchange.
 *
 * <p>M input rows are hash-distributed over N receivers, so on average each
 * sender ships M/N rows to one destination. The hashing cost shrinks as more
 * distribution keys are used (skew becomes less likely), hence the 1/#keys
 * factor on the hash CPU cost:
 *
 * <pre>
 *   per-sender cost C = (h * 1/k * M/N) + (s * M/N) + (w * M/N)
 *   total cost        = N * C
 * </pre>
 *
 * (Symbol notations are documented in {@link DrillCostBase}.)
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  final RelNode input = getInput();
  final double rowCount = mq.getRowCount(input);
  final int rowWidth = input.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH;

  // CPU: hashing the distribution keys (damped by key count) plus SV remover.
  final double hashCpu = DrillCostBase.HASH_CPU_COST * rowCount / fields.size();
  final double svrCpu = DrillCostBase.SVR_CPU_COST * rowCount;
  // Network: every row crosses the wire once.
  final double network = DrillCostBase.BYTE_NETWORK_COST * rowCount * rowWidth;

  final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, hashCpu + svrCpu, 0, network);
}
/**
 * Builds a converter that turns SQL text into a Calcite relational tree using
 * the query context's planner settings, schemas, session and operator tables.
 *
 * <p>NOTE(review): field-initialization order matters here — {@code settings}
 * and {@code parserConfig} must be assigned before {@code catalog}, which is
 * in turn required by {@code opTab} and {@code validator}.
 *
 * @param context per-query context supplying settings, schemas and session state
 */
public SqlConverter(QueryContext context) {
  this.settings = context.getPlannerSettings();
  this.util = context;
  this.functions = context.getFunctionRegistry();
  this.parserConfig = new DrillParserConfig(settings);
  this.sqlToRelConverterConfig = new SqlToRelConverterConfig();
  this.isInnerQuery = false; // this converter handles the outermost (root) query
  this.typeFactory = new JavaTypeFactoryImpl(DRILL_TYPE_SYSTEM);
  this.defaultSchema = context.getNewDefaultSchema();
  this.rootSchema = rootSchema(defaultSchema);
  // Workspace used for temporary tables, taken from the Drill config.
  this.temporarySchema = context.getConfig().getString(ExecConstants.DEFAULT_TEMPORARY_WORKSPACE);
  this.session = context.getSession();
  this.drillConfig = context.getConfig();
  this.catalog = new DrillCalciteCatalogReader(
      rootSchema,
      parserConfig.caseSensitive(),
      DynamicSchema.from(defaultSchema).path(null),
      typeFactory,
      drillConfig,
      session);
  // Drill's operator table takes precedence; the catalog reader is the fallback.
  this.opTab = new ChainedSqlOperatorTable(Arrays.asList(context.getDrillOperatorTable(), catalog));
  // A null cost factory means "use Calcite's default costing".
  this.costFactory = (settings.useDefaultCosting()) ? null : new DrillCostBase.DrillCostFactory();
  this.validator = new DrillValidator(opTab, catalog, typeFactory, parserConfig.conformance());
  validator.setIdentifierExpansion(true);
  cluster = null; // created later, when conversion actually runs
}
/**
 * Cost model for a single merge exchange: N sorted input streams holding a
 * combined M rows are merged into one sorted output stream.
 *
 * <pre>
 *   per-sender cost C = (s * M/N) + (w * M/N)   // SV remover + network
 *   merge cost        = (M log2 N) * c          // heap merge at the receiver
 *   total             = N * C + (M log2 N) * c
 * </pre>
 *
 * (Symbol notations are documented in {@link DrillCostBase}.)
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  final RelNode input = getInput();
  final double rowCount = mq.getRowCount(input);
  final int rowWidth = input.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH;

  final double svrCpu = DrillCostBase.SVR_CPU_COST * rowCount;
  final double network = DrillCostBase.BYTE_NETWORK_COST * rowCount * rowWidth;

  // Merging M rows from N sorted senders costs M * log2(N) comparisons.
  final int senderCount = PrelUtil.getSettings(getCluster()).numEndPoints();
  final double mergeCpu =
      DrillCostBase.COMPARE_CPU_COST * rowCount * (Math.log(senderCount) / Math.log(2));

  final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, svrCpu + mergeCpu, 0, network);
}
/**
 * Cost model for a union exchange: N senders forward a combined M rows into a
 * single output stream, with no sort or merge involved.
 *
 * <pre>
 *   per-sender cost C = (s * M/N) + (w * M/N)   // SV remover + network
 *   total             = N * C
 * </pre>
 *
 * (Symbol notations are documented in {@link DrillCostBase}.)
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  final RelNode input = getInput();
  final double rowCount = mq.getRowCount(input);
  final int rowWidth = input.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH;

  final double svrCpu = DrillCostBase.SVR_CPU_COST * rowCount;
  final double network = DrillCostBase.BYTE_NETWORK_COST * rowCount * rowWidth;

  final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, svrCpu, 0, network);
}
public static RelOptCost computeHashJoinCostWithRowCntKeySize(RelOptPlanner planner, double probeRowCount, double buildRowCount, int keySize) { // cpu cost of hashing the join keys for the build side double cpuCostBuild = DrillCostBase.HASH_CPU_COST * keySize * buildRowCount; // cpu cost of hashing the join keys for the probe side double cpuCostProbe = DrillCostBase.HASH_CPU_COST * keySize * probeRowCount; // cpu cost of evaluating each leftkey=rightkey join condition double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * keySize; double factor = PrelUtil.getPlannerSettings(planner).getOptions() .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val; long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions() .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val; // table + hashValues + links double memCost = ( (fieldWidth * keySize) + IntHolder.WIDTH + IntHolder.WIDTH ) * buildRowCount * factor; double cpuCost = joinConditionCost * (probeRowCount) // probe size determine the join condition comparison cost + cpuCostBuild + cpuCostProbe; DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost); }
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { //We use multiplier 0.05 for TopN operator, and 0.1 for Sort, to make TopN a preferred choice. return super.computeSelfCost(planner, mq).multiplyBy(.1); } RelNode child = this.getInput(); double inputRows = mq.getRowCount(child); // int rowWidth = child.getRowType().getPrecision(); int numSortFields = this.collation.getFieldCollations().size(); double cpuCost = DrillCostBase.COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(inputRows)/Math.log(2)); double diskIOCost = 0; // assume in-memory for now until we enforce operator-level memory constraints // TODO: use rowWidth instead of avgFieldWidth * numFields // avgFieldWidth * numFields * inputRows double numFields = this.getRowType().getFieldCount(); long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions() .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val; double memCost = fieldWidth * numFields * inputRows; DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0, memCost); }
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner).multiplyBy(.1); } RelNode child = this.getInput(); double inputRows = (mq == null)? ROWCOUNT_UNKNOWN : mq.getRowCount(child); // int rowWidth = child.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH; /* NOTE: the Exchange costing in general has to be examined in a broader context. A RangePartitionExchange * may be added for index plans with RowJeyJoin and Calcite compares the cost of this sub-plan with a * full table scan (FTS) sub-plan without an exchange. The RelSubSet would have Filter-Project-TableScan for * the FTS and a RowKeyJoin whose right input is a RangePartitionExchange-IndexScan. Although a final UnionExchange * is done for both plans, the intermediate costing of index plan with exchange makes it more expensive than the FTS * sub-plan, even though the final cost of the overall FTS would have been more expensive. */ // commenting out following based on comments above // double rangePartitionCpuCost = DrillCostBase.RANGE_PARTITION_CPU_COST * inputRows; // double svrCpuCost = DrillCostBase.SVR_CPU_COST * inputRows; // double networkCost = DrillCostBase.BYTE_NETWORK_COST * inputRows * rowWidth; DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory(); return costFactory.makeCost(inputRows, 0, 0, 0 /* see comments above */); }
@Override public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) { final PlannerSettings settings = PrelUtil.getPlannerSettings(planner); final ScanStats stats = this.getGroupScan().getScanStats(settings); final int columnCount = this.getRowType().getFieldCount(); if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return planner.getCostFactory().makeCost(stats.getRecordCount() * columnCount, stats.getCpuCost(), stats.getDiskCost()); } // double rowCount = RelMetadataQuery.getRowCount(this); double rowCount = stats.getRecordCount(); // As DRILL-4083 points out, when columnCount == 0, cpuCost becomes zero, // which makes the costs of HiveScan and HiveDrillNativeParquetScan the same double cpuCost = rowCount * Math.max(columnCount, 1); // For now, assume cpu cost is proportional to row count. // If a positive value for CPU cost is given multiply the default CPU cost by given CPU cost. if (stats.getCpuCost() > 0) { cpuCost *= stats.getCpuCost(); } double ioCost = stats.getDiskCost(); DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory(); return costFactory.makeCost(rowCount, cpuCost, ioCost, 0); }
@Override public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) { final ScanStats stats = getGroupScan().getScanStats(settings); int columnCount = getRowType().getFieldCount(); double ioCost = 0; boolean isStarQuery = Utilities.isStarQuery(columns); if (isStarQuery) { columnCount = STAR_COLUMN_COST; } // double rowCount = RelMetadataQuery.getRowCount(this); double rowCount = stats.getRecordCount(); if (rowCount < 1) { rowCount = 1; } if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return planner.getCostFactory().makeCost(rowCount * columnCount, stats.getCpuCost(), stats.getDiskCost()); } double cpuCost = rowCount * columnCount; // for now, assume cpu cost is proportional to row count and number of columns DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory(); return costFactory.makeCost(rowCount, cpuCost, ioCost, 0); }
/** * Cost of doing Top-N is proportional to M log N where M is the total number of * input rows and N is the limit for Top-N. This makes Top-N preferable to Sort * since cost of full Sort is proportional to M log M . */ @Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { //We use multiplier 0.05 for TopN operator, and 0.1 for Sort, to make TopN a preferred choice. return super.computeSelfCost(planner, mq).multiplyBy(0.05); } RelNode child = this.getInput(); double inputRows = mq.getRowCount(child); int numSortFields = this.collation.getFieldCollations().size(); double cpuCost = DrillCostBase.COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(limit)/Math.log(2)); double diskIOCost = 0; // assume in-memory for now until we enforce operator-level memory constraints DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory(); return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0); }
protected RelOptCost computeCartesianJoinCost(RelOptPlanner planner, RelMetadataQuery mq) { final double probeRowCount = mq.getRowCount(this.getLeft()); final double buildRowCount = mq.getRowCount(this.getRight()); final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); final double mulFactor = 10000; // This is a magic number, // just to make sure Cartesian Join is more expensive // than Non-Cartesian Join. final int keySize = 1; // assume having 1 join key, when estimate join cost. final DrillCostBase cost = (DrillCostBase) computeHashJoinCostWithKeySize(planner, keySize, mq).multiplyBy(mulFactor); // Cartesian join row count will be product of two inputs. The other factors come from the above estimated DrillCost. return costFactory.makeCost( buildRowCount * probeRowCount, cost.getCpu(), cost.getIo(), cost.getNetwork(), cost.getMemory() ); }
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner, mq).multiplyBy(.1); } if (joincategory == JoinCategory.CARTESIAN || joincategory == JoinCategory.INEQUALITY || getJoinType() == JoinRelType.FULL) { return planner.getCostFactory().makeInfiniteCost(); } double leftRowCount = mq.getRowCount(this.getLeft()); double rightRowCount = mq.getRowCount(this.getRight()); // cost of evaluating each leftkey=rightkey join condition double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * this.getLeftKeys().size(); double cpuCost = joinConditionCost * (leftRowCount + rightRowCount); DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); return costFactory.makeCost(leftRowCount + rightRowCount, cpuCost, 0, 0); }
/**
 * Scan cost derived from group-scan statistics: CPU proportional to
 * rows * columns (scaled by a positive stats CPU cost, when present), with
 * I/O taken directly from the stats disk cost.
 */
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
  final PlannerSettings settings = PrelUtil.getPlannerSettings(planner);
  final ScanStats stats = getGroupScan().getScanStats(settings);
  final int columnCount = getRowType().getFieldCount();

  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return planner.getCostFactory()
        .makeCost(stats.getRecordCount() * columnCount, stats.getCpuCost(), stats.getDiskCost());
  }

  final double rowCount = stats.getRecordCount();
  // Floor the column count at 1 so the CPU cost never collapses to zero.
  double cpuCost = rowCount * Math.max(columnCount, 1);
  // A positive stats CPU cost scales the default row-proportional estimate.
  if (stats.getCpuCost() > 0) {
    cpuCost *= stats.getCpuCost();
  }

  final double ioCost = stats.getDiskCost();
  final DrillCostBase.DrillCostFactory costFactory =
      (DrillCostBase.DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, cpuCost, ioCost, 0);
}
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner, mq).multiplyBy(.1); } RelNode child = this.getInput(); double inputRows = mq.getRowCount(child); int numGroupByFields = this.getGroupCount(); int numAggrFields = this.aggCalls.size(); double cpuCost = DrillCostBase.COMPARE_CPU_COST * numGroupByFields * inputRows; // add cpu cost for computing the aggregate functions cpuCost += DrillCostBase.FUNC_CPU_COST * numAggrFields * inputRows; DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory(); return costFactory.makeCost(inputRows, cpuCost, 0 /* disk i/o cost */, 0 /* network cost */); }
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner, mq).multiplyBy(.1); } double rowCount = mq.getRowCount(this); // Attribute small cost for projecting simple fields. In reality projecting simple columns in not free and // this allows projection pushdown/project-merge rules to kick-in thereby eliminating unneeded columns from // the projection. double cpuCost = DrillCostBase.PROJECT_CPU_COST * rowCount * nonSimpleFieldCount + (this.getRowType().getFieldCount() - nonSimpleFieldCount) * rowCount * DrillCostBase.BASE_CPU_COST; DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); return costFactory.makeCost(rowCount, cpuCost, 0, 0); }
@Override public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) { return super.computeSelfCost(planner, mq).multiplyBy(.1); } double leftRowCount = mq.getRowCount(this.getLeft()); double rightRowCount = mq.getRowCount(this.getRight()); double nljFactor = PrelUtil.getSettings(getCluster()).getNestedLoopJoinFactor(); // cpu cost of evaluating each expression in join condition int exprNum = RelOptUtil.conjunctions(getCondition()).size() + RelOptUtil.disjunctions(getCondition()).size(); double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * exprNum; double cpuCost = joinConditionCost * (leftRowCount * rightRowCount) * nljFactor; DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory(); return costFactory.makeCost(leftRowCount * rightRowCount, cpuCost, 0, 0, 0); }