private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering, GroupScan primaryTableGroupScan) { if (isCovering) { // db server will send only the projected columns to the db client for the selected // number of rows, so network cost is based on the number of actual projected columns double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCost; } else { // only the rowkey column is projected from the index and sent over the network double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan); // after join-back to primary table, all projected columns are sent over the network double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCostIndex + networkCostPrimary; } }
/**
 * Estimates the number of disk blocks a scan will touch.
 * Uses the on-disk size when the row count is unknown or zero; otherwise
 * derives the byte count from rows times average row width.
 *
 * @param rowCount        estimated rows, or ROWCOUNT_UNKNOWN
 * @param sizeFromDisk    table size on disk in bytes
 * @param avgRowSize      average row width in bytes
 * @param pluginCostModel cost model supplying the block size
 * @return estimated block count, rounded up
 */
private double getNumOfBlocks(double rowCount, double sizeFromDisk, double avgRowSize, PluginCost pluginCostModel) {
  double estimatedBytes = (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0)
      ? sizeFromDisk
      : rowCount * avgRowSize;
  return Math.ceil(estimatedBytes / pluginCostModel.getBlockSize(this));
}
private int setConfigValue(DrillConfig config, String configPath, int defaultValue, CheckValid check) { int configValue; try { configValue = config.getInt(configPath); if (!check.isValid(configValue)) { configValue = defaultValue; } } catch (Exception ex) { // Use defaults, if config values not present or any other issue configValue = defaultValue; } return configValue; }
// NOTE(review): this span is a fragment — it contains two `return` statements with no
// enclosing method visible, and the expression before `totalBlocksPrimary` is truncated
// (its left-hand side, presumably `double totalBlocksPrimary = Math.ceil((...`, is missing).
// It appears to fuse the covering-plan and non-covering-plan disk-cost paths of an index
// cost estimator; left byte-identical pending access to the full method. TODO confirm.

// Covering path: sequential read of the blocks holding the selected rows.
double numBlocks = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan)); double diskCost = numBlocks * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan); return costFactory.makeCost(leadRowCount, cpuCost, diskCost, networkCost);
// Non-covering path: sequential read of the index, then random reads for the join-back
// to the primary table, bounded by the primary table's total block count.
double numBlocksIndex = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan)); double diskCostIndex = numBlocksIndex * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan); pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows)/ pluginCost.getBlockSize(primaryTableGroupScan)); double diskBlocksPrimary = Math.min(totalBlocksPrimary, leadRowCount); double diskCostPrimary = diskBlocksPrimary * pluginCost.getRandomBlockReadCost(primaryTableGroupScan); double diskCostTotal = diskCostIndex + diskCostPrimary; return costFactory.makeCost(leadRowCount, cpuCost, diskCostTotal, networkCost);
@Override public ScanStats getScanStats() { // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan double rowCount; PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS: columns.size(); // Get the restricted group scan row count - same as the right side index rows rowCount = computeRestrictedScanRowcount(); // Get the average row size of the primary table double avgRowSize = stats.getAvgRowSize(null, true); if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) { avgRowSize = avgColumnSize * numColumns; } // restricted scan does random lookups and each row may belong to a different block, with the number // of blocks upper bounded by the total num blocks in the primary table double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this)); double numBlocks = Math.min(totalBlocksPrimary, rowCount); double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this); // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor // for biasing towards non-covering plans. diskCost *= stats.getRowKeyJoinBackIOFactor(); logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost); return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost); }
// NOTE(review): fragment — the enclosing method is not visible, the `if` block is never
// closed in this view, and `avgRowSize`, `rowCount`, `rowsFromDisk`, and `pluginCostModel`
// are defined elsewhere. Left byte-identical.
// NOTE(review): `scanSpec.getSerializedFilter()` is dereferenced BEFORE the
// `scanSpec != null` guard below — if scanSpec can actually be null this is an NPE;
// otherwise the null check is dead. TODO confirm against the full method.
final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); boolean filterPushed = (scanSpec.getSerializedFilter() != null); if (scanSpec != null && scanSpec.getIndexDesc() != null) {
// Index scan path: sequential block reads, bounded by the table's total block count.
double totalBlocks = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this)); double numBlocks = Math.ceil(((avgRowSize * rowsFromDisk)/pluginCostModel.getBlockSize(this))); numBlocks = Math.min(totalBlocks, numBlocks); double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this); logger.debug("index_plan_info: JsonIndexGroupScan:{} - indexName:{}: rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, rowsFromDisk {}, diskCost:{}", System.identityHashCode(this), scanSpec.getIndexDesc().getIndexName(), rowCount, avgRowSize, numBlocks, totalBlocks, rowsFromDisk, diskCost);
// NOTE(review): fragment — the method body is incomplete in this view: `numBlocks` is
// used but not defined here, and the method is never closed. Left byte-identical.
// Full-table scan statistics: disk cost modeled as sequential reads of every block.
private ScanStats fullTableScanStats() { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
// Sequential read cost over the estimated block count (numBlocks computed in elided code).
double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
/**
 * Scales every cost component by {@code factor}.
 * Infinity is absorbing: scaling it returns the same infinite cost.
 *
 * @param factor multiplier applied to each component
 * @return a new cost with all components scaled, or INFINITY unchanged
 */
@Override
public RelOptCost multiplyBy(double factor) {
  if (this == INFINITY) {
    return this;
  }
  final DrillCostBase scaled = new DrillCostBase(
      rowCount * factor,
      cpu * factor,
      io * factor,
      network * factor,
      memory * factor);
  return scaled;
}
/**
 * Loads the JSON-table cost constants from configuration, validating each
 * value and falling back to defaults when absent or invalid, then selects
 * the sequential/random read costs for the given storage medium.
 *
 * @param config    Drill configuration to read cost constants from
 * @param mediaType storage medium; {@code PluginConstants.SSD} selects SSD costs
 */
public MapRDBCost(DrillConfig config, String mediaType) {
  JSON_AVG_COLUMN_SIZE = setConfigValue(config, PluginConstants.JSON_TABLE_AVERGE_COLUMN_SIZE,
      PluginConstants.JSON_TABLE_AVERGE_COLUMN_SIZE_DEFAULT, PluginConstants.alwaysValid);
  JSON_TABLE_BLOCK_SIZE = setConfigValue(config, PluginConstants.JSON_TABLE_BLOCK_SIZE,
      PluginConstants.JSON_TABLE_BLOCK_SIZE_DEFAULT, PluginConstants.alwaysValid);
  // Sequential costs are read first: the random-read validators below require
  // random cost >= the corresponding sequential cost.
  JSON_SSD_BLOCK_SEQ_READ_COST = setConfigValue(config, PluginConstants.JSON_TABLE_SSD_BLOCK_SEQ_READ_COST,
      PluginConstants.JSON_TABLE_SSD_BLOCK_SEQ_READ_COST_DEFAULT, PluginConstants.isNonNegative);
  JSON_SSD_BLOCK_RANDOM_READ_COST = setConfigValue(config, PluginConstants.JSON_TABLE_SSD_BLOCK_RANDOM_READ_COST,
      PluginConstants.JSON_TABLE_SSD_BLOCK_RANDOM_READ_COST_DEFAULT,
      new greaterThanEquals(JSON_SSD_BLOCK_SEQ_READ_COST));
  JSON_HDD_BLOCK_SEQ_READ_COST = setConfigValue(config, PluginConstants.JSON_TABLE_HDD_BLOCK_SEQ_READ_COST,
      PluginConstants.JSON_TABLE_HDD_BLOCK_SEQ_READ_COST_DEFAULT, PluginConstants.isNonNegative);
  JSON_HDD_BLOCK_RANDOM_READ_COST = setConfigValue(config, PluginConstants.JSON_TABLE_HDD_BLOCK_RANDOM_READ_COST,
      PluginConstants.JSON_TABLE_HDD_BLOCK_RANDOM_READ_COST_DEFAULT,
      new greaterThanEquals(JSON_HDD_BLOCK_SEQ_READ_COST));
  // Pick the effective read costs for the configured medium.
  if (mediaType.equals(PluginConstants.SSD)) {
    JSON_BLOCK_SEQ_READ_COST = JSON_SSD_BLOCK_SEQ_READ_COST;
    JSON_BLOCK_RANDOM_READ_COST = JSON_SSD_BLOCK_RANDOM_READ_COST;
  } else {
    JSON_BLOCK_SEQ_READ_COST = JSON_HDD_BLOCK_SEQ_READ_COST;
    JSON_BLOCK_RANDOM_READ_COST = JSON_HDD_BLOCK_RANDOM_READ_COST;
  }
}
/**
 * Overrides the join-specific distinct-row-count computation to delegate to the
 * generic {@link RelNode} overload.
 *
 * <p>Calcite and Drill compute joined row counts differently. Without this
 * override, the first call could use Drill's join row count while a later call
 * uses Calcite's: that happens when
 * {@link RelMdDistinctRowCount#getDistinctRowCount(Join, RelMetadataQuery,
 * ImmutableBitSet, RexNode)} is used first, and afterwards a different
 * getDistinctRowCount overload for a parent rel simply reuses the row count of
 * its input (this join rel). The inconsistency inflates the cost of the best
 * rel node when {@link RelSubset#propagateCostImprovements} runs.
 *
 * <p>This is part of the fix for CALCITE-2018.
 */
@Override public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate) { return getDistinctRowCount((RelNode) rel, mq, groupKey, predicate); }
@Override public Double visitPrel(Prel prel, Void value) throws RuntimeException { RelMetadataQuery mq = RelMetadataQuery.instance(); return ((DrillCostBase) mq.getCumulativeCost(prel)).getMemory(); // return findCost(prel, mq); }
private void init() { try { // Get the fullTableRowCount only once i.e. if not already obtained before. if (fullTableRowCount == 0) { final Table t = this.formatPlugin.getJsonTableCache().getTable( scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName()); final MetaTable metaTable = t.getMetaTable(); // For condition null, we get full table stats. com.mapr.db.scan.ScanStats stats = metaTable.getScanStats(); fullTableRowCount = stats.getEstimatedNumRows(); fullTableEstimatedSize = stats.getEstimatedSize(); // MapRDB client can return invalid rowCount i.e. 0, especially right after table // creation. It takes 15 minutes before table stats are obtained and cached in client. // If we get 0 rowCount, fallback to getting rowCount using old admin API. if (fullTableRowCount == 0) { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) ? STAR_COLS : columns.size(); MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName()); fullTableRowCount = tableStats.getNumRows(); fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize; } } } catch (Exception e) { throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e); } }
/**
 * Estimates how many disk blocks a scan touches, rounding up.
 * When the row count is known and non-zero, derives bytes from rows times
 * average row width; otherwise uses the on-disk size directly.
 *
 * @param rowCount        estimated rows, or ROWCOUNT_UNKNOWN
 * @param sizeFromDisk    table size on disk in bytes
 * @param avgRowSize      average row width in bytes
 * @param pluginCostModel cost model supplying the block size
 * @return estimated block count
 */
private double getNumOfBlocks(double rowCount, double sizeFromDisk, double avgRowSize, PluginCost pluginCostModel) {
  if (rowCount != ROWCOUNT_UNKNOWN && rowCount != 0) {
    // Row count is usable: estimate bytes from row count and width.
    return Math.ceil(rowCount * avgRowSize / pluginCostModel.getBlockSize(this));
  }
  // No usable row count: fall back to the physical on-disk size.
  return Math.ceil(sizeFromDisk / pluginCostModel.getBlockSize(this));
}
/**
 * Component-wise subtraction of another cost from this one.
 * Subtracting anything from INFINITY leaves INFINITY.
 *
 * @param other cost to subtract; must be a {@code DrillCostBase}
 * @return a new cost holding the component-wise differences
 */
@Override
public RelOptCost minus(RelOptCost other) {
  if (this == INFINITY) {
    return this;
  }
  DrillCostBase subtrahend = (DrillCostBase) other;
  return new DrillCostBase(
      rowCount - subtrahend.rowCount,
      cpu - subtrahend.cpu,
      io - subtrahend.io,
      network - subtrahend.network,
      memory - subtrahend.memory);
}
private int setConfigValue(DrillConfig config, String configPath, int defaultValue, CheckValid check) { int configValue; try { configValue = config.getInt(configPath); if (!check.isValid(configValue)) { configValue = defaultValue; } } catch (Exception ex) { // Use defaults, if config values not present or any other issue configValue = defaultValue; } return configValue; }
private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering, GroupScan primaryTableGroupScan) { if (isCovering) { // db server will send only the projected columns to the db client for the selected // number of rows, so network cost is based on the number of actual projected columns double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCost; } else { // only the rowkey column is projected from the index and sent over the network double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan); // after join-back to primary table, all projected columns are sent over the network double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCostIndex + networkCostPrimary; } }
/**
 * Component-wise addition of two costs.
 * INFINITY is absorbing: if either operand is infinite, the sum is INFINITY.
 *
 * @param other cost to add; must be a {@code DrillCostBase}
 * @return a new cost holding the component-wise sums, or INFINITY
 */
@Override
public RelOptCost plus(RelOptCost other) {
  DrillCostBase addend = (DrillCostBase) other;
  if (this == INFINITY || addend == INFINITY) {
    return INFINITY;
  }
  return new DrillCostBase(
      rowCount + addend.rowCount,
      cpu + addend.cpu,
      io + addend.io,
      network + addend.network,
      memory + addend.memory);
}
/**
 * Creates a cost with all five components specified.
 *
 * @param dRows    row count component
 * @param dCpu     CPU cost component
 * @param dIo      disk I/O cost component
 * @param dNetwork network cost component
 * @param dMemory  memory cost component
 * @return a new {@code DrillCostBase} holding the given components
 */
public RelOptCost makeCost(double dRows, double dCpu, double dIo, double dNetwork, double dMemory) { return new DrillCostBase(dRows, dCpu, dIo, dNetwork, dMemory); }
/**
 * Creates a cost from rows, CPU, and I/O; network and memory default to 0.
 *
 * @param dRows row count component
 * @param dCpu  CPU cost component
 * @param dIo   disk I/O cost component
 * @return a new {@code DrillCostBase} with zero network and memory components
 */
public RelOptCost makeCost(double dRows, double dCpu, double dIo) { return new DrillCostBase(dRows, dCpu, dIo, 0, 0); }
/**
 * Creates a cost from rows, CPU, I/O, and network; memory defaults to 0.
 *
 * @param dRows    row count component
 * @param dCpu     CPU cost component
 * @param dIo      disk I/O cost component
 * @param dNetwork network cost component
 * @return a new {@code DrillCostBase} with a zero memory component
 */
public RelOptCost makeCost(double dRows, double dCpu, double dIo, double dNetwork) { return new DrillCostBase(dRows, dCpu, dIo, dNetwork, 0); }