// NOTE(review): this span appears to be a truncated paste — the statement that should
// assign `totalBlocksPrimary` is cut off mid-expression (stray closing parens below).
// Confirm against the original index-cost source before relying on this fragment.

// Sequential read: blocks touched = ceil(rows * avgRowSize / blockSize).
double numBlocks = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan));
double diskCost = numBlocks * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan);
// Index-side sequential read, sized the same way.
double numBlocksIndex = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan));
double diskCostIndex = numBlocksIndex * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan);
// Truncated tail — presumably the end of `double totalBlocksPrimary = Math.ceil((...` — TODO confirm.
pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows)/ pluginCost.getBlockSize(primaryTableGroupScan));
// Join-back to the primary table does random block reads, capped by the lead row count.
double diskBlocksPrimary = Math.min(totalBlocksPrimary, leadRowCount);
double diskCostPrimary = diskBlocksPrimary * pluginCost.getRandomBlockReadCost(primaryTableGroupScan);
double diskCostTotal = diskCostIndex + diskCostPrimary;
@Override public ScanStats getScanStats() { // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan double rowCount; PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS: columns.size(); // Get the restricted group scan row count - same as the right side index rows rowCount = computeRestrictedScanRowcount(); // Get the average row size of the primary table double avgRowSize = stats.getAvgRowSize(null, true); if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) { avgRowSize = avgColumnSize * numColumns; } // restricted scan does random lookups and each row may belong to a different block, with the number // of blocks upper bounded by the total num blocks in the primary table double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this)); double numBlocks = Math.min(totalBlocksPrimary, rowCount); double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this); // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor // for biasing towards non-covering plans. diskCost *= stats.getRowKeyJoinBackIOFactor(); logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost); return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost); }
// NOTE(review): fragment — this span begins mid-method (`pluginCostModel`, `avgRowSize`,
// `rowCount`, and `rowsFromDisk` are defined above it) and the closing braces lie
// outside this view.
final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
// A pushed-down filter is indicated by a serialized filter on the scan spec.
boolean filterPushed = (scanSpec.getSerializedFilter() != null);
// NOTE(review): `scanSpec` is dereferenced on the line above before this null
// check, so the check can never save us from an NPE — verify intent upstream.
if (scanSpec != null && scanSpec.getIndexDesc() != null) {
  // Blocks actually read are bounded by the table's total block count.
  double totalBlocks = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this));
  double numBlocks = Math.ceil(((avgRowSize * rowsFromDisk)/pluginCostModel.getBlockSize(this)));
  numBlocks = Math.min(totalBlocks, numBlocks);
  // Index scans read blocks sequentially.
  double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
  logger.debug("index_plan_info: JsonIndexGroupScan:{} - indexName:{}: rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, rowsFromDisk {}, diskCost:{}",
      System.identityHashCode(this), scanSpec.getIndexDesc().getIndexName(), rowCount, avgRowSize,
      numBlocks, totalBlocks, rowsFromDisk, diskCost);
// NOTE(review): fragment — the statements computing `numBlocks` and the rest of the
// method body/return are missing from this view; `numBlocks` is undefined in this span.
private ScanStats fullTableScanStats() {
  PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
  // STAR_COLS approximates the projected column count when no explicit projection is present.
  final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
  // Full table scans read blocks sequentially.
  double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering, GroupScan primaryTableGroupScan) { if (isCovering) { // db server will send only the projected columns to the db client for the selected // number of rows, so network cost is based on the number of actual projected columns double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCost; } else { // only the rowkey column is projected from the index and sent over the network double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan); // after join-back to primary table, all projected columns are sent over the network double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCostIndex + networkCostPrimary; } }
/**
 * Converts a size estimate into a block count for disk-cost purposes.
 *
 * @param rowCount        estimated rows, or {@code ROWCOUNT_UNKNOWN}/0 if unavailable
 * @param sizeFromDisk    raw on-disk size estimate in bytes
 * @param avgRowSize      average row size in bytes
 * @param pluginCostModel cost model supplying the block size
 * @return number of blocks, rounded up
 */
private double getNumOfBlocks(double rowCount, double sizeFromDisk,
    double avgRowSize, PluginCost pluginCostModel) {
  // Without a usable row count, size the scan directly from the on-disk byte estimate;
  // otherwise derive the byte count from rows * average row size.
  final double bytes = (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0)
      ? sizeFromDisk
      : rowCount * avgRowSize;
  return Math.ceil(bytes / pluginCostModel.getBlockSize(this));
}
/**
 * Estimates the cost of this restricted (skip) scan of the primary table.
 * The row count mirrors the index side of the rowkey join; disk cost models
 * one random block read per qualifying row, bounded by the table's total
 * block count, then discounted to bias towards non-covering index plans.
 *
 * @return scan statistics with an inexact row count and the estimated disk cost
 */
@Override
public ScanStats getScanStats() {
  // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan
  double rowCount;
  PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
  int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS: columns.size();
  // Get the restricted group scan row count - same as the right side index rows
  rowCount = computeRestrictedScanRowcount();
  // Get the average row size of the primary table
  double avgRowSize = stats.getAvgRowSize(null, true);
  // Fall back to a per-column estimate when row-size statistics are missing.
  if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
    avgRowSize = avgColumnSize * numColumns;
  }
  // restricted scan does random lookups and each row may belong to a different block, with the number
  // of blocks upper bounded by the total num blocks in the primary table
  double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this));
  double numBlocks = Math.min(totalBlocksPrimary, rowCount);
  double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this);
  // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor
  // for biasing towards non-covering plans.
  diskCost *= stats.getRowKeyJoinBackIOFactor();
  logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}",
      System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost);
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
// NOTE(review): fragment — begins mid-method (`pluginCostModel`, `avgRowSize`, `rowCount`,
// and `rowsFromDisk` are defined above this span); closing braces are outside this view.
final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
// A pushed-down filter is indicated by a serialized filter on the scan spec.
boolean filterPushed = (scanSpec.getSerializedFilter() != null);
// NOTE(review): `scanSpec` is dereferenced before this null check — the check is
// ineffective as NPE protection; confirm whether scanSpec can actually be null here.
if (scanSpec != null && scanSpec.getIndexDesc() != null) {
  // Blocks read are capped by the table's total block count.
  double totalBlocks = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this));
  double numBlocks = Math.ceil(((avgRowSize * rowsFromDisk)/pluginCostModel.getBlockSize(this)));
  numBlocks = Math.min(totalBlocks, numBlocks);
  // Index scans are costed as sequential block reads.
  double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
  logger.debug("index_plan_info: JsonIndexGroupScan:{} - indexName:{}: rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, rowsFromDisk {}, diskCost:{}",
      System.identityHashCode(this), scanSpec.getIndexDesc().getIndexName(), rowCount, avgRowSize,
      numBlocks, totalBlocks, rowsFromDisk, diskCost);
// NOTE(review): fragment — the computation of `numBlocks` and the remainder of the
// method (including its return) are not visible in this span.
private ScanStats fullTableScanStats() {
  PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
  // STAR_COLS stands in for the column count when the projection is absent or empty.
  final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
  // Full table scans are costed as sequential block reads.
  double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
/**
 * Lazily fetches and caches the full-table row count and estimated size from
 * the MapRDB meta table, falling back to the older admin API when the client
 * reports a 0 row count (which it can do right after table creation).
 *
 * @throws DrillRuntimeException wrapping any failure while fetching table/region info
 */
private void init() {
  try {
    // Get the fullTableRowCount only once i.e. if not already obtained before.
    if (fullTableRowCount == 0) {
      final Table t = this.formatPlugin.getJsonTableCache().getTable(
          scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
      final MetaTable metaTable = t.getMetaTable();
      // For condition null, we get full table stats.
      com.mapr.db.scan.ScanStats stats = metaTable.getScanStats();
      fullTableRowCount = stats.getEstimatedNumRows();
      fullTableEstimatedSize = stats.getEstimatedSize();
      // MapRDB client can return invalid rowCount i.e. 0, especially right after table
      // creation. It takes 15 minutes before table stats are obtained and cached in client.
      // If we get 0 rowCount, fallback to getting rowCount using old admin API.
      if (fullTableRowCount == 0) {
        PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
        final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
        final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns))
            ? STAR_COLS : columns.size();
        MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName());
        fullTableRowCount = tableStats.getNumRows();
        // Estimate size as rows * columns * average column width.
        fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize;
      }
    }
  } catch (Exception e) {
    throw new DrillRuntimeException("Error getting region info for table: "
        + scanSpec.getTableName()
        + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e);
  }
}
/**
 * Converts a size estimate into a block count for disk-cost purposes.
 *
 * @param rowCount        estimated rows, or {@code ROWCOUNT_UNKNOWN}/0 if unavailable
 * @param sizeFromDisk    raw on-disk size estimate in bytes
 * @param avgRowSize      average row size in bytes
 * @param pluginCostModel cost model supplying the block size
 * @return number of blocks, rounded up
 */
private double getNumOfBlocks(double rowCount, double sizeFromDisk,
    double avgRowSize, PluginCost pluginCostModel) {
  // Without a usable row count, size the scan directly from the on-disk estimate.
  if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0) {
    return Math.ceil(sizeFromDisk / pluginCostModel.getBlockSize(this));
  } else {
    return Math.ceil(rowCount * avgRowSize / pluginCostModel.getBlockSize(this));
  }
}
// NOTE(review): this span appears to be a truncated paste — the statement that should
// assign `totalBlocksPrimary` is cut off mid-expression (stray closing parens below).
// Confirm against the original index-cost source before relying on this fragment.

// Sequential read: blocks touched = ceil(rows * avgRowSize / blockSize).
double numBlocks = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan));
double diskCost = numBlocks * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan);
// Index-side sequential read, sized the same way.
double numBlocksIndex = Math.ceil((leadRowCount * avgRowSize)/pluginCost.getBlockSize(primaryTableGroupScan));
double diskCostIndex = numBlocksIndex * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan);
// Truncated tail — presumably the end of `double totalBlocksPrimary = Math.ceil((...` — TODO confirm.
pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows)/ pluginCost.getBlockSize(primaryTableGroupScan));
// Join-back to the primary table does random block reads, capped by the lead row count.
double diskBlocksPrimary = Math.min(totalBlocksPrimary, leadRowCount);
double diskCostPrimary = diskBlocksPrimary * pluginCost.getRandomBlockReadCost(primaryTableGroupScan);
double diskCostTotal = diskCostIndex + diskCostPrimary;
/**
 * Estimates the network transfer cost of returning {@code leadRowCount} rows.
 * Covering plans ship the projected columns once; non-covering plans ship the
 * rowkey from the index plus all projected columns after the join-back.
 *
 * @param leadRowCount          estimated number of qualifying rows
 * @param numProjectedFields    number of columns actually projected
 * @param isCovering            whether the index covers the query
 * @param primaryTableGroupScan scan used to look up cost-model parameters
 * @return estimated bytes transferred over the network
 */
private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering,
    GroupScan primaryTableGroupScan) {
  if (isCovering) {
    // db server will send only the projected columns to the db client for the selected
    // number of rows, so network cost is based on the number of actual projected columns
    double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan);
    return networkCost;
  } else {
    // only the rowkey column is projected from the index and sent over the network
    double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan);
    // after join-back to primary table, all projected columns are sent over the network
    double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan);
    return networkCostIndex + networkCostPrimary;
  }
}
// NOTE(review): fragment — the statement computing `totalBlocksPrimary` is truncated
// (stray closing parens below); `dbGroupScan`, `totLeadRowCount`, and `totDiskCost`
// are defined outside this span. Recover the full statement from the original source.
Preconditions.checkArgument(primaryTableGroupScan instanceof DbGroupScan);
// Index side: sequential reads sized by lead row count and average row size.
double numBlocksIndex = Math.ceil((leadRowCount * avgRowSize) / costBase.getBlockSize(primaryTableGroupScan));
double diskCostIndex = numBlocksIndex * costBase.getSequentialBlockReadCost(primaryTableGroupScan);
totDiskCost += diskCostIndex;
// Truncated tail of the `totalBlocksPrimary` computation — TODO confirm against original.
costBase.getAverageColumnSize(dbGroupScan) * totalRows) / costBase.getBlockSize(dbGroupScan));
// Primary side: random block reads on join-back, capped by the total lead rows.
double diskBlocksPrimary = Math.min(totalBlocksPrimary, totLeadRowCount);
double diskCostPrimary = diskBlocksPrimary * costBase.getRandomBlockReadCost(dbGroupScan);
totDiskCost += diskCostPrimary;
private void init() { try { // Get the fullTableRowCount only once i.e. if not already obtained before. if (fullTableRowCount == 0) { final Table t = this.formatPlugin.getJsonTableCache().getTable( scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName()); final MetaTable metaTable = t.getMetaTable(); // For condition null, we get full table stats. com.mapr.db.scan.ScanStats stats = metaTable.getScanStats(); fullTableRowCount = stats.getEstimatedNumRows(); fullTableEstimatedSize = stats.getEstimatedSize(); // MapRDB client can return invalid rowCount i.e. 0, especially right after table // creation. It takes 15 minutes before table stats are obtained and cached in client. // If we get 0 rowCount, fallback to getting rowCount using old admin API. if (fullTableRowCount == 0) { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) ? STAR_COLS : columns.size(); MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName()); fullTableRowCount = tableStats.getNumRows(); fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize; } } } catch (Exception e) { throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e); } }