private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering, GroupScan primaryTableGroupScan) { if (isCovering) { // db server will send only the projected columns to the db client for the selected // number of rows, so network cost is based on the number of actual projected columns double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCost; } else { // only the rowkey column is projected from the index and sent over the network double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan); // after join-back to primary table, all projected columns are sent over the network double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCostIndex + networkCostPrimary; } }
@Override public ScanStats getScanStats() { // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan double rowCount; PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS: columns.size(); // Get the restricted group scan row count - same as the right side index rows rowCount = computeRestrictedScanRowcount(); // Get the average row size of the primary table double avgRowSize = stats.getAvgRowSize(null, true); if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) { avgRowSize = avgColumnSize * numColumns; } // restricted scan does random lookups and each row may belong to a different block, with the number // of blocks upper bounded by the total num blocks in the primary table double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this)); double numBlocks = Math.min(totalBlocksPrimary, rowCount); double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this); // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor // for biasing towards non-covering plans. diskCost *= stats.getRowKeyJoinBackIOFactor(); logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost); return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost); }
private ScanStats fullTableScanStats() { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); boolean filterPushed = (scanSpec.getSerializedFilter() != null); if (scanSpec != null && scanSpec.getIndexDesc() != null) {
pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows)/ pluginCost.getBlockSize(primaryTableGroupScan)); double diskBlocksPrimary = Math.min(totalBlocksPrimary, leadRowCount);
private void init() { try { // Get the fullTableRowCount only once i.e. if not already obtained before. if (fullTableRowCount == 0) { final Table t = this.formatPlugin.getJsonTableCache().getTable( scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName()); final MetaTable metaTable = t.getMetaTable(); // For condition null, we get full table stats. com.mapr.db.scan.ScanStats stats = metaTable.getScanStats(); fullTableRowCount = stats.getEstimatedNumRows(); fullTableEstimatedSize = stats.getEstimatedSize(); // MapRDB client can return invalid rowCount i.e. 0, especially right after table // creation. It takes 15 minutes before table stats are obtained and cached in client. // If we get 0 rowCount, fallback to getting rowCount using old admin API. if (fullTableRowCount == 0) { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) ? STAR_COLS : columns.size(); MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName()); fullTableRowCount = tableStats.getNumRows(); fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize; } } } catch (Exception e) { throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e); } }
private double getNetworkCost(double leadRowCount, int numProjectedFields, boolean isCovering, GroupScan primaryTableGroupScan) { if (isCovering) { // db server will send only the projected columns to the db client for the selected // number of rows, so network cost is based on the number of actual projected columns double networkCost = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCost; } else { // only the rowkey column is projected from the index and sent over the network double networkCostIndex = leadRowCount * 1 * pluginCost.getAverageColumnSize(primaryTableGroupScan); // after join-back to primary table, all projected columns are sent over the network double networkCostPrimary = leadRowCount * numProjectedFields * pluginCost.getAverageColumnSize(primaryTableGroupScan); return networkCostIndex + networkCostPrimary; } }
@Override public ScanStats getScanStats() { // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan double rowCount; PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS: columns.size(); // Get the restricted group scan row count - same as the right side index rows rowCount = computeRestrictedScanRowcount(); // Get the average row size of the primary table double avgRowSize = stats.getAvgRowSize(null, true); if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) { avgRowSize = avgColumnSize * numColumns; } // restricted scan does random lookups and each row may belong to a different block, with the number // of blocks upper bounded by the total num blocks in the primary table double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount)/pluginCostModel.getBlockSize(this)); double numBlocks = Math.min(totalBlocksPrimary, rowCount); double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this); // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor // for biasing towards non-covering plans. diskCost *= stats.getRowKeyJoinBackIOFactor(); logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost); return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost); }
private ScanStats fullTableScanStats() { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); boolean filterPushed = (scanSpec.getSerializedFilter() != null); if (scanSpec != null && scanSpec.getIndexDesc() != null) {
pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows)/ pluginCost.getBlockSize(primaryTableGroupScan)); double diskBlocksPrimary = Math.min(totalBlocksPrimary, leadRowCount);
costBase.getAverageColumnSize(dbGroupScan) * totalRows) / costBase.getBlockSize(dbGroupScan)); double diskBlocksPrimary = Math.min(totalBlocksPrimary, totLeadRowCount); double diskCostPrimary = diskBlocksPrimary * costBase.getRandomBlockReadCost(dbGroupScan);
private void init() { try { // Get the fullTableRowCount only once i.e. if not already obtained before. if (fullTableRowCount == 0) { final Table t = this.formatPlugin.getJsonTableCache().getTable( scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName()); final MetaTable metaTable = t.getMetaTable(); // For condition null, we get full table stats. com.mapr.db.scan.ScanStats stats = metaTable.getScanStats(); fullTableRowCount = stats.getEstimatedNumRows(); fullTableEstimatedSize = stats.getEstimatedSize(); // MapRDB client can return invalid rowCount i.e. 0, especially right after table // creation. It takes 15 minutes before table stats are obtained and cached in client. // If we get 0 rowCount, fallback to getting rowCount using old admin API. if (fullTableRowCount == 0) { PluginCost pluginCostModel = formatPlugin.getPluginCostModel(); final int avgColumnSize = pluginCostModel.getAverageColumnSize(this); final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) ? STAR_COLS : columns.size(); MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName()); fullTableRowCount = tableStats.getNumRows(); fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize; } } } catch (Exception e) { throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e); } }