/**
 * Returns the primary table path held by the index descriptor, wrapped in a {@link Path}.
 *
 * @return a new {@code Path} built from the index descriptor's primary table path, or
 *         {@code null} when no index descriptor is set
 */
@JsonIgnore
public Path getPrimaryTablePath() {
  // Without an index descriptor there is no primary table path to report.
  if (this.indexDesc == null) {
    return null;
  }
  return new Path(this.indexDesc.getPrimaryTablePath());
}
/** * Get the row count after applying the {@link RexNode} condition * @param condition, filter to apply * @return row count post filtering */ @Override @JsonIgnore public double getRowCount(RexNode condition, RelNode scanRel) { // Do not use statistics if row count is forced. Forced rowcounts take precedence over stats double rowcount; if (forcedRowCountMap.get(condition) != null) { return forcedRowCountMap.get(condition); } if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowcount = stats.getRowCount(condition, idxIdentifier, scanRel); } else { rowcount = stats.getRowCount(condition, null, scanRel); } // Stats might NOT have the full rows (e.g. table is newly populated and DB stats APIs return it after // 15 mins). Use the table rows as populated using the (expensive but accurate) Hbase API if needed. if (condition == null && (rowcount == 0 || rowcount == ROWCOUNT_UNKNOWN)) { rowcount = fullTableRowCount; logger.debug("getRowCount: Stats not available yet! Use Admin APIs full table rowcount {}", fullTableRowCount); } return rowcount; }
String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier);
final TreeMap<TabletFragmentInfo, String> regionsToScan = new TreeMap<>(); if (isIndexScan()) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); if (stats.isStatsAvailable()) {
@Override public boolean isDistributed() { // getMaxParallelizationWidth gets information about all regions to scan and is expensive. // This option is meant to be used only for unit tests. boolean useNumRegions = storagePlugin.getContext().getConfig().getBoolean(PluginConstants.JSON_TABLE_USE_NUM_REGIONS_FOR_DISTRIBUTION_PLANNING); double fullTableSize; if (useNumRegions) { return getMaxParallelizationWidth() > 1 ? true: false; } // This function gets called multiple times during planning. To avoid performance // bottleneck, estimate degree of parallelization using stats instead of actually getting information // about all regions. double rowCount, rowSize; double scanRangeSize = storagePlugin.getContext().getConfig().getInt(PluginConstants.JSON_TABLE_SCAN_SIZE_MB) * 1024 * 1024; if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); rowSize = stats.getAvgRowSize(idxIdentifier, false); } else { rowCount = stats.getRowCount(scanSpec.getCondition(), null); rowSize = stats.getAvgRowSize(null, false); } if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0 || rowSize == AVG_ROWSIZE_UNKNOWN || rowSize == 0) { fullTableSize = (scanSpec.getSerializedFilter() != null ? .5 : 1) * this.fullTableEstimatedSize; } else { fullTableSize = rowCount * rowSize; } return (long) fullTableSize / scanRangeSize > 1 ? true : false; }
/**
 * Returns the primary table path held by the index descriptor, wrapped in a {@link Path}.
 *
 * @return a new {@code Path} built from the index descriptor's primary table path, or
 *         {@code null} when no index descriptor is set
 */
@JsonIgnore
public Path getPrimaryTablePath() {
  // Without an index descriptor there is no primary table path to report.
  if (this.indexDesc == null) {
    return null;
  }
  return new Path(this.indexDesc.getPrimaryTablePath());
}
/** * Get the row count after applying the {@link RexNode} condition * @param condition, filter to apply * @return row count post filtering */ @Override @JsonIgnore public double getRowCount(RexNode condition, RelNode scanRel) { // Do not use statistics if row count is forced. Forced rowcounts take precedence over stats double rowcount; if (forcedRowCountMap.get(condition) != null) { return forcedRowCountMap.get(condition); } if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowcount = stats.getRowCount(condition, idxIdentifier, scanRel); } else { rowcount = stats.getRowCount(condition, null, scanRel); } // Stats might NOT have the full rows (e.g. table is newly populated and DB stats APIs return it after // 15 mins). Use the table rows as populated using the (expensive but accurate) Hbase API if needed. if (condition == null && (rowcount == 0 || rowcount == ROWCOUNT_UNKNOWN)) { rowcount = fullTableRowCount; logger.debug("getRowCount: Stats not available yet! Use Admin APIs full table rowcount {}", fullTableRowCount); } return rowcount; }
String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier);
final TreeMap<TabletFragmentInfo, String> regionsToScan = new TreeMap<>(); if (isIndexScan()) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); if (stats.isStatsAvailable()) {
@Override public boolean isDistributed() { // getMaxParallelizationWidth gets information about all regions to scan and is expensive. // This option is meant to be used only for unit tests. boolean useNumRegions = storagePlugin.getContext().getConfig().getBoolean(PluginConstants.JSON_TABLE_USE_NUM_REGIONS_FOR_DISTRIBUTION_PLANNING); double fullTableSize; if (useNumRegions) { return getMaxParallelizationWidth() > 1 ? true: false; } // This function gets called multiple times during planning. To avoid performance // bottleneck, estimate degree of parallelization using stats instead of actually getting information // about all regions. double rowCount, rowSize; double scanRangeSize = storagePlugin.getContext().getConfig().getInt(PluginConstants.JSON_TABLE_SCAN_SIZE_MB) * 1024 * 1024; if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); rowSize = stats.getAvgRowSize(idxIdentifier, false); } else { rowCount = stats.getRowCount(scanSpec.getCondition(), null); rowSize = stats.getAvgRowSize(null, false); } if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0 || rowSize == AVG_ROWSIZE_UNKNOWN || rowSize == 0) { fullTableSize = (scanSpec.getSerializedFilter() != null ? .5 : 1) * this.fullTableEstimatedSize; } else { fullTableSize = rowCount * rowSize; } return (long) fullTableSize / scanRangeSize > 1 ? true : false; }