@Override
public Set<StoragePluginOptimizerRule> getPhysicalOptimizerRules(OptimizerRulesContext optimizerRulesContext) {
  ImmutableSet.Builder<StoragePluginOptimizerRule> ruleBuilder = ImmutableSet.builder();
  OptionManager options = optimizerRulesContext.getPlannerSettings().getOptions();
  // TODO: Remove the implicit use of the convert_fromTIMESTAMP_IMPALA function
  // once "store.parquet.reader.int96_as_timestamp" is true by default
  if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)
      || options.getBoolean(ExecConstants.HIVE_OPTIMIZE_PARQUET_SCAN_WITH_NATIVE_READER)) {
    ruleBuilder.add(ConvertHiveParquetScanToDrillParquetScan.INSTANCE);
  }
  if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_MAPRDB_JSON_SCAN_WITH_NATIVE_READER)) {
    try {
      // The MapR-DB JSON rule may be absent from this build, so load it reflectively.
      Class<?> hiveToDrillMapRDBJsonRuleClass =
          Class.forName("org.apache.drill.exec.planner.sql.logical.ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan");
      ruleBuilder.add((StoragePluginOptimizerRule) hiveToDrillMapRDBJsonRuleClass.getField("INSTANCE").get(null));
    } catch (ReflectiveOperationException e) {
      logger.warn("Current Drill build is not designed for working with Hive MapR-DB tables. "
          + "Please disable {} option", ExecConstants.HIVE_OPTIMIZE_MAPRDB_JSON_SCAN_WITH_NATIVE_READER);
    }
  }
  return ruleBuilder.build();
}
@Override
public TypeProtos.MajorType getVectorType(SchemaPath column, PlannerSettings plannerSettings) {
  HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
  String partitionName = column.getAsNamePart().getName();
  Map<String, String> partitionNameTypeMap = hiveScan.getHiveReadEntry().table.getPartitionNameTypeMap();
  String hiveType = partitionNameTypeMap.get(partitionName);
  PrimitiveTypeInfo primitiveTypeInfo =
      (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hiveType);
  TypeProtos.MinorType partitionType =
      HiveUtilities.getMinorTypeFromHivePrimitiveTypeInfo(primitiveTypeInfo, plannerSettings.getOptions());
  return TypeProtos.MajorType.newBuilder()
      .setMode(TypeProtos.DataMode.OPTIONAL)
      .setMinorType(partitionType)
      .build();
}
final DrillScanRel nativeScanRel =
    createNativeScanRel(partitionColMapping, hiveScanRel, logicalInputSplits, settings.getOptions());
if (hiveScanRel.getRowType().getFieldCount() == 0) {
  call.transformTo(nativeScanRel);
public static RelOptCost computeHashJoinCostWithRowCntKeySize(RelOptPlanner planner, double probeRowCount,
    double buildRowCount, int keySize) {
  // cpu cost of hashing the join keys for the build side
  double cpuCostBuild = DrillCostBase.HASH_CPU_COST * keySize * buildRowCount;
  // cpu cost of hashing the join keys for the probe side
  double cpuCostProbe = DrillCostBase.HASH_CPU_COST * keySize * probeRowCount;
  // cpu cost of evaluating each leftkey=rightkey join condition
  double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * keySize;
  double factor = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
  // table + hashValues + links
  double memCost = ((fieldWidth * keySize) + IntHolder.WIDTH + IntHolder.WIDTH) * buildRowCount * factor;
  // the probe side determines the join condition comparison cost
  double cpuCost = joinConditionCost * probeRowCount + cpuCostBuild + cpuCostProbe;
  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
}
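A minimal standalone sketch of the cost arithmetic above. It is not Drill code: the constants standing in for DrillCostBase.HASH_CPU_COST, DrillCostBase.COMPARE_CPU_COST, IntHolder.WIDTH and the two option values are illustrative assumptions, chosen only to show how the memory cost is charged to the build side while the cpu cost grows with both inputs.

// Hypothetical worked example of the hash join cost formulas; all values are assumptions.
public class HashJoinCostSketch {
  private static final double HASH_CPU_COST = 1.0;    // assumed stand-in for DrillCostBase.HASH_CPU_COST
  private static final double COMPARE_CPU_COST = 1.0; // assumed stand-in for DrillCostBase.COMPARE_CPU_COST
  private static final int INT_WIDTH = 4;             // assumed stand-in for IntHolder.WIDTH

  public static void main(String[] args) {
    double probeRowCount = 1_000_000;
    double buildRowCount = 100_000;
    int keySize = 2;          // number of join key columns
    double factor = 1.1;      // assumed hash table factor option value
    long fieldWidth = 8;      // assumed average field width option value

    double cpuCostBuild = HASH_CPU_COST * keySize * buildRowCount;
    double cpuCostProbe = HASH_CPU_COST * keySize * probeRowCount;
    double joinConditionCost = COMPARE_CPU_COST * keySize;
    // table + hashValues + links, charged only for the build side:
    // ((8 * 2) + 4 + 4) * 100_000 * 1.1 = 2_640_000
    double memCost = ((fieldWidth * keySize) + INT_WIDTH + INT_WIDTH) * buildRowCount * factor;
    double cpuCost = joinConditionCost * probeRowCount + cpuCostBuild + cpuCostProbe;
    System.out.printf("cpu=%.0f mem=%.0f rows=%.0f%n", cpuCost, memCost, probeRowCount + buildRowCount);
  }
}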
@Override
protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
  long data = 0;
  for (final CompleteFileWork work : scan.getWorkIterable()) {
    data += work.getTotalBytes();
  }
  final double estimatedRowSize = settings.getOptions().getOption(ExecConstants.TEXT_ESTIMATED_ROW_SIZE);
  final double estRowCount = data / estimatedRowSize;
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, (long) estRowCount, 1, data);
}
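A short worked example of the estimate above, assuming the scanned files total 1 GiB and the ExecConstants.TEXT_ESTIMATED_ROW_SIZE option resolves to 100 bytes (both values are assumptions for illustration, not necessarily Drill defaults):

// data             = 1_073_741_824 bytes (sum of getTotalBytes() over all work units)
// estimatedRowSize = 100.0 bytes (assumed option value)
// estRowCount      = 1_073_741_824 / 100.0 ≈ 10_737_418 rows, reported with NO_EXACT_ROW_COUNT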
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  DrillCostBase.DrillCostFactory costFactory = (DrillCostBase.DrillCostFactory) planner.getCostFactory();
  double rowCount = estimateRowCount(mq);
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getLong(ExecConstants.AVERAGE_FIELD_WIDTH_KEY);
  double rowSize = left.getRowType().getFieldList().size() * fieldWidth;
  double cpuCost = rowCount * rowSize * DrillCostBase.BASE_CPU_COST;
  double memCost = !excludeCorrelateColumn ? CORRELATE_MEM_COPY_COST : 0.0;
  return costFactory.makeCost(rowCount, cpuCost, 0, 0, memCost);
}
@Override
public boolean matches(RelOptRuleCall call) {
  boolean topNEnabled = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions()
      .getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
  if (!topNEnabled) {
    return false;
  } else {
    // If no limit is defined, TopN makes no sense because it could use unbounded memory;
    // fall back to the separate Sort and Limit operators instead.
    // This also fixes DRILL-6474.
    final LimitPrel limit = call.rel(0);
    return limit.getFetch() != null;
  }
}
final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
final GroupScan oldGrpScan = scan.getGroupScan();
final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
  return ImmutableMap.of();
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    // We use multiplier 0.05 for the TopN operator and 0.1 for Sort, to make TopN a preferred choice.
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }
  RelNode child = this.getInput();
  double inputRows = mq.getRowCount(child);
  // int rowWidth = child.getRowType().getPrecision();
  int numSortFields = this.collation.getFieldCollations().size();
  double cpuCost = DrillCostBase.COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(inputRows) / Math.log(2));
  double diskIOCost = 0; // assume in-memory for now until we enforce operator-level memory constraints
  // TODO: use rowWidth instead of avgFieldWidth * numFields
  // memCost = avgFieldWidth * numFields * inputRows
  double numFields = this.getRowType().getFieldCount();
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
  double memCost = fieldWidth * numFields * inputRows;
  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0, memCost);
}
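A brief worked example of the n·log2(n) comparison term and the memory term above, with assumed input sizes and an assumed average field width; COMPARE_CPU_COST is left symbolic because its actual value lives in DrillCostBase:

// inputRows = 1_048_576 (2^20), numSortFields = 3, numFields = 10, fieldWidth = 8 (all assumed)
// cpuCost = COMPARE_CPU_COST * 3 * 1_048_576 * log2(1_048_576)
//         = COMPARE_CPU_COST * 3 * 1_048_576 * 20  ≈ 62.9 million weighted comparisons
// memCost = 8 * 10 * 1_048_576 ≈ 83.9 MB held in memory (diskIOCost stays 0 under the in-memory assumption)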
double factor = PrelUtil.getPlannerSettings(planner).getOptions()
    .getOption(ExecConstants.HASH_AGG_TABLE_FACTOR_KEY).float_val;
long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
    .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
    optimizerContext, optimizerContext.getFunctionRegistry(),
    optimizerContext.getPlannerSettings().getOptions(), false);

    optimizerContext.getFunctionRegistry(),
    optimizerContext.getPlannerSettings().getOptions());

if (timer != null) {
  logger.debug("Took {} ms to apply filter on parquet row groups. ",
      timer.elapsed(TimeUnit.MILLISECONDS));
@Override
public void onMatch(RelOptRuleCall call) {
  final DrillWriterRel writer = call.rel(0);
  final RelNode input = call.rel(1);
  final List<Integer> keys = writer.getPartitionKeys();
  final RelCollation collation = getCollation(keys);
  final boolean hashDistribute = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions()
      .getOption(ExecConstants.CTAS_PARTITIONING_HASH_DISTRIBUTE_VALIDATOR);
  final RelTraitSet traits = hashDistribute
      ? input.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(collation).plus(getDistribution(keys))
      : input.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(collation);
  final RelNode convertedInput = convert(input, traits);
  if (!new WriteTraitPull(call).go(writer, convertedInput)) {
    DrillWriterRelBase newWriter = new WriterPrel(writer.getCluster(), convertedInput.getTraitSet(),
        convertedInput, writer.getCreateTableEntry());
    call.transformTo(newWriter);
  }
}
if (context.getPlannerSettings().getOptions()
    .getBoolean(ENABLE_DECIMAL_DATA_TYPE.getOptionName())) {
  if (isLiteralNull(literal)) {
  break;
case "DECIMAL":
  if (!context.getPlannerSettings().getOptions()
      .getOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY).bool_val) {
    throw UserException
        .unsupportedError()