final String partitionColumnLabel = settings.getFsPartitionColumnLabel();

final DrillScanRel nativeScanRel =
    createNativeScanRel(partitionColMapping, hiveScanRel, logicalInputSplits, settings.getOptions());
if (hiveScanRel.getRowType().getFieldCount() == 0) {
  call.transformTo(nativeScanRel);
}

@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scan = (DrillScanRel) call.rel(2);
  GroupScan groupScan = scan.getGroupScan();
  // this rule is applicable only for Hive based partition pruning
  if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
    return groupScan instanceof HiveScan
        && groupScan.supportsPartitionFilterPushdown()
        && !scan.partitionFilterPushdown();
  } else {
    return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown();
  }
}

@Override
public Set<StoragePluginOptimizerRule> getPhysicalOptimizerRules(OptimizerRulesContext optimizerRulesContext) {
  ImmutableSet.Builder<StoragePluginOptimizerRule> ruleBuilder = ImmutableSet.builder();
  OptionManager options = optimizerRulesContext.getPlannerSettings().getOptions();
  // TODO: Remove the implicit use of the convert_fromTIMESTAMP_IMPALA function
  // once "store.parquet.reader.int96_as_timestamp" is true by default
  if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)
      || options.getBoolean(ExecConstants.HIVE_OPTIMIZE_PARQUET_SCAN_WITH_NATIVE_READER)) {
    ruleBuilder.add(ConvertHiveParquetScanToDrillParquetScan.INSTANCE);
  }
  if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_MAPRDB_JSON_SCAN_WITH_NATIVE_READER)) {
    try {
      Class<?> hiveToDrillMapRDBJsonRuleClass =
          Class.forName("org.apache.drill.exec.planner.sql.logical.ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan");
      ruleBuilder.add((StoragePluginOptimizerRule) hiveToDrillMapRDBJsonRuleClass.getField("INSTANCE").get(null));
    } catch (ReflectiveOperationException e) {
      logger.warn("Current Drill build is not designed for working with Hive MapR-DB tables. "
          + "Please disable the {} option", ExecConstants.HIVE_OPTIMIZE_MAPRDB_JSON_SCAN_WITH_NATIVE_READER);
    }
  }
  return ruleBuilder.build();
}

if (context.getPlannerSettings().isTypeInferenceEnabled()
    && FindLimit0Visitor.containsLimit0(relNode)) {
  context.getPlannerSettings().forceSingleMode();
  // ...
}

if (!context.getPlannerSettings().isHepOptEnabled()) {
  final RelNode intermediateNode2;
  final RelNode intermediateNode3;
  // ...
}

if (context.getPlannerSettings().isHepPartitionPruningEnabled()) {
  // ...
}

if (context.getPlannerSettings().isRowKeyJoinConversionEnabled()) {
  context.getPlannerSettings().forceSingleMode();
  // ...
}

if (context.getOptions().getOption(ExecConstants.LATE_LIMIT0_OPT)) {
  return FindLimit0Visitor.addLimitOnTopOfLeafNodes(drillRel);
}

@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    // We use multiplier 0.05 for TopN operator, and 0.1 for Sort, to make TopN a preferred choice.
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  RelNode child = this.getInput();
  double inputRows = mq.getRowCount(child);
  // int rowWidth = child.getRowType().getPrecision();
  int numSortFields = this.collation.getFieldCollations().size();
  double cpuCost = DrillCostBase.COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(inputRows) / Math.log(2));
  double diskIOCost = 0; // assume in-memory for now until we enforce operator-level memory constraints

  // TODO: use rowWidth instead of avgFieldWidth * numFields
  // avgFieldWidth * numFields * inputRows
  double numFields = this.getRowType().getFieldCount();
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
  double memCost = fieldWidth * numFields * inputRows;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0, memCost);
}

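For intuition, here is a minimal sketch (not from the source) of how the Sort cost terms above combine. Every value in it is a made-up assumption for illustration: the comparison-cost constant, row count, key count, field count, and average field width are not Drill defaults.

// Hypothetical illustration of the Sort costing above; all constants are assumptions.
public class SortCostSketch {
  private static final double COMPARE_CPU_COST = 4.0; // assumed comparison cost constant

  public static void main(String[] args) {
    double inputRows = 1_000_000;
    int numSortFields = 2;   // number of sort keys
    double numFields = 10;   // fields in the output row type
    long fieldWidth = 8;     // assumed average field width in bytes

    // n * log2(n) comparisons per sort key
    double cpuCost = COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(inputRows) / Math.log(2));
    // memory estimated as avgFieldWidth * numFields * inputRows, as in the TODO above
    double memCost = fieldWidth * numFields * inputRows;

    System.out.printf("cpuCost=%.0f memCost=%.0f%n", cpuCost, memCost);
  }
}
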
public QueryContext(final UserSession session, final DrillbitContext drillbitContext, QueryId queryId) {
  this.drillbitContext = drillbitContext;
  this.session = session;
  this.queryId = queryId;
  queryOptions = new QueryOptionManager(session.getOptions());
  executionControls = new ExecutionControls(queryOptions, drillbitContext.getEndpoint());
  plannerSettings = new PlannerSettings(queryOptions, getFunctionRegistry());
  plannerSettings.setNumEndPoints(drillbitContext.getBits().size());

  // If we do not need to support dynamic UDFs for this query, just use the static operator table
  // built at startup. Otherwise, build a new operator table from the latest version of the function registry.
  if (queryOptions.getOption(ExecConstants.USE_DYNAMIC_UDFS)) {
    this.table = new DrillOperatorTable(drillbitContext.getFunctionImplementationRegistry(), drillbitContext.getOptionManager());
  } else {
    this.table = drillbitContext.getOperatorTable();
  }

  queryContextInfo = Utilities.createQueryContextInfo(session.getDefaultSchemaPath(), session.getSessionId());
  contextInformation = new ContextInformation(session.getCredentials(), queryContextInfo);

  allocator = drillbitContext.getAllocator().newChildAllocator(
      "query:" + QueryIdHelper.getQueryId(queryId),
      PlannerSettings.getInitialPlanningMemorySize(),
      plannerSettings.getPlanningMemoryLimit());
  bufferManager = new BufferManagerImpl(this.allocator);
  viewExpansionContext = new ViewExpansionContext(this);
  schemaTreeProvider = new SchemaTreeProvider(drillbitContext);
  constantValueHolderCache = Maps.newHashMap();
}

if (context.getPlannerSettings().isMemoryEstimationEnabled()
    && !MemoryEstimationVisitor.enoughMemory(phyRelNode, queryOptions, context.getActiveEndpoints().size())) {
  log("Not enough memory for this plan", phyRelNode, logger, null);
  // ...
}

if (context.getPlannerSettings().isHashJoinSwapEnabled()) {
  phyRelNode = SwapHashJoinVisitor.swapHashJoin(phyRelNode,
      context.getPlannerSettings().getHashJoinSwapMarginFactor());
}

if (context.getPlannerSettings().isParquetRowGroupFilterPushdownPlanningEnabled()) {
  phyRelNode = (Prel) transform(PlannerType.HEP_BOTTOM_UP, PlannerPhase.PHYSICAL_PARTITION_PRUNING, phyRelNode);
}

/**
 * In a BroadcastExchange, each sender is sending data to N receivers (for costing
 * purposes we assume it is also sending to itself).
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  RelNode child = this.getInput();

  final int numEndPoints = PrelUtil.getSettings(getCluster()).numEndPoints();
  final double broadcastFactor = PrelUtil.getSettings(getCluster()).getBroadcastFactor();
  final double inputRows = mq.getRowCount(child);

  final int rowWidth = child.getRowType().getFieldCount() * DrillCostBase.AVG_FIELD_WIDTH;
  final double cpuCost = broadcastFactor * DrillCostBase.SVR_CPU_COST * inputRows;

  // We assume localhost network cost is 1/10 of regular network cost
  // ( c * num_bytes * (N - 1) ) + ( c * num_bytes * 0.1 )
  // = c * num_bytes * (N - 0.9)
  // TODO: a similar adjustment should be made to HashExchangePrel
  final double networkCost = broadcastFactor * DrillCostBase.BYTE_NETWORK_COST * inputRows * rowWidth * (numEndPoints - 0.9);

  return new DrillCostBase(inputRows, cpuCost, 0, networkCost);
}

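The (N - 0.9) factor in the network cost above is just the two send terms from the comment folded together. Below is a minimal sketch of that arithmetic with hypothetical numbers; the per-byte cost, byte count, and endpoint count are assumptions for illustration only, not Drill constants.

// Hypothetical check of the (N - 0.9) collapse; none of these values come from Drill.
public class BroadcastNetworkCostSketch {
  public static void main(String[] args) {
    double c = 0.001;            // assumed per-byte network cost
    double numBytes = 1_000_000; // assumed inputRows * rowWidth
    int n = 5;                   // assumed number of receiving endpoints

    double remoteSends = c * numBytes * (n - 1); // full-cost sends to N - 1 remote receivers
    double localSend = c * numBytes * 0.1;       // localhost send at 1/10 of the network cost
    double collapsed = c * numBytes * (n - 0.9); // the single-term form used above

    System.out.println((remoteSends + localSend) + " == " + collapsed); // both print 4100.0
  }
}
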
protected DrillFilterRelBase(Convention convention, RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) {
  super(cluster, traits, child, condition);
  assert getConvention() == convention;

  // save the number of conjuncts that make up the filter condition such
  // that repeated calls to the costing function can use the saved copy
  conjunctions = RelOptUtil.conjunctions(condition);
  numConjuncts = conjunctions.size();
  // assert numConjuncts >= 1;

  filterMinSelectivityEstimateFactor =
      PrelUtil.getPlannerSettings(cluster.getPlanner()).getFilterMinSelectivityEstimateFactor();
  filterMaxSelectivityEstimateFactor =
      PrelUtil.getPlannerSettings(cluster.getPlanner()).getFilterMaxSelectivityEstimateFactor();
}

public FileSystemPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
  Preconditions.checkArgument(scanRel instanceof DrillScanRel || scanRel instanceof EnumerableTableScan);
  this.partitionLabel = settings.getFsPartitionColumnLabel();
  this.partitionLabelLength = partitionLabel.length();
  this.scanRel = scanRel;

  DrillTable unwrap = scanRel.getTable().unwrap(DrillTable.class);
  if (unwrap == null) {
    unwrap = scanRel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
  }
  table = unwrap;

  for (int i = 0; i < 10; i++) {
    partitions.put(partitionLabel + i, i);
  }
}

protected boolean checkBroadcastConditions(RelOptPlanner planner, DrillJoin join, RelNode left, RelNode right) {
  double estimatedRightRowCount = RelMetadataQuery.instance().getRowCount(right);
  if (estimatedRightRowCount < PrelUtil.getSettings(join.getCluster()).getBroadcastThreshold()
      && !left.getTraitSet().getTrait(DrillDistributionTraitDef.INSTANCE).equals(DrillDistributionTrait.SINGLETON)
      && (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT)) {
    return true;
  }
  return false;
}

@Override
public void run() {
  final GetServerMetaResp.Builder respBuilder = GetServerMetaResp.newBuilder();
  try {
    final ServerMeta.Builder metaBuilder = ServerMeta.newBuilder(DEFAULT);
    PlannerSettings plannerSettings = new PlannerSettings(session.getOptions(), context.getFunctionImplementationRegistry());

    DrillParserConfig config = new DrillParserConfig(plannerSettings);

    int identifierMaxLength = config.identifierMaxLength();
    Metadata metadata = SqlParser.create("", config).getMetadata();
    metaBuilder
        .setMaxCatalogNameLength(identifierMaxLength)
        .setMaxColumnNameLength(identifierMaxLength)
        .setMaxCursorNameLength(identifierMaxLength)
        .setMaxSchemaNameLength(identifierMaxLength)
        .setMaxTableNameLength(identifierMaxLength)
        .setMaxUserNameLength(identifierMaxLength)
        .setIdentifierQuoteString(config.quoting().string)
        .setIdentifierCasing(getIdentifierCasing(config.unquotedCasing(), config.caseSensitive()))
        .setQuotedIdentifierCasing(getIdentifierCasing(config.quotedCasing(), config.caseSensitive()))
        .addAllSqlKeywords(Splitter.on(",").split(metadata.getJdbcKeywords()));
    respBuilder.setServerMeta(metaBuilder);
    respBuilder.setStatus(RequestStatus.OK);
  } catch (Throwable t) {
    respBuilder.setStatus(RequestStatus.FAILED);
    respBuilder.setError(MetadataProvider.createPBError("server meta", t));
  } finally {
    responseSender.send(new Response(RpcType.SERVER_META, respBuilder.build()));
  }
}

@Override
public TypeProtos.MajorType getVectorType(SchemaPath column, PlannerSettings plannerSettings) {
  HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
  String partitionName = column.getAsNamePart().getName();
  Map<String, String> partitionNameTypeMap = hiveScan.getHiveReadEntry().table.getPartitionNameTypeMap();
  String hiveType = partitionNameTypeMap.get(partitionName);
  PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hiveType);

  TypeProtos.MinorType partitionType =
      HiveUtilities.getMinorTypeFromHivePrimitiveTypeInfo(primitiveTypeInfo, plannerSettings.getOptions());
  return TypeProtos.MajorType.newBuilder()
      .setMode(TypeProtos.DataMode.OPTIONAL)
      .setMinorType(partitionType)
      .build();
}

if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
  return super.computeSelfCost(planner, mq).multiplyBy(.1);
}

double factor = PrelUtil.getPlannerSettings(planner).getOptions()
    .getOption(ExecConstants.HASH_AGG_TABLE_FACTOR_KEY).float_val;
long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
    .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;

@Override
protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
  long data = 0;
  for (final CompleteFileWork work : scan.getWorkIterable()) {
    data += work.getTotalBytes();
  }

  final double estimatedRowSize = settings.getOptions().getOption(ExecConstants.TEXT_ESTIMATED_ROW_SIZE);
  final double estRowCount = data / estimatedRowSize;
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, (long) estRowCount, 1, data);
}

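As a quick sanity check on the row-count estimate above, a hypothetical example (the byte total and per-row size below are made-up values, not the option's default): 10 MB of text at an estimated 100 bytes per row yields roughly 105 thousand rows.

// Hypothetical numbers only; the total bytes and estimated row size are assumptions.
public class TextScanStatsSketch {
  public static void main(String[] args) {
    long data = 10L * 1024 * 1024;   // total bytes across all scanned files
    double estimatedRowSize = 100.0; // assumed value of the estimated-row-size option
    long estRowCount = (long) (data / estimatedRowSize);
    System.out.println("estimated rows: " + estRowCount); // 104857
  }
}
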
@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scan = (DrillScanRel) call.rel(1);
  GroupScan groupScan = scan.getGroupScan();
  // this rule is applicable only for Hive based partition pruning
  if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
    return groupScan instanceof HiveScan
        && groupScan.supportsPartitionFilterPushdown()
        && !scan.partitionFilterPushdown();
  } else {
    return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown();
  }
}

public static RelOptCost computeHashJoinCostWithRowCntKeySize(RelOptPlanner planner, double probeRowCount,
    double buildRowCount, int keySize) {
  // cpu cost of hashing the join keys for the build side
  double cpuCostBuild = DrillCostBase.HASH_CPU_COST * keySize * buildRowCount;
  // cpu cost of hashing the join keys for the probe side
  double cpuCostProbe = DrillCostBase.HASH_CPU_COST * keySize * probeRowCount;

  // cpu cost of evaluating each leftkey=rightkey join condition
  double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * keySize;

  double factor = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;

  // table + hashValues + links
  double memCost = ((fieldWidth * keySize) + IntHolder.WIDTH + IntHolder.WIDTH) * buildRowCount * factor;

  double cpuCost = joinConditionCost * probeRowCount // the probe size determines the join condition comparison cost
      + cpuCostBuild + cpuCostProbe;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
}

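A minimal sketch (assumed values only) of how the build-side memory term above plays out: memory is charged only against the build row count, scaled by the hash-table factor, while the probe side contributes only CPU. The int-holder width, field width, row count, and factor below are assumptions for the example, not Drill defaults.

// Hypothetical illustration of the hash-join memory term; every value is an assumption.
public class HashJoinMemCostSketch {
  private static final int INT_HOLDER_WIDTH = 4; // assumed width of one int slot, in bytes

  public static void main(String[] args) {
    long fieldWidth = 8;            // assumed average key field width in bytes
    int keySize = 2;                // two join key columns
    double buildRowCount = 100_000; // rows on the build side
    double factor = 1.1;            // assumed hash table overhead factor

    // table + hashValues + links, sized off the build side only
    double memCost = ((fieldWidth * keySize) + INT_HOLDER_WIDTH + INT_HOLDER_WIDTH)
        * buildRowCount * factor;

    System.out.printf("build-side memory cost: %.0f%n", memCost); // 2640000
  }
}
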
@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scan = call.rel(2);
  GroupScan groupScan = scan.getGroupScan();
  // this rule is applicable only for parquet based partition pruning
  if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
    return groupScan instanceof AbstractParquetGroupScan
        && groupScan.supportsPartitionFilterPushdown()
        && !scan.partitionFilterPushdown();
  } else {
    return groupScan instanceof AbstractParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
  }
}

@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  DrillCostBase.DrillCostFactory costFactory = (DrillCostBase.DrillCostFactory) planner.getCostFactory();
  double rowCount = estimateRowCount(mq);
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getLong(ExecConstants.AVERAGE_FIELD_WIDTH_KEY);

  double rowSize = left.getRowType().getFieldList().size() * fieldWidth;

  double cpuCost = rowCount * rowSize * DrillCostBase.BASE_CPU_COST;
  double memCost = !excludeCorrelateColumn ? CORRELATE_MEM_COPY_COST : 0.0;
  return costFactory.makeCost(rowCount, cpuCost, 0, 0, memCost);
}

@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scan = call.rel(1);
  GroupScan groupScan = scan.getGroupScan();
  // this rule is applicable only for parquet based partition pruning
  if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
    return groupScan instanceof AbstractParquetGroupScan
        && groupScan.supportsPartitionFilterPushdown()
        && !scan.partitionFilterPushdown();
  } else {
    return groupScan instanceof AbstractParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
  }
}