@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  List<DatasetSplit> splits = new ArrayList<>(work.size());
  BatchSchema schema = getDataset().getSchema();
  for (SplitWork split : work) {
    splits.add(split.getSplit());
  }

  boolean storageImpersonationEnabled = dataset.getStoragePluginId().getCapabilities()
      .getCapability(SourceCapabilities.STORAGE_IMPERSONATION);
  String userName = storageImpersonationEnabled ? getUserName() : ImpersonationUtil.getProcessUserName();

  final ReadDefinition readDefinition = dataset.getReadDefinition();
  return new HiveSubScan(splits, userName, schema, dataset.getName().getPathComponents(), filter,
      dataset.getStoragePluginId(), columns, readDefinition.getPartitionColumnsList());
}
public ElasticIntermediateScanPrel(
    RelOptCluster cluster,
    RelTraitSet traitSet,
    RelOptTable table,
    TableMetadata dataset,
    List<SchemaPath> projectedColumns,
    double observedRowcountAdjustment) {
  super(cluster, traits(cluster, table.getRowCount(), dataset.getSplitCount(), traitSet), table,
      dataset.getStoragePluginId(), dataset, projectedColumns, observedRowcountAdjustment);
}
public boolean isFilterPushed() {
  try {
    return filter != null || getTableMetadata().getSplitRatio() < 1.0d;
  } catch (Exception ex) {
    throw Throwables.propagate(ex);
  }
}
public InfoSchemaScanPrel(
    RelOptCluster cluster,
    RelTraitSet traitSet,
    RelOptTable table,
    TableMetadata dataset,
    SearchQuery query,
    List<SchemaPath> projectedColumns,
    double observedRowcountAdjustment) {
  super(cluster, traitSet, table, dataset.getStoragePluginId(), dataset, projectedColumns,
      observedRowcountAdjustment);
  this.pluginId = dataset.getStoragePluginId();
  this.table = Preconditions.checkNotNull(
      InfoSchemaStoragePlugin.TABLE_MAP.get(dataset.getName().getName().toLowerCase()),
      "Unexpected system table.");
  this.query = query;
}
@Override
public void write(final Kryo kryo, final Output output, final TableMetadata table) {
  try {
    Preconditions.checkArgument(!table.isPruned(), "Cannot serialize a pruned table.");
  } catch (NamespaceException ex) {
    throw Throwables.propagate(ex);
  }
  kryo.writeObject(output, table.getName().getPathComponents());
}
private Prel newScan(RelDataType rowType, double rowCount, double splitRatio) throws Exception {
  TableMetadata metadata = Mockito.mock(TableMetadata.class);
  when(metadata.getName()).thenReturn(new NamespaceKey(ImmutableList.of("sys", "version")));
  when(metadata.getSchema()).thenReturn(SystemTable.VERSION.getSchema());
  when(metadata.getSplitRatio()).thenReturn(splitRatio);
  StoragePluginId pluginId = new StoragePluginId(
      new SourceConfig().setConfig(new SystemPluginConf().toBytesString()),
      new SystemPluginConf(), SourceCapabilities.NONE);
  when(metadata.getStoragePluginId()).thenReturn(pluginId);
  List<SchemaPath> columns = FluentIterable.from(SystemTable.VERSION.getSchema())
      .transform(input -> SchemaPath.getSimplePath(input.getName()))
      .toList();
  final RelOptTable relOptTable = Mockito.mock(RelOptTable.class);
  when(relOptTable.getRowCount()).thenReturn(rowCount);
  return new SystemScanPrel(cluster, traits, relOptTable, metadata, columns, 1.0d, rowType);
}
private Prel newHardScan(RelDataType rowType) {
  TableMetadata metadata = Mockito.mock(TableMetadata.class);
  when(metadata.getName()).thenReturn(new NamespaceKey(ImmutableList.of("sys", "memory")));
  when(metadata.getSchema()).thenReturn(SystemTable.MEMORY.getSchema());
  StoragePluginId pluginId = new StoragePluginId(
      new SourceConfig().setConfig(new SystemPluginConf().toBytesString()),
      new SystemPluginConf(), SourceCapabilities.NONE);
  when(metadata.getStoragePluginId()).thenReturn(pluginId);
  List<SchemaPath> columns = FluentIterable.from(SystemTable.MEMORY.getSchema())
      .transform(new Function<Field, SchemaPath>() {
        @Override
        public SchemaPath apply(Field input) {
          return SchemaPath.getSimplePath(input.getName());
        }
      })
      .toList();
  return new SystemScanPrel(cluster, traits, Mockito.mock(RelOptTable.class), metadata, columns, 1.0d, rowType);
}
@Override
public NamespaceKey getPath() {
  return dataset.getName();
}
public StoragePluginId getStoragePluginId() {
  return dataset.getStoragePluginId();
}
@Override
public RelWriter explainTerms(RelWriter pw) {
  pw.item("table", tableMetadata.getName());
  if (projectedColumns != null) {
    pw.item("columns", FluentIterable.from(projectedColumns)
        .transform(new Function<SchemaPath, String>() {
          @Override
          public String apply(SchemaPath input) {
            return input.toString();
          }
        })
        .join(Joiner.on(", ")));
  }
  pw.item("splits", getTableMetadata().getSplitCount());
  if (observedRowcountAdjustment != 1.0d) {
    pw.item("rowAdjust", observedRowcountAdjustment);
  }
  // Include the table metadata digest: not all properties (specifically which splits are selected)
  // appear in the explain output that the base computeDigest relies on.
  pw.itemIf("tableDigest", tableMetadata.computeDigest(),
      pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES);
  return pw;
}
@Override
public BatchSchema getSchema() {
  return dataset.getSchema();
}
@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  List<DatasetSplit> splitWork = FluentIterable.from(work)
      .transform(new Function<SplitWork, DatasetSplit>() {
        @Override
        public DatasetSplit apply(SplitWork input) {
          return input.getSplit();
        }
      })
      .toList();
  return new ElasticsearchSubScan(
      getUserName(),
      getDataset().getStoragePluginId(),
      spec,
      splitWork,
      getColumns(),
      Iterables.getOnlyElement(getReferencedTables()),
      getSchema(),
      getDataset().getReadDefinition().getExtendedProperty());
}
@Override
public boolean matches(RelOptRuleCall call) {
  final ScanRelBase scan = call.rel(2);
  if (scan.getPluginId().getType().equals(pluginType)) {
    try {
      if (scan.getTableMetadata().getSplitRatio() == 1.0d) {
        final List<String> partitionColumns =
            scan.getTableMetadata().getReadDefinition().getPartitionColumnsList();
        return partitionColumns != null && !partitionColumns.isEmpty();
      }
    } catch (NamespaceException e) {
      logger.warn("Unable to calculate split.", e);
      return false;
    }
  }
  return false;
}
final int fieldCount = getLeafColumnCount(tableMetadata.getSchema(), projectedColumns);
double workCost = getCostAdjustmentFactor()
    * (rowCount * fieldCount * getTableMetadata().getReadDefinition().getScanStats().getScanFactor())
    * DremioCost.SCAN_CPU_COST_MULTIPLIER;
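// Hedged sketch (not from the source): a standalone illustration of how the work-cost
// expression above behaves. All values below are assumptions chosen for the example;
// the real scan factor comes from the dataset's ReadDefinition scan stats and the real
// multiplier from DremioCost.SCAN_CPU_COST_MULTIPLIER.
public final class WorkCostSketch {
  public static void main(String[] args) {
    double costAdjustmentFactor = 1.0d;   // assumed getCostAdjustmentFactor()
    double rowCount = 1_000_000d;         // assumed row count for the scan
    int fieldCount = 8;                   // assumed number of projected leaf columns
    double scanFactor = 1.0d;             // assumed scan factor from the read definition
    double scanCpuCostMultiplier = 1.0d;  // stand-in for DremioCost.SCAN_CPU_COST_MULTIPLIER

    // Same shape as the expression above: cost scales linearly with rows and projected columns.
    double workCost = costAdjustmentFactor * (rowCount * fieldCount * scanFactor) * scanCpuCostMultiplier;
    System.out.println("workCost = " + workCost); // 8,000,000 with the assumed values
  }
}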
@Override
public List<Entry<String, ByteString>> getSharedData() {
  return Collections.singletonList(
      new SimpleEntry<>(
          HIVE_ATTRIBUTE_KEY,
          ByteString.copyFrom(dataset.getReadDefinition().getExtendedProperty().asReadOnlyByteBuffer())));
}
@Override
public int getMaxParallelizationWidth() {
  return tableMetadata.getSplitCount();
}
final Map<String, Integer> partitionColumnsToIdMap = Maps.newHashMap();
int index = 0;
for (String column : scanRel.getTableMetadata().getReadDefinition().getPartitionColumnsList()) {
  partitionColumnsToIdMap.put(column, index++);
}

// ... (intervening pruning logic elided in the excerpt)

  logger.debug("Partition pruning using expression evaluation took {} ms",
      stopwatch.elapsed(TimeUnit.MILLISECONDS));
} else {
  finalNewSplits = ImmutableList.copyOf(dataset.value.getSplits());
  evalPruned = false;
}

// ...

} else {
  inputRel = scanRel.applyDatasetPointer(dataset.value.prune(finalNewSplits));
protected void doPushFilterToScan(final RelOptRuleCall call, final FilterPrel filter, final ProjectPrel project,
    final HBaseScanPrel scan, final RexNode condition) {
  final LogicalExpression conditionExp = RexToExpr.toExpr(
      new ParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan.getRowType(),
      scan.getCluster().getRexBuilder(), condition);
  final HBaseFilterBuilder hbaseFilterBuilder = new HBaseFilterBuilder(
      TableNameGetter.getTableName(scan.getTableMetadata().getName()), scan.getStartRow(), scan.getStopRow(),
      scan.getFilter(), conditionExp);
  final HBaseScanSpec newScanSpec = hbaseFilterBuilder.parseTree();
  if (newScanSpec == null) {
    return; // no filter pushdown ==> no transformation
  }

  Predicate<DatasetSplit> predicate = newScanSpec.getRowKeyPredicate();
  TableMetadata metadata = scan.getTableMetadata();
  if (predicate != null) {
    try {
      metadata = metadata.prune(predicate);
    } catch (NamespaceException ex) {
      throw Throwables.propagate(ex);
    }
  }

  final HBaseScanPrel newScanPrel = new HBaseScanPrel(scan.getCluster(), scan.getTraitSet(), scan.getTable(),
      metadata, scan.getProjectedColumns(), scan.getObservedRowcountAdjustment(), newScanSpec.getStartRow(),
      newScanSpec.getStopRow(), newScanSpec.getSerializedFilter());

  // Depending on whether there is a project in the middle, assign either the scan or a copy of the project to childRel.
  final RelNode childRel = project == null
      ? newScanPrel
      : project.copy(project.getTraitSet(), ImmutableList.of((RelNode) newScanPrel));

  if (hbaseFilterBuilder.isAllExpressionsConverted()) {
    /*
     * Since we could convert the entire filter condition expression into an HBase filter,
     * we can eliminate the filter operator altogether.
     */
    call.transformTo(childRel);
  } else {
    call.transformTo(filter.copy(filter.getTraitSet(), ImmutableList.of(childRel)));
  }
}
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
  final HBaseScanSpec spec = new HBaseScanSpec(getTableMetadata().getName(), startRow, stopRow, filter);
  return creator.addMetadata(this,
      new HBaseGroupScan(spec, getTableMetadata(), getProjectedColumns(),
          getTableMetadata().getApproximateRecordCount()));
}
final TableMetadata prunedDatasetPointer = scanRel.getTableMetadata().prune(SearchQueryUtils.and(splitFilters));
if (prunedDatasetPointer == datasetPointer) {
  datasetOutput.value = datasetPointer;