public Optional<PartitioningProperties> translate(Function<Symbol, Optional<Symbol>> translator) { Set<Symbol> newPartitioningColumns = partitioningColumns.stream() .map(translator) .filter(Optional::isPresent) .map(Optional::get) .collect(toImmutableSet()); // Translation fails if we have prior partitioning columns and none could be translated if (!partitioningColumns.isEmpty() && newPartitioningColumns.isEmpty()) { return Optional.empty(); } if (!partitioning.isPresent()) { return Optional.of(new PartitioningProperties(newPartitioningColumns, Optional.empty(), nullsAndAnyReplicated)); } Optional<Partitioning> newPartitioning = partitioning.get().translate(new Partitioning.Translator(translator, symbol -> Optional.empty(), coalesceSymbols -> Optional.empty())); if (!newPartitioning.isPresent()) { return Optional.empty(); } return Optional.of(new PartitioningProperties(newPartitioningColumns, newPartitioning, nullsAndAnyReplicated)); }
/**
 * Checks whether the actual properties satisfy the global part of a preference.
 *
 * @param preferred the preference to check against
 * @param actual the properties actually delivered
 * @return {@code true} when there is no global preference; otherwise whether
 *         {@code actual} matches the preferred single-node / distributed /
 *         partitioned-on-columns shape
 */
private static boolean meetsPartitioningRequirements(PreferredProperties preferred, ActualProperties actual) {
    Optional<PreferredProperties.Global> requestedGlobal = preferred.getGlobalProperties();
    if (!requestedGlobal.isPresent()) {
        // Nothing was asked for globally, so anything qualifies
        return true;
    }

    PreferredProperties.Global global = requestedGlobal.get();
    if (!global.isDistributed()) {
        // An undistributed preference is only met by a single-node plan
        return actual.isSingleNode();
    }

    // Distributed: either match the requested columns, or (with no columns requested) just not be single-node
    return global.getPartitioningProperties()
            .map(requested -> actual.isStreamPartitionedOn(requested.getPartitioningColumns()))
            .orElseGet(() -> !actual.isSingleNode());
}
/**
 * Combines this global preference with the one requested by a parent.
 *
 * @param parent the parent's global preference
 * @return this instance when the distribution flags differ or the parent has no
 *         partitioning properties; the parent when this instance has none; otherwise
 *         a new {@code Global} holding the merged partitioning properties
 */
public Global mergeWithParent(Global parent) {
    if (distributed != parent.distributed) {
        return this;
    }
    if (!partitioningProperties.isPresent()) {
        // Nothing of our own to defend, so adopt the parent's preference wholesale
        return parent;
    }

    PartitioningProperties own = partitioningProperties.get();
    return parent.partitioningProperties
            .map(theirs -> new Global(distributed, Optional.of(own.mergeWithParent(theirs))))
            .orElse(this);
}
if (parentGlobal.isPresent() && parentGlobal.get().isDistributed() && parentGlobal.get().getPartitioningProperties().isPresent()) { PreferredProperties.PartitioningProperties parentPartitioningPreference = parentGlobal.get().getPartitioningProperties().get(); boolean nullsAndAnyReplicated = parentPartitioningPreference.isNullsAndAnyReplicated(); Partitioning desiredParentPartitioning = selectUnionPartitioning(node, parentPreference, parentPartitioningPreference); .global(PreferredProperties.Global.distributed(PreferredProperties.PartitioningProperties.partitioned(childPartitioning) .withNullsAndAnyReplicated(nullsAndAnyReplicated))) .build(); .global(partitionedOn(desiredParentPartitioning, Optional.of(desiredParentPartitioning))) .build() .withReplicatedNulls(parentPartitioningPreference.isNullsAndAnyReplicated()));
private boolean shouldRepartitionForIndexJoin(List<Symbol> joinColumns, PreferredProperties parentPreferredProperties, ActualProperties probeProperties) { // See if distributed index joins are enabled if (!distributedIndexJoins) { return false; } // No point in repartitioning if the plan is not distributed if (probeProperties.isSingleNode()) { return false; } Optional<PreferredProperties.PartitioningProperties> parentPartitioningPreferences = parentPreferredProperties.getGlobalProperties() .flatMap(PreferredProperties.Global::getPartitioningProperties); // Disable repartitioning if it would disrupt a parent's partitioning preference when streaming is enabled boolean parentAlreadyPartitionedOnChild = parentPartitioningPreferences .map(partitioning -> probeProperties.isStreamPartitionedOn(partitioning.getPartitioningColumns())) .orElse(false); if (preferStreamingOperators && parentAlreadyPartitionedOnChild) { return false; } // Otherwise, repartition if we need to align with the join columns if (!probeProperties.isStreamPartitionedOn(joinColumns)) { return true; } // If we are already partitioned on the join columns because the data has been forced effectively into one stream, // then we should repartition if that would make a difference (from the single stream state). return probeProperties.isEffectivelySingleStream() && probeProperties.isStreamRepartitionEffective(joinColumns); }
private Partitioning selectUnionPartitioning(UnionNode node, PreferredProperties preferredProperties, PreferredProperties.PartitioningProperties parentPreference) { // Use the parent's requested partitioning if available if (parentPreference.getPartitioning().isPresent()) { return parentPreference.getPartitioning().get(); } // Try planning the children to see if any of them naturally produce a partitioning (for now, just select the first) boolean nullsAndAnyReplicated = parentPreference.isNullsAndAnyReplicated(); for (int sourceIndex = 0; sourceIndex < node.getSources().size(); sourceIndex++) { PreferredProperties.PartitioningProperties childPartitioning = parentPreference.translate(outputToInputTranslator(node, sourceIndex)).get(); PreferredProperties childPreferred = PreferredProperties.builder() .global(PreferredProperties.Global.distributed(childPartitioning.withNullsAndAnyReplicated(nullsAndAnyReplicated))) .build(); PlanWithProperties child = node.getSources().get(sourceIndex).accept(this, childPreferred); if (child.getProperties().isNodePartitionedOn(childPartitioning.getPartitioningColumns(), nullsAndAnyReplicated)) { Function<Symbol, Optional<Symbol>> childToParent = createTranslator(createMapping(node.sourceOutputLayout(sourceIndex), node.getOutputSymbols())); return child.getProperties().translate(childToParent).getNodePartitioning().get(); } } // Otherwise, choose an arbitrary partitioning over the columns return Partitioning.create(FIXED_HASH_DISTRIBUTION, ImmutableList.copyOf(parentPreference.getPartitioningColumns())); }
public PartitioningProperties mergeWithParent(PartitioningProperties parent) { // Non-negotiable if we require a specific partitioning if (partitioning.isPresent()) { return this; } // Partitioning with different replication cannot be compared if (nullsAndAnyReplicated != parent.nullsAndAnyReplicated) { return this; } if (parent.partitioning.isPresent()) { // If the parent has a partitioning preference, propagate parent only if the parent's partitioning columns satisfies our preference. // Otherwise, ignore the parent since the parent will have to repartition anyways. return partitioningColumns.containsAll(parent.partitioningColumns) ? parent : this; } // Otherwise partition on any common columns if available Set<Symbol> common = Sets.intersection(partitioningColumns, parent.partitioningColumns); return common.isEmpty() ? this : partitioned(common).withNullsAndAnyReplicated(nullsAndAnyReplicated); }
/**
 * Returns a copy of these properties with the nulls-and-any-replicated flag replaced.
 *
 * @param nullsAndAnyReplicated the flag value for the copy
 * @return a new instance with the same columns and partitioning and the given flag
 */
public PartitioningProperties withNullsAndAnyReplicated(boolean nullsAndAnyReplicated) {
    // The parameter shadows the field of the same name; the fields are qualified for clarity
    return new PartitioningProperties(
            this.partitioningColumns,
            this.partitioning,
            nullsAndAnyReplicated);
}
/**
 * Creates a global preference that is not distributed and carries single-partition properties.
 *
 * @return a new undistributed {@code Global}
 */
public static Global undistributed() {
    Optional<PartitioningProperties> single = Optional.of(PartitioningProperties.singlePartition());
    return new Global(false, single);
}
/**
 * Builds a distributed preference for a specific partitioning with the
 * nulls-and-any-replicated flag switched on.
 *
 * @param partitioning the partitioning to request
 * @return the resulting preferred properties
 */
public static PreferredProperties partitionedWithNullsAndAnyReplicated(Partitioning partitioning) {
    PartitioningProperties replicated = PartitioningProperties.partitioned(partitioning)
            .withNullsAndAnyReplicated(true);
    return builder()
            .global(Global.distributed(replicated))
            .build();
}
/**
 * Creates properties that ask for partitioning on the given columns without naming a
 * specific partitioning; the nulls-and-any-replicated flag is off.
 *
 * @param columns the columns to partition on
 * @return the new partitioning properties
 */
public static PartitioningProperties partitioned(Set<Symbol> columns) {
    Optional<Partitioning> noSpecificPartitioning = Optional.empty();
    return new PartitioningProperties(columns, noSpecificPartitioning, false);
}
/**
 * Creates properties that require the given partitioning; the column set is taken from
 * that partitioning and the nulls-and-any-replicated flag is off.
 *
 * @param partitioning the specific partitioning to require
 * @return the new partitioning properties
 */
public static PartitioningProperties partitioned(Partitioning partitioning) {
    return new PartitioningProperties(
            partitioning.getColumns(),
            Optional.of(partitioning),
            false);
}
/**
 * Creates properties partitioned on an empty column set.
 *
 * @return the new partitioning properties
 */
public static PartitioningProperties singlePartition() {
    Set<Symbol> noColumns = ImmutableSet.of();
    return partitioned(noColumns);
}
/**
 * Builds a distributed preference that requires the given partitioning.
 *
 * @param partitioning the partitioning to request
 * @return the resulting preferred properties
 */
public static PreferredProperties partitioned(Partitioning partitioning) {
    PartitioningProperties requested = PartitioningProperties.partitioned(partitioning);
    return builder()
            .global(Global.distributed(requested))
            .build();
}
/**
 * Builds a distributed preference for partitioning on the given columns.
 *
 * @param columns the columns to partition on
 * @return the resulting preferred properties
 */
public static PreferredProperties partitioned(Set<Symbol> columns) {
    PartitioningProperties requested = PartitioningProperties.partitioned(columns);
    return builder()
            .global(Global.distributed(requested))
            .build();
}
/**
 * Re-expresses this global preference through the supplied symbol mapping.
 *
 * @param translator maps a symbol to its counterpart, or to empty when it has none
 * @return this instance when not distributed; otherwise a distributed preference carrying
 *         the translated partitioning properties, or none when they are absent or fail to translate
 */
public Global translate(Function<Symbol, Optional<Symbol>> translator) {
    if (isDistributed()) {
        Optional<PartitioningProperties> translated =
                partitioningProperties.flatMap(properties -> properties.translate(translator));
        return distributed(translated);
    }
    // An undistributed preference is returned untouched
    return this;
}
/**
 * Builds a distributed preference for partitioning on the given columns, together with
 * the given local properties.
 *
 * @param columns the columns to partition on
 * @param localProperties the local properties to request
 * @return the resulting preferred properties
 */
public static PreferredProperties partitionedWithLocal(Set<Symbol> columns, List<? extends LocalProperty<Symbol>> localProperties) {
    PartitioningProperties requested = PartitioningProperties.partitioned(columns);
    return builder()
            .global(Global.distributed(requested))
            .local(localProperties)
            .build();
}
/**
 * Builds a distributed preference for partitioning on the given columns with the
 * nulls-and-any-replicated flag switched on.
 *
 * @param columns the columns to partition on
 * @return the resulting preferred properties
 */
public static PreferredProperties partitionedWithNullsAndAnyReplicated(Set<Symbol> columns) {
    PartitioningProperties replicated = PartitioningProperties.partitioned(columns)
            .withNullsAndAnyReplicated(true);
    return builder()
            .global(Global.distributed(replicated))
            .build();
}