.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .build();
/**
 * Decides whether the given join is allowed to use a replicated (broadcast) distribution.
 *
 * <p>The answer is {@code false} when the session's join distribution type does not permit
 * replication. Otherwise, if no maximum broadcast table size is configured for the session,
 * the answer is {@code true}. When a maximum is configured, the estimated output size of the
 * join's right input is compared against it.
 *
 * @param joinNode the join whose right input is treated as the build side
 * @param context rule context supplying the session, stats provider and symbol types
 * @return {@code true} if the join may be replicated
 */
public static boolean canReplicate(JoinNode joinNode, Context context) {
    if (!getJoinDistributionType(context.getSession()).canReplicate()) {
        return false;
    }

    Optional<DataSize> broadcastLimit = getJoinMaxBroadcastTableSize(context.getSession());
    if (broadcastLimit.isPresent()) {
        // The right input is the side that would be broadcast, so its estimated size is what must fit under the limit.
        PlanNode rightInput = joinNode.getRight();
        double estimatedBytes = context.getStatsProvider()
                .getStats(rightInput)
                .getOutputSizeInBytes(rightInput.getOutputSymbols(), context.getSymbolAllocator().getTypes());
        return estimatedBytes <= broadcastLimit.get().toBytes();
    }

    // No size limit configured: replication is unrestricted.
    return true;
}
@Test public void testJoinWithStatefulFilterFunction() { super.testJoinWithStatefulFilterFunction(); // Stateful function is placed in LEFT JOIN's ON clause and involves left & right symbols to prevent any kind of push down/pull down. Session session = Session.builder(getSession()) // With broadcast join, lineitem would be source-distributed and not executed concurrently. .setSystemProperty(SystemSessionProperties.JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.toString()) .build(); long joinOutputRowCount = 60175; assertQuery( session, format( "SELECT count(*) FROM lineitem l LEFT OUTER JOIN orders o ON l.orderkey = o.orderkey AND stateful_sleeping_sum(%s, 100, l.linenumber, o.shippriority) > 0", 10 * 1. / joinOutputRowCount), format("VALUES %s", joinOutputRowCount)); }
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .setSystemProperty(COLOCATED_JOIN, "true") .setSystemProperty(GROUPED_EXECUTION_FOR_AGGREGATION, "true")
@Test public void testJoinWithStatefulFilterFunction() { super.testJoinWithStatefulFilterFunction(); // Stateful function is placed in LEFT JOIN's ON clause and involves left & right symbols to prevent any kind of push down/pull down. Session session = Session.builder(getSession()) // With broadcast join, lineitem would be source-distributed and not executed concurrently. .setSystemProperty(SystemSessionProperties.JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.toString()) .build(); long joinOutputRowCount = 60175; assertQuery( session, format( "SELECT count(*) FROM lineitem l LEFT OUTER JOIN orders o ON l.orderkey = o.orderkey AND stateful_sleeping_sum(%s, 100, l.linenumber, o.shippriority) > 0", 10 * 1. / joinOutputRowCount), format("VALUES %s", joinOutputRowCount)); }
/**
 * Creates a session derived from {@link #getSession()} with the join reordering strategy
 * set to NONE and the join distribution type set to PARTITIONED.
 *
 * @return the derived session
 */
protected Session noJoinReordering() {
    String reorderingStrategy = JoinReorderingStrategy.NONE.name();
    String distributionType = JoinDistributionType.PARTITIONED.name();

    Session fixedJoinSession = Session.builder(getSession())
            .setSystemProperty(JOIN_REORDERING_STRATEGY, reorderingStrategy)
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, distributionType)
            .build();
    return fixedJoinSession;
}
}
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .setSystemProperty(COLOCATED_JOIN, "true") .setSystemProperty(GROUPED_EXECUTION_FOR_AGGREGATION, "true")
JOIN_DISTRIBUTION_TYPE, format("The join method to use. Options are %s", Stream.of(JoinDistributionType.values()) .map(JoinDistributionType::name) .collect(joining(","))), featuresConfig.getJoinDistributionType(), false, value -> JoinDistributionType.valueOf(((String) value).toUpperCase()), JoinDistributionType::name), new PropertyMetadata<>(
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .setSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.toString(false))
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .build();
/**
 * With the join distribution type set to BROADCAST and a 1kB maximum broadcast table size,
 * an inner join of two value nodes (both overridden to 10000 output rows) is expected to
 * come out as a REPLICATED join with A1 on the left and B1 on the right.
 */
@Test
public void testReplicatesUnrestrictedWhenRequiredBySession() {
    String leftId = "valuesA";
    String rightId = "valuesB";

    // Both inputs get the same overridden statistics; only the symbol differs.
    PlanNodeStatsEstimate leftStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(10000)
            .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
            .build();
    PlanNodeStatsEstimate rightStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(10000)
            .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
            .build();

    assertReorderJoins()
            .on(planBuilder -> planBuilder.join(
                    INNER,
                    planBuilder.values(new PlanNodeId(leftId), ImmutableList.of(planBuilder.symbol("A1")), TWO_ROWS),
                    planBuilder.values(new PlanNodeId(rightId), ImmutableList.of(planBuilder.symbol("B1")), TWO_ROWS),
                    ImmutableList.of(new EquiJoinClause(planBuilder.symbol("A1"), planBuilder.symbol("B1"))),
                    ImmutableList.of(planBuilder.symbol("A1"), planBuilder.symbol("B1")),
                    Optional.empty()))
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "1kB")
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name())
            .overrideStats(leftId, leftStats)
            .overrideStats(rightId, rightStats)
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
/**
 * With the join distribution type set to PARTITIONED, an inner join of valuesA (overridden
 * to 100 rows) and valuesB (overridden to 10000 rows) is expected to come out as a
 * PARTITIONED join with the sides flipped: B1 on the left and A1 on the right.
 */
@Test
public void testRepartitionsWhenRequiredBySession() {
    String leftId = "valuesA";
    String rightId = "valuesB";

    PlanNodeStatsEstimate leftStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(100)
            .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 6400, 100)))
            .build();
    PlanNodeStatsEstimate rightStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(10000)
            .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
            .build();

    assertReorderJoins()
            .on(planBuilder -> planBuilder.join(
                    INNER,
                    planBuilder.values(new PlanNodeId(leftId), ImmutableList.of(planBuilder.symbol("A1")), TWO_ROWS),
                    planBuilder.values(new PlanNodeId(rightId), ImmutableList.of(planBuilder.symbol("B1")), TWO_ROWS),
                    ImmutableList.of(new EquiJoinClause(planBuilder.symbol("A1"), planBuilder.symbol("B1"))),
                    ImmutableList.of(planBuilder.symbol("A1"), planBuilder.symbol("B1")),
                    Optional.empty()))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name())
            .overrideStats(leftId, leftStats)
            .overrideStats(rightId, rightStats)
            // Expected plan has the inputs swapped relative to the join built above.
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("B1", "A1")),
                    Optional.empty(),
                    Optional.of(PARTITIONED),
                    values(ImmutableMap.of("B1", 0)),
                    values(ImmutableMap.of("A1", 0))));
}
/**
 * Even with the session's join distribution type set to PARTITIONED, an inner join whose
 * right input is wrapped in enforceSingleRow is expected to be planned as REPLICATED,
 * keeping A1 on the left and the single-row B1 input on the right.
 */
@Test
public void testReplicateScalar() {
    String sessionDistribution = JoinDistributionType.PARTITIONED.name();
    Optional<DistributionType> expectedDistribution = Optional.of(DistributionType.REPLICATED);

    assertDetermineJoinDistributionType()
            .on(planBuilder -> planBuilder.join(
                    INNER,
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("A1")),
                            ImmutableList.of(expressions("10"), expressions("11"))),
                    planBuilder.enforceSingleRow(
                            planBuilder.values(
                                    ImmutableList.of(planBuilder.symbol("B1")),
                                    ImmutableList.of(expressions("50"), expressions("11")))),
                    ImmutableList.of(new JoinNode.EquiJoinClause(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT))),
                    ImmutableList.of(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT)),
                    Optional.empty()))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, sessionDistribution)
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    expectedDistribution,
                    values(ImmutableMap.of("A1", 0)),
                    enforceSingleRow(values(ImmutableMap.of("B1", 0)))));
}
/**
 * Builds an equi-join of two two-row value nodes on A1 = B1 and asserts the plan produced
 * under the given session join distribution type.
 *
 * <p>The expected plan has the inputs swapped (B1 on the left, A1 on the right) and carries
 * the supplied distribution type.
 *
 * @param sessionDistributedJoin value used for the JOIN_DISTRIBUTION_TYPE session property
 * @param joinType join type used both when building the plan and in the expected pattern
 * @param expectedDistribution distribution type the resulting join must have
 */
private void testDetermineDistributionType(JoinDistributionType sessionDistributedJoin, Type joinType, DistributionType expectedDistribution) {
    String sessionDistribution = sessionDistributedJoin.name();
    Optional<DistributionType> expected = Optional.of(expectedDistribution);

    assertDetermineJoinDistributionType()
            .on(planBuilder -> planBuilder.join(
                    joinType,
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("A1")),
                            ImmutableList.of(expressions("10"), expressions("11"))),
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("B1")),
                            ImmutableList.of(expressions("50"), expressions("11"))),
                    ImmutableList.of(new JoinNode.EquiJoinClause(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT))),
                    ImmutableList.of(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT)),
                    Optional.empty()))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, sessionDistribution)
            .matches(join(
                    joinType,
                    ImmutableList.of(equiJoinClause("B1", "A1")),
                    Optional.empty(),
                    expected,
                    values(ImmutableMap.of("B1", 0)),
                    values(ImmutableMap.of("A1", 0))));
}
/**
 * Builds a join of two two-row value nodes that has no equi-join clauses, only the filter
 * {@code A1 * B1 > 100}, and asserts that it is planned as REPLICATED even though the
 * session's join distribution type is PARTITIONED. Input order is expected to be unchanged.
 *
 * @param joinType join type used both when building the plan and in the expected pattern
 */
private void testReplicateNoEquiCriteria(Type joinType) {
    // The same filter text is parsed for the plan and matched textually in the expected pattern.
    String filter = "A1 * B1 > 100";
    String sessionDistribution = JoinDistributionType.PARTITIONED.name();
    Optional<DistributionType> expectedDistribution = Optional.of(DistributionType.REPLICATED);

    assertDetermineJoinDistributionType()
            .on(planBuilder -> planBuilder.join(
                    joinType,
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("A1")),
                            ImmutableList.of(expressions("10"), expressions("11"))),
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("B1")),
                            ImmutableList.of(expressions("50"), expressions("11"))),
                    ImmutableList.of(),
                    ImmutableList.of(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT)),
                    Optional.of(expression(filter))))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, sessionDistribution)
            .matches(join(
                    joinType,
                    ImmutableList.of(),
                    Optional.of(filter),
                    expectedDistribution,
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
/**
 * Builds an equi-join of two two-row value nodes on A1 = B1 and asserts that, under the
 * given session join distribution type, the join is planned as PARTITIONED with the input
 * order left unchanged (A1 on the left, B1 on the right).
 *
 * @param sessionDistributedJoin value used for the JOIN_DISTRIBUTION_TYPE session property
 * @param joinType join type used both when building the plan and in the expected pattern
 */
private void testRepartitionRightOuter(JoinDistributionType sessionDistributedJoin, Type joinType) {
    String sessionDistribution = sessionDistributedJoin.name();
    Optional<DistributionType> expectedDistribution = Optional.of(DistributionType.PARTITIONED);

    assertDetermineJoinDistributionType()
            .on(planBuilder -> planBuilder.join(
                    joinType,
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("A1")),
                            ImmutableList.of(expressions("10"), expressions("11"))),
                    planBuilder.values(
                            ImmutableList.of(planBuilder.symbol("B1")),
                            ImmutableList.of(expressions("50"), expressions("11"))),
                    ImmutableList.of(new JoinNode.EquiJoinClause(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT))),
                    ImmutableList.of(planBuilder.symbol("A1", BIGINT), planBuilder.symbol("B1", BIGINT)),
                    Optional.empty()))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, sessionDistribution)
            .matches(join(
                    joinType,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    expectedDistribution,
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
/**
 * Supplies the parent class with a factory for a {@link LocalQueryRunner} that uses the
 * "local" catalog backed by a {@code TpcdsConnectorFactory}, schema "sf3000.0", and
 * AUTOMATIC join reordering and join distribution.
 */
public TestTpcdsCostBasedPlan() {
    super(() -> {
        final String catalogName = "local";
        final int fakeNodeCount = 8;

        Session session = testSessionBuilder()
                .setCatalog(catalogName)
                .setSchema("sf3000.0")
                .setSystemProperty("task_concurrency", "1") // these tests don't handle exchanges from local parallel
                .setSystemProperty(JOIN_REORDERING_STRATEGY, JoinReorderingStrategy.AUTOMATIC.name())
                .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
                .build();

        LocalQueryRunner runner = LocalQueryRunner.queryRunnerWithFakeNodeCountForStats(session, fakeNodeCount);
        runner.createCatalog(catalogName, new TpcdsConnectorFactory(1), ImmutableMap.of());
        return runner;
    });
}
/**
 * Supplies the parent class with a factory for a {@link LocalQueryRunner} that uses the
 * "local" catalog backed by a {@code TpchConnectorFactory} with SIMPLIFIED column naming,
 * schema "sf3000.0", and AUTOMATIC join reordering and join distribution.
 */
public TestTpchCostBasedPlan() {
    super(() -> {
        final String catalogName = "local";
        final int fakeNodeCount = 8;

        SessionBuilder builder = testSessionBuilder()
                .setCatalog(catalogName)
                .setSchema("sf3000.0")
                .setSystemProperty("task_concurrency", "1") // these tests don't handle exchanges from local parallel
                .setSystemProperty(JOIN_REORDERING_STRATEGY, JoinReorderingStrategy.AUTOMATIC.name())
                .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name());

        LocalQueryRunner runner = LocalQueryRunner.queryRunnerWithFakeNodeCountForStats(builder.build(), fakeNodeCount);

        TpchConnectorFactory connectorFactory = new TpchConnectorFactory(1, false, false);
        runner.createCatalog(
                catalogName,
                connectorFactory,
                ImmutableMap.of(TPCH_COLUMN_NAMING_PROPERTY, ColumnNaming.SIMPLIFIED.name()));
        return runner;
    });
}
/**
 * Assigns a distribution type to the join without changing the order of its inputs.
 *
 * <p>Precedence, highest first: a join that must be partitioned is PARTITIONED; a join that
 * must be replicated is REPLICATED; otherwise the join is PARTITIONED if the configured
 * distribution type allows partitioning, and REPLICATED if it does not.
 *
 * @param joinNode the join to annotate
 * @param context rule context passed through to the must-replicate check
 * @param joinDistributionType the configured join distribution type
 * @return a copy of {@code joinNode} carrying the chosen distribution type
 */
private PlanNode getSyntacticOrderJoin(JoinNode joinNode, Context context, JoinDistributionType joinDistributionType) {
    // Short-circuit evaluation keeps the checks in precedence order: mustReplicate is consulted
    // only when partitioning is not mandatory, and canPartition only when neither is mandatory.
    boolean usePartitioned = mustPartition(joinNode)
            || (!mustReplicate(joinNode, context) && joinDistributionType.canPartition());

    return joinNode.withDistributionType(usePartitioned ? PARTITIONED : REPLICATED);
}