@Test public void CoGroupWithDifferentDistributionTest() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); TestDistribution testDistribution1 = new TestDistribution(3); TestDistribution testDistribution2 = new TestDistribution(2); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = DataSetUtils.partitionByRange(set1, testDistribution1, 0) .coGroup(DataSetUtils.partitionByRange(set2, testDistribution2, 0)) .where(0).equalTo(0).with(new CoGroupFunc()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); Channel input1 = coGroup.getInput1(); Channel input2 = coGroup.getInput2(); assertEquals(ShipStrategyType.PARTITION_HASH, input1.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, input2.getShipStrategy()); }
@Test public void CoGroupWithSameDistributionTest() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); TestDistribution testDistribution1 = new TestDistribution(3); TestDistribution testDistribution2 = new TestDistribution(3); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = DataSetUtils.partitionByRange(set1, testDistribution1, 0) .coGroup(DataSetUtils.partitionByRange(set2, testDistribution2, 0)) .where(0).equalTo(0).with(new CoGroupFunc()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); Channel input1 = coGroup.getInput1(); Channel input2 = coGroup.getInput2(); assertEquals(ShipStrategyType.FORWARD, input1.getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, input2.getShipStrategy()); }
@Test public void testCoGroupWithRangePartitioning() throws Exception { /* * Test coGroup on tuples with multiple key field positions and same customized distribution */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env); env.setParallelism(4); TestDistribution testDis = new TestDistribution(); DataSet<Tuple3<Integer, Long, String>> coGrouped = DataSetUtils.partitionByRange(ds1, testDis, 0, 4) .coGroup(DataSetUtils.partitionByRange(ds2, testDis, 0, 1)) .where(0, 4) .equalTo(0, 1) .with(new Tuple5Tuple3CoGroup()); List<Tuple3<Integer, Long, String>> result = coGrouped.collect(); String expected = "1,1,Hallo\n" + "2,2,Hallo Welt\n" + "3,2,Hallo Welt wie gehts?\n" + "3,2,ABC\n" + "5,3,HIJ\n" + "5,3,IJK\n"; compareResultAsTuples(result, expected); }
.coGroup(input2.partitionCustom(partitioner, 0)) .where(1).equalTo(0) .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>())