/**
 * Joins {@code op1} with {@code op2} on the given key fields and passes every joined pair
 * through a {@code NestedKeyDiscarder} map step named "DefaultJoinPostStep".
 *
 * <p>The size hint picks the join entry point: {@code NONE} uses {@code join},
 * {@code HUGE} uses {@code joinWithHuge} and {@code TINY} uses {@code joinWithTiny}.
 * Both the join operator and the subsequent map operator are given the same parallelism.
 *
 * @param op1 first join input
 * @param op2 second join input
 * @param firstKeys key field expressions applied to {@code op1}
 * @param secondKeys key field expressions applied to {@code op2}
 * @param mode size hint selecting the join variant
 * @param parallelism parallelism set on the join and on the post-processing map
 * @return the output of the post-processing map
 * @throws IllegalArgumentException if {@code mode} is none of {@code NONE}, {@code HUGE}, {@code TINY}
 */
private <IN1, IN2> DataSet<Tuple2<byte[], byte[]>> createDefaultJoin(DataSet<IN1> op1, DataSet<IN2> op2, List<String> firstKeys, List<String> secondKeys, DatasizeHint mode, int parallelism) {
    final String[] leftFields = firstKeys.toArray(new String[firstKeys.size()]);
    final String[] rightFields = secondKeys.toArray(new String[secondKeys.size()]);

    // Every arm assigns exactly once; the default arm throws, so the local is definitely
    // assigned by the time the switch is left.
    final DataSet<Tuple2<byte[], byte[]>> postProcessed;
    switch (mode) {
        case NONE:
            postProcessed = op1.join(op2)
                    .where(leftFields)
                    .equalTo(rightFields)
                    .setParallelism(parallelism)
                    .map(new NestedKeyDiscarder<Tuple2<IN1, IN2>>())
                    .setParallelism(parallelism)
                    .name("DefaultJoinPostStep");
            break;
        case HUGE:
            postProcessed = op1.joinWithHuge(op2)
                    .where(leftFields)
                    .equalTo(rightFields)
                    .setParallelism(parallelism)
                    .map(new NestedKeyDiscarder<Tuple2<IN1, IN2>>())
                    .setParallelism(parallelism)
                    .name("DefaultJoinPostStep");
            break;
        case TINY:
            postProcessed = op1.joinWithTiny(op2)
                    .where(leftFields)
                    .equalTo(rightFields)
                    .setParallelism(parallelism)
                    .map(new NestedKeyDiscarder<Tuple2<IN1, IN2>>())
                    .setParallelism(parallelism)
                    .name("DefaultJoinPostStep");
            break;
        default:
            throw new IllegalArgumentException("Invalid join mode specified.");
    }
    return postProcessed;
}
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }