/** * Tests join program with replicated data source behind filter. */ @Test public void checkJoinWithReplicatedSourceInputBehindFilter() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
@Override
protected DataSet<ExpandEmbedding> iterate(DataSet<ExpandEmbedding> initialWorkingSet) {
    // bulk iteration running at most (upperBound - 1) expansion rounds
    IterativeDataSet<ExpandEmbedding> loop = initialWorkingSet
        .iterate(upperBound - 1)
        .name(getName());

    // drop embeddings from the previous round, then grow the remaining ones by
    // joining column 2 of the embedding against column 0 of the candidate edges
    DataSet<ExpandEmbedding> grown = loop
        .filter(new FilterPreviousExpandEmbedding())
        .name(getName() + " - FilterRecent")
        .join(candidateEdgeTuples, joinHint)
        .where(2).equalTo(0)
        .with(new MergeExpandEmbeddings(distinctVertexColumns, distinctEdgeColumns, closingColumn))
        .name(getName() + " - Expansion");

    // solution set keeps everything seen so far; only the freshly grown
    // embeddings feed the next round (iteration stops when none are produced)
    return loop.closeWith(grown.union(loop), grown);
}
}
@Override
protected DataSet<ExpandEmbedding> iterate(DataSet<ExpandEmbedding> initialWorkingSet) {
    // bulk iteration bounded by (upperBound - 1) rounds of path expansion
    IterativeDataSet<ExpandEmbedding> iterative = initialWorkingSet
        .iterate(upperBound - 1)
        .name(getName());

    // filter out embeddings produced in the previous round before expanding;
    // the join matches embedding end column (2) with candidate edge start column (0)
    DataSet<ExpandEmbedding> expanded = iterative
        .filter(new FilterPreviousExpandEmbedding())
        .name(getName() + " - FilterRecent")
        .join(candidateEdgeTuples, joinHint)
        .where(2).equalTo(0)
        .with(new MergeExpandEmbeddings(distinctVertexColumns, distinctEdgeColumns, closingColumn))
        .name(getName() + " - Expansion");

    // accumulate all embeddings into the result while re-iterating only on the
    // newly expanded ones; an empty expansion set terminates the iteration
    DataSet<ExpandEmbedding> accumulated = expanded.union(iterative);
    return iterative.closeWith(accumulated, expanded);
}
}