.where(0).equalTo(0).projectFirst(1).projectSecond(0);
.projectFirst(0, 1) .<Tuple3<Long, Long, Long>>projectSecond(2) .withPartitioner(partitioner);
@Test public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception { /* * Test convergence criterion with parameter for iterate delta */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap()); DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta( initialSolutionSet, MAX_ITERATIONS, 0); // register aggregator LongSumAggregator aggr = new LongSumAggregator(); iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr); // register convergence criterion iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr, new NegativeElementsConvergenceCriterionWithParam(3)); DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateAndSubtractOneDelta()); DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet()) .where(0).equalTo(0).projectFirst(0, 1); DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements); List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect(); Collections.sort(result); List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1); assertEquals(expected, result); }
/**
 * Joins a 3-tuple data set with a 5-tuple data set on field 1 of each side and projects
 * fields from both inputs, alternating between the first and the second input.
 */
@Test
public void testProjectOnATuple1Input() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> small3Tuples =
            CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> fiveTuples =
            CollectionDataSets.get5TupleDataSet(env);

    DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected = small3Tuples
            .join(fiveTuples)
            .where(1)
            .equalTo(1)
            .projectFirst(2, 1)
            .projectSecond(3)
            .projectFirst(0)
            .projectSecond(4, 1);

    List<Tuple6<String, Long, String, Integer, Long, Long>> actual = projected.collect();

    String wanted = "Hi,1,Hallo,1,1,1\n"
            + "Hello,2,Hallo Welt,2,2,2\n"
            + "Hello world,2,Hallo Welt,3,2,2\n";

    compareResultAsTuples(actual, wanted);
}
.projectFirst(0).projectSecond(0);
/**
 * Builds the plan for a self-join that projects fields 2 and 3 of the first input and
 * fields 1 and 4 of the second input, then checks that each projected source field is
 * forwarded to exactly one target position (0, 1, 2 and 3 respectively).
 */
@Test
public void testJoinProjectionSemProps1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tuples =
            env.fromCollection(emptyTupleData, tupleTypeInfo);

    tuples.join(tuples)
            .where(0)
            .equalTo(0)
            .projectFirst(2, 3)
            .projectSecond(1, 4)
            .output(new DiscardingOutputFormat<Tuple>());

    Plan programPlan = env.createProgramPlan();

    // The join operator is the input of the first data sink in the plan.
    GenericDataSinkBase<?> firstSink = programPlan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> joinOperator =
            (InnerJoinOperatorBase<?, ?, ?, ?>) firstSink.getInput();

    DualInputSemanticProperties semProps = joinOperator.getSemanticProperties();

    // Each projected source field must have exactly one forwarding target ...
    assertEquals(1, semProps.getForwardingTargetFields(0, 2).size());
    assertEquals(1, semProps.getForwardingTargetFields(0, 3).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 1).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 4).size());

    // ... and that target must be its position in the projected output.
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(1, 1).contains(2));
    assertTrue(semProps.getForwardingTargetFields(1, 4).contains(3));
}
@Test(expected = InvalidProgramException.class) public void testRangePartitionInIteration() throws Exception { // does not apply for collection execution if (super.mode == TestExecutionMode.COLLECTION) { throw new InvalidProgramException("Does not apply for collection execution"); } final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<Long> source = env.generateSequence(0, 10000); DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() { @Override public Tuple2<Long, String> map(Long v) throws Exception { return new Tuple2<>(v, Long.toString(v)); } }); DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0); DataSet<Tuple2<Long, String>> body = it.getWorkset() .partitionByRange(1) // Verify that range partition is not allowed in iteration .join(it.getSolutionSet()) .where(0).equalTo(0).projectFirst(0).projectSecond(1); DataSet<Tuple2<Long, String>> result = it.closeWith(body, body); result.collect(); // should fail }
/**
 * Feeds a replicated input source into a join with a generated sequence and checks the
 * sum over the projected field of the join result.
 */
@Test
public void testReplicatedSourceToJoin() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // First input: the numbers 0..1000 read through a replicating input format.
    DataSet<Tuple1<Long>> replicated = env
            .createInput(
                    new ReplicatingInputFormat<Long, GenericInputSplit>(
                            new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))),
                    BasicTypeInfo.LONG_TYPE_INFO)
            .map(new ToTuple());

    // Second input: the same range produced by generateSequence.
    DataSet<Tuple1<Long>> sequence = env.generateSequence(0L, 1000L).map(new ToTuple());

    DataSet<Tuple> summed = replicated
            .join(sequence)
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .sum(0);

    List<Tuple> actual = summed.collect();

    compareResultAsText(actual, "(500500)");
}
/**
 * Joins the edges with both vertex data sets.
 *
 * <p>The edges are first joined with {@code topVertices} (edge field 0 against vertex field 0),
 * keeping edge fields 0-2 and appending vertex field 1. That result is then joined with
 * {@code bottomVertices} (field 1 against vertex field 0), keeping fields 0-3 and appending
 * vertex field 1.
 *
 * @return the data set of 5-tuples produced by the second join
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
    DataSet<Tuple4<KT, KB, EV, VVT>> edgesWithTop = edges
            .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
            .where(0)
            .equalTo(0)
            .projectFirst(0, 1, 2)
            .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
            .name("Edge with vertex");

    return edgesWithTop
            .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
            .where(1)
            .equalTo(0)
            .projectFirst(0, 1, 2, 3)
            .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
            .name("Edge with vertices");
}
@Test public void testJoinProjection4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0, 2) .projectSecond(1, 4) .projectFirst(1); } catch (Exception e) { Assert.fail(); } }
@Test public void testJoinProjection3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .projectSecond(3); } catch (Exception e) { Assert.fail(); } }
@Test public void testJoinProjection1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0); } catch (Exception e) { Assert.fail(); } }
@Test(expected = IndexOutOfBoundsException.class) public void testJoinProjection14() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .projectSecond(5); }
@Test(expected = IndexOutOfBoundsException.class) public void testJoinProjection28() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(5); }
/**
 * Builds a program with two chained joins over tuple-wrapped sequences of longs, writes
 * the projected result as text to {@code resultPath}, and executes it.
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> numbers = env.generateSequence(0, 100000);

    DataSet<Tuple1<Long>> wrapped = numbers.map(new TupleWrapper());
    DataSet<Tuple1<Long>> wrappedProjected = wrapped.project(0);
    DataSet<Tuple1<Long>> wrappedAgain = numbers.map(new TupleWrapper());

    // The first join result is joined once more with the first wrapped data set.
    wrappedProjected
            .join(wrappedAgain)
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .join(wrapped)
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .writeAsText(resultPath);

    env.execute();
}
@Test(expected = IndexOutOfBoundsException.class) public void testJoinProjection30() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, type does not match ds1.join(ds2).where(0).equalTo(0) .projectFirst(-1); }
@Test public void testJoinProjection2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0, 3); } catch (Exception e) { Assert.fail(); } }
@Test(expected = IndexOutOfBoundsException.class) public void testJoinProjection34() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .projectSecond(-1); }
@Test(expected = IndexOutOfBoundsException.class) public void testJoinProjection8() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(5); }
public void testJoinProjection10() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.join(ds2).where(0).equalTo(0) .projectFirst(2); }