appliedSet = appliedSet.name("Apply"); appliedSet = appliedSet.withBroadcastSet(e.f1, e.f0); appliedSet = appliedSet.withBroadcastSet(numberOfVertices, "number of vertices"); appliedSet.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
.name(JOIN_WITH_INVARIANT_NAME) .join(iter.getSolutionSet()) .where(1, 0) .equalTo(1, 2) .withForwardedFieldsSecond(joinPreservesSolutionSet ? new String[] {"0->0", "1->1", "2->2" } : null);
return new Tuple2<Long, Long>(left.f0, right.f1); }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("1") .union(paths) .groupBy(0, 1) .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
/**
 * Computes the intersection between the edge set and the given edge set. For all matching pairs,
 * only one edge will be in the resulting data set.
 *
 * @param edges edges to compute intersection with
 * @return edge set containing one edge for all matching pairs of the same edge
 */
private DataSet<Edge<K, EV>> getDistinctEdgeIntersection(DataSet<Edge<K, EV>> edges) {
    // Keep only the left-hand edge of every matching (source, target, value) pair.
    JoinFunction<Edge<K, EV>, Edge<K, EV>, Edge<K, EV>> keepFirst =
        new JoinFunction<Edge<K, EV>, Edge<K, EV>, Edge<K, EV>>() {
            @Override
            public Edge<K, EV> join(Edge<K, EV> first, Edge<K, EV> second) throws Exception {
                return first;
            }
        };

    // Match on all three edge fields so only fully identical edges intersect.
    DataSet<Edge<K, EV>> matched = this.getEdges()
        .join(edges)
        .where(0, 1, 2)
        .equalTo(0, 1, 2)
        .with(keepFirst)
        .withForwardedFieldsFirst("*")
        .name("Intersect edges");

    // Collapse duplicate matches so each edge appears at most once in the result.
    return matched
        .distinct()
        .name("Edges");
}
.equalTo(0) .with(new ProjectVertex<>()) .setParallelism(parallelism) .name("Project low-degree vertices"); .equalTo(0) .with(new ProjectEdge<>()) .setParallelism(parallelism) .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "target" : "source")) .leftOuterJoin(highDegreeVertices, joinHint) .where(reduceOnTargetId.get() ? 0 : 1) .equalTo(0) .with(new ProjectEdge<>()) .setParallelism(parallelism) .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "source" : "target"));
.equalTo(0) .with(new JoinVertexWithVertexDegree<>()) .setParallelism(parallelism) .name("Zero degree vertices");
inputFields[0], keyFields[0], inputFields[1], keyFields[1])) .returns(tupleJoinListsTypeInfo) .withForwardedFieldsFirst(listKeysFwd) .setParallelism(dop) .name("coGroup-" + node.getID()); .where(listKeys).equalTo(flinkKeys[i]) .with(new TupleAppendOuterJoiner(i, numJoinInputs, inputFields[i], keyFields[i])) .returns(tupleJoinListsTypeInfo) .withForwardedFieldsFirst(listKeys) .setParallelism(dop) .name("coGroup-" + node.getID());
.equalTo(0) .with(new JoinAndNormalizeHubAndAuthority<>()) .withBroadcastSet(hubbinessSumSquared, HUBBINESS_SUM_SQUARED) .withBroadcastSet(authoritySumSquared, AUTHORITY_SUM_SQUARED) .setParallelism(parallelism) .name("Join scores"); .equalTo(0) .with(new ChangeInScores<>()) .setParallelism(parallelism) .name("Change in scores");
.projectSecond(1).<Tuple2<Long, Long>>projectFirst(1).name("Join Candidate Id With Neighbor") .groupBy(0) .min(1) .name("Find Minimum Candidate Id"); .withForwardedFieldsSecond("0") .name("Update Component Id"); } else { updateComponentId = minCandidateId.join(iteration.getSolutionSet()) .withForwardedFieldsSecond("0") .name("Update Component Id");
/**
 * Joins each edge with the values of both of its endpoint vertices.
 *
 * <p>The value of the top vertex is appended first, then the value of the
 * bottom vertex, producing (top ID, bottom ID, edge value, top value, bottom value).
 *
 * @return edge data set extended with both endpoint vertex values
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
    // Attach the top-vertex value, keyed on the edge's top ID (field 0).
    DataSet<Tuple4<KT, KB, EV, VVT>> edgesWithTopValue = edges
        .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(0)
        .equalTo(0)
        .projectFirst(0, 1, 2)
        .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
        .name("Edge with vertex");

    // Attach the bottom-vertex value, keyed on the edge's bottom ID (field 1).
    return edgesWithTopValue
        .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(1)
        .equalTo(0)
        .projectFirst(0, 1, 2, 3)
        .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
        .name("Edge with vertices");
}
out.collect(new Tuple3<>(first.f0, first.f1, second.f1)); }).withForwardedFieldsFirst("f0;f1").withForwardedFieldsSecond("f1");
@Override public DataSet<Edge<K, Tuple2<EV, Degrees>>> runInternal(Graph<K, VV, EV> input) throws Exception { // t, d(t) DataSet<Vertex<K, Degrees>> vertexDegrees = input .run(new VertexDegrees<K, VV, EV>() .setParallelism(parallelism)); // s, t, d(t) return input.getEdges() .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(1) .equalTo(0) .with(new JoinEdgeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge target degrees"); } }
@Test
public void testBinaryForwardedInLine1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> left = env.fromElements(new Tuple2<Long, Long>(3L, 4L));
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> right = env.fromElements(new Tuple2<Long, Long>(3L, 2L));

    // Forwarded fields are declared inline on the operator rather than via annotations.
    left.join(right).where(0).equalTo(0).with(new NoAnnotationJoin<Long>())
        .withForwardedFieldsFirst("0->1; 1->2").withForwardedFieldsSecond("1->0")
        .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    Plan plan = env.createProgramPlan();
    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> join = (InnerJoinOperatorBase<?, ?, ?, ?>) sink.getInput();
    DualInputSemanticProperties props = join.getSemanticProperties();

    // First input: field 0 forwards to 1, field 1 forwards to 2.
    assertEquals(1, props.getForwardingTargetFields(0, 0).size());
    assertTrue(props.getForwardingTargetFields(0, 0).contains(1));
    assertEquals(1, props.getForwardingTargetFields(0, 1).size());
    assertTrue(props.getForwardingTargetFields(0, 1).contains(2));

    // Second input: field 1 forwards to 0, field 0 forwards nowhere.
    assertEquals(1, props.getForwardingTargetFields(1, 1).size());
    assertTrue(props.getForwardingTargetFields(1, 1).contains(0));
    assertNotNull(props.getForwardingTargetFields(1, 0));
    assertEquals(0, props.getForwardingTargetFields(1, 0).size());
}
.with(new DummyTestJoinFunction2()) .withForwardedFieldsFirst("2;4->0") .withForwardedFieldsSecond("0->4;1;1->3"); SemanticProperties semProps = joinOp.getSemanticProperties();
.withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME); .name(JOIN_WITH_SOLUTION_SET); if(joinPreservesSolutionSet) { ((JoinOperator<?,?,?>)join2).withForwardedFieldsFirst("*");
@Test
public void testJoinWithBroadcastSet() throws Exception {
    /*
     * Join with broadcast set
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> right =
        CollectionDataSets.getSmall5TupleDataSet(env);

    // The integer data set is registered as broadcast variable "ints" on the join function.
    DataSet<Tuple3<String, String, Integer>> joined = left
        .fullOuterJoin(right)
        .where(1)
        .equalTo(4)
        .with(new T3T5BCJoin())
        .withBroadcastSet(broadcastInts, "ints");

    List<Tuple3<String, String, Integer>> result = joined.collect();

    // Every key appears on both sides, so the full outer join yields no null-padded rows.
    String expected =
        "Hi,Hallo,55\n"
            + "Hi,Hallo Welt wie,55\n"
            + "Hello,Hallo Welt,55\n"
            + "Hello world,Hallo Welt,55\n";

    compareResultAsTuples(result, expected);
}
.where(new SourceId<>()).equalTo(new LeftElementId<>()) .with(new FlatJoinSourceEdgeReference(true)) .leftOuterJoin(idJoin) .where(new TargetId<>()).equalTo(new LeftElementId<>()) .with(new FlatJoinSourceEdgeReference(false)) .groupBy(new Id<>()) .reduceGroup(new AddNewIdToDuplicatedEdge()) .map(new MapFunctionAddGraphElementToGraph2<>(newGraphid));
.withBroadcastSet(input1, "bc2") .withBroadcastSet(result1, "bc3") .output(new DiscardingOutputFormat<String>());
.union(paths) .groupBy(0, 1) .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id DataSet<Tuple4<String, Integer, Integer, Integer>> sc1 = env .fromElements(new Tuple4<>("1", 61, 6, 29), new Tuple4<>("2", 7, 13, 10), new Tuple4<>("3", 8, 13, 27)); // Sc2 generates N x values to be evaluated with the polynomial identified by id DataSet<Tuple2<String, Integer>> sc2 = env .fromElements(new Tuple2<>("1", 5), new Tuple2<>("2", 3), new Tuple2<>("3", 6)); // Sc3 generates N y values to be evaluated with the polynomial identified by id DataSet<Tuple2<String, Integer>> sc3 = env .fromElements(new Tuple2<>("1", 2), new Tuple2<>("2", 3), new Tuple2<>("3", 7)); // Jn1 matches x and y values on id and emits (id, x, y) triples JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> jn1 = sc2.join(sc3).where(0).equalTo(0).with(new Jn1()); // Jn2 matches polynomial and arguments by id, computes p = min(P(x),P(y)) and emits (id, p) tuples JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> jn2 = jn1.join(sc1).where(0).equalTo(0).with(new Jn2()); // Mp1 selects (id, x, y) triples where x = y and broadcasts z (=x=y) to Mp2 FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> mp1 = jn1.flatMap(new Mp1()); // Mp2 filters out all p values which can be divided by z List<Tuple2<String, Integer>> result = jn2.flatMap(new Mp2()).withBroadcastSet(mp1, "z").collect(); JavaProgramTestBase.compareResultAsText(result, RESULT); }