/**
 * Intersects this graph's edge set with the supplied edge set. Edges are paired when
 * fields 0, 1 and 2 are all equal, and every group of matching edges contributes a
 * single edge to the result.
 *
 * @param edges edges to compute intersection with
 * @return edge set containing one edge for all matching pairs of the same edge
 */
private DataSet<Edge<K, EV>> getDistinctEdgeIntersection(DataSet<Edge<K, EV>> edges) {
    // For every pair agreeing on all three key fields, keep the edge from this graph.
    DataSet<Edge<K, EV>> matchedEdges = this.getEdges()
        .join(edges)
        .where(0, 1, 2)
        .equalTo(0, 1, 2)
        .with(new JoinFunction<Edge<K, EV>, Edge<K, EV>, Edge<K, EV>>() {
            @Override
            public Edge<K, EV> join(Edge<K, EV> left, Edge<K, EV> right) throws Exception {
                return left;
            }
        })
        .withForwardedFieldsFirst("*")
        .name("Intersect edges");

    // The join emits one record per matching pair; remove the duplicates.
    return matchedEdges
        .distinct()
        .name("Edges");
}
@Override public DataSet<Edge<K, Tuple2<EV, Degrees>>> runInternal(Graph<K, VV, EV> input) throws Exception { // t, d(t) DataSet<Vertex<K, Degrees>> vertexDegrees = input .run(new VertexDegrees<K, VV, EV>() .setParallelism(parallelism)); // s, t, d(t) return input.getEdges() .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(1) .equalTo(0) .with(new JoinEdgeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge target degrees"); } }
@Override public DataSet<Edge<K, Tuple2<EV, Degrees>>> runInternal(Graph<K, VV, EV> input) throws Exception { // s, d(s) DataSet<Vertex<K, Degrees>> vertexDegrees = input .run(new VertexDegrees<K, VV, EV>() .setParallelism(parallelism)); // s, t, d(s) return input.getEdges() .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(0) .equalTo(0) .with(new JoinEdgeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge source degrees"); } }
@Override public DataSet<Edge<K, Tuple2<EV, LongValue>>> runInternal(Graph<K, VV, EV> input) throws Exception { // t, d(t) DataSet<Vertex<K, LongValue>> vertexDegrees = input .run(new VertexDegree<K, VV, EV>() .setReduceOnTargetId(!reduceOnSourceId.get()) .setParallelism(parallelism)); // s, t, d(t) return input.getEdges() .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(1) .equalTo(0) .with(new JoinEdgeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge target degree"); } }
@Override public DataSet<Edge<K, Tuple2<EV, LongValue>>> runInternal(Graph<K, VV, EV> input) throws Exception { // s, d(s) DataSet<Vertex<K, LongValue>> vertexDegrees = input .run(new VertexDegree<K, VV, EV>() .setReduceOnTargetId(reduceOnTargetId.get()) .setParallelism(parallelism)); // s, t, d(s) return input.getEdges() .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(0) .equalTo(0) .with(new JoinEdgeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge source degree"); } }
@Override public DataSet<Edge<K, Tuple3<EV, Degrees, Degrees>>> runInternal(Graph<K, VV, EV> input) throws Exception { // s, t, d(s) DataSet<Edge<K, Tuple2<EV, Degrees>>> edgeSourceDegrees = input .run(new EdgeSourceDegrees<K, VV, EV>() .setParallelism(parallelism)); // t, d(t) DataSet<Vertex<K, Degrees>> vertexDegrees = input .run(new VertexDegrees<K, VV, EV>() .setParallelism(parallelism)); // s, t, (d(s), d(t)) return edgeSourceDegrees .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(1) .equalTo(0) .with(new JoinEdgeDegreeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge target degree"); } }
.with(new ProjectVertex<>()) .setParallelism(parallelism) .name("Project low-degree vertices"); .with(new ProjectEdge<>()) .setParallelism(parallelism) .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "target" : "source")) .leftOuterJoin(highDegreeVertices, joinHint) .where(reduceOnTargetId.get() ? 0 : 1) .with(new ProjectEdge<>()) .setParallelism(parallelism) .name("Project low-degree edges by " + (reduceOnTargetId.get() ? "source" : "target"));
.join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexIdJoin<>(1)) .withForwardedFieldsFirst("f1->f0").name("Edge with source vertex ID"); return edgesWithSources.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction<>(neighborsFunction)).name("Neighbors function"); .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexIdJoin<>(0)) .withForwardedFieldsFirst("f0").name("Edge with target vertex ID"); return edgesWithTargets.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction<>(neighborsFunction)).name("Neighbors function");
@Override public DataSet<Edge<K, Tuple3<EV, LongValue, LongValue>>> runInternal(Graph<K, VV, EV> input) throws Exception { // s, t, d(s) DataSet<Edge<K, Tuple2<EV, LongValue>>> edgeSourceDegrees = input .run(new EdgeSourceDegree<K, VV, EV>() .setReduceOnTargetId(reduceOnTargetId.get()) .setParallelism(parallelism)); // t, d(t) DataSet<Vertex<K, LongValue>> vertexDegrees = input .run(new VertexDegree<K, VV, EV>() .setReduceOnTargetId(reduceOnTargetId.get()) .setParallelism(parallelism)); // s, t, (d(s), d(t)) return edgeSourceDegrees .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND) .where(1) .equalTo(0) .with(new JoinEdgeDegreeWithVertexDegree<>()) .setParallelism(parallelism) .name("Edge target degree"); } }
.join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexWithNeighborValueJoin<>(1)) .withForwardedFieldsFirst("f1->f0").name("Vertex with in-neighbor value"); return verticesWithSourceNeighborValues.groupBy(0).reduce(new ApplyNeighborReduceFunction<>( reduceNeighborsFunction)).name("Neighbors function"); .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexWithNeighborValueJoin<>(0)) .withForwardedFieldsFirst("f0").name("Vertex with out-neighbor value"); return verticesWithTargetNeighborValues.groupBy(0).reduce(new ApplyNeighborReduceFunction<>( reduceNeighborsFunction)).name("Neighbors function");
.join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexIdJoin<>(1)) .withForwardedFieldsFirst("f1->f0").name("Edge with source vertex ID"); return edgesWithSources.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction<>(neighborsFunction)) .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexIdJoin<>(0)) .withForwardedFieldsFirst("f0").name("Edge with target vertex ID"); return edgesWithTargets.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction<>(neighborsFunction))
.with(new JoinVertexWithVertexDegree<>()) .setParallelism(parallelism) .name("Zero degree vertices");
.with(new JoinVertexWithVertexDegree<>()) .setParallelism(parallelism) .name("Zero degree vertices");
.with(new JoinVertexWithVertexDegree<>()) .setParallelism(parallelism) .name("Zero degree vertices");
.with(new JoinVertexDegreeWithTriangleCount<>()) .setParallelism(parallelism) .name("Clustering coefficient");
.with(new JoinVertexDegreeWithTriangleCount<>()) .setParallelism(parallelism) .name("Clustering coefficient");
@Override public DataSet<Vertex<K, Degrees>> runInternal(Graph<K, VV, EV> input) throws Exception { // s, t, bitmask DataSet<Tuple2<K, ByteValue>> vertexWithEdgeOrder = input.getEdges() .flatMap(new EmitAndFlipEdge<>()) .setParallelism(parallelism) .name("Emit and flip edge") .groupBy(0, 1) .reduceGroup(new ReduceBitmask<>()) .setParallelism(parallelism) .name("Reduce bitmask"); // s, d(s) DataSet<Vertex<K, Degrees>> vertexDegrees = vertexWithEdgeOrder .groupBy(0) .reduceGroup(new DegreeCount<>()) .setParallelism(parallelism) .name("Degree count"); if (includeZeroDegreeVertices.get()) { vertexDegrees = input.getVertices() .leftOuterJoin(vertexDegrees) .where(0) .equalTo(0) .with(new JoinVertexWithVertexDegrees<>()) .setParallelism(parallelism) .name("Zero degree vertices"); } return vertexDegrees; }
.<Tuple3<K, K, FloatValue>>projectSecond(2) .setParallelism(parallelism) .name("Edge score");
.withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);