org.apache.flink.api.java.operators.CoGroupOperator$CoGroupOperatorSets$CoGroupOperatorSetsPredicate java code examples

/**
 * Verifies that a coGroup between the workset and the solution set of a delta iteration is
 * rejected with an {@link InvalidProgramException} when the coGroup key used on the solution
 * set side (field 2) is not the key the iteration was opened with
 * ({@code iterateDelta(initialWorkSet, 10, 1)}). Both orientations are checked: solution set
 * as second input and solution set as first input.
 */
@Test
public void testRejectWhenSolutionSetKeysDontMatchCoGroup() {
  try {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
    DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, 10, 1);
    // solution set is the second coGroup input, keyed on field 2
    try {
      iteration.getWorkset().coGroup(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetCoGroup1());
      fail("Accepted invalid program.");
    }
    catch (InvalidProgramException e) {
      // all good!
    }
    // solution set is the first coGroup input, keyed on field 2
    try {
      iteration.getSolutionSet().coGroup(iteration.getWorkset()).where(2).equalTo(1).with(new SolutionWorksetCoGroup2());
      fail("Accepted invalid program.");
    }
    catch (InvalidProgramException e) {
      // all good!
    }
  }
  catch (Exception e) {
    // Keep the original exception as the cause so the test report carries its type and
    // stack trace; fail(e.getMessage()) would drop both and may even pass a null message.
    throw new AssertionError("Unexpected exception: " + e, e);
  }
}

/**
 * Runs a coGroup on field 0 whose function is a Java lambda: for every key the f0 values of
 * all records from both inputs are added up and emitted as one Integer.
 */
@Test
public void testCoGroupLambda() throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  DataSet<Tuple2<Integer, String>> left = env.fromElements(
    new Tuple2<>(1, "hello"), new Tuple2<>(2, "what's"), new Tuple2<>(2, "up"));
  DataSet<Tuple2<Integer, String>> right = env.fromElements(
    new Tuple2<>(1, "not"), new Tuple2<>(1, "much"), new Tuple2<>(2, "really"));

  DataSet<Integer> keySums = left
    .coGroup(right)
    .where(0)
    .equalTo(0)
    .with((Iterable<Tuple2<Integer, String>> leftGroup, Iterable<Tuple2<Integer, String>> rightGroup,
        Collector<Integer> collector) -> {
      int total = 0;
      for (Tuple2<Integer, String> record : leftGroup) {
        total += record.f0;
      }
      for (Tuple2<Integer, String> record : rightGroup) {
        total += record.f0;
      }
      collector.collect(total);
    })
    .returns(Integer.class);

  // key 2: 2 + 2 (left) + 2 (right) = 6; key 1: 1 (left) + 1 + 1 (right) = 3
  compareResultAsText(keySums.collect(), "6\n3\n");
}

/**
 * Checks that a coGroup of two {@code CustomType} data sets can be keyed with a
 * {@link KeySelector} on each input when both selectors return a {@code Long}.
 * Only the program construction is exercised; nothing is executed.
 */
@Test
public void testCoGroupKeySelectors1() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
  DataSet<CustomType> ds2 = env.fromCollection(customTypeData);
  // should work
  try {
    ds1.coGroup(ds2)
    .where(
        new KeySelector<CustomType, Long>() {
            @Override
            public Long getKey(CustomType value) {
              return value.myLong;
            }
          }
        )
    .equalTo(
        new KeySelector<CustomType, Long>() {
            @Override
            public Long getKey(CustomType value) {
              return value.myLong;
            }
          }
        );
  } catch (Exception e) {
    // A bare Assert.fail() hides why the valid program was rejected; report the cause.
    throw new AssertionError("coGroup keyed by two KeySelectors should be accepted: " + e, e);
  }
}

/**
 * Test coGroup on tuples with multiple key field positions where both inputs are
 * range-partitioned up front with the same customized distribution.
 */
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, Integer, String, Long>> fiveTuples = CollectionDataSets.get5TupleDataSet(env);
  DataSet<Tuple3<Integer, Long, String>> threeTuples = CollectionDataSets.get3TupleDataSet(env);
  env.setParallelism(4);

  // one distribution instance is shared by both range partitionings
  TestDistribution distribution = new TestDistribution();
  DataSet<Tuple5<Integer, Long, Integer, String, Long>> rangedFiveTuples =
      DataSetUtils.partitionByRange(fiveTuples, distribution, 0, 4);
  DataSet<Tuple3<Integer, Long, String>> rangedThreeTuples =
      DataSetUtils.partitionByRange(threeTuples, distribution, 0, 1);

  List<Tuple3<Integer, Long, String>> actual = rangedFiveTuples
      .coGroup(rangedThreeTuples)
      .where(0, 4)
      .equalTo(0, 1)
      .with(new Tuple5Tuple3CoGroup())
      .collect();

  String expected =
      "1,1,Hallo\n"
      + "2,2,Hallo Welt\n"
      + "3,2,Hallo Welt wie gehts?\n"
      + "3,2,ABC\n"
      + "5,3,HIJ\n"
      + "5,3,IJK\n";
  compareResultAsTuples(actual, expected);
}

// Excerpt (the chained expression below is not terminated in this snippet).
// Stores an optimizer hint in cfg that requests the range-repartition ship strategy for the
// second input, then attaches cfg to a self-coGroup of `input` on field 0 via withParameters.
cfg.setString(Optimizer.HINT_SHIP_STRATEGY_SECOND_INPUT, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE);
input.coGroup(input).where(0).equalTo(0)
  .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
  .withParameters(cfg)

.where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector())
.withPartitioner(partitioner)
.with(new DummyCoGroupFunction<Pojo2, Pojo3>())

.where(1).equalTo(0)
.withPartitioner(partitioner)
.with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>())

.where("b").equalTo("a")
.withPartitioner(partitioner)
.with(new DummyCoGroupFunction<Pojo2, Pojo3>())

/**
 * Builds and executes a delta-iteration program over generated vertex and edge data:
 * each step joins the workset with the edges and coGroups the result with the solution set.
 * The final solution set is written into a local list.
 */
@Override
protected void testProgram() throws Exception {
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // parse the generated vertex and edge lines
  String[] vertexLines = ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n");
  DataSet<Long> vertices = env.fromElements(vertexLines).map(new VertexParser());

  String[] edgeLines = ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n");
  DataSet<Tuple2<Long, Long>> edges = env.fromElements(edgeLines).flatMap(new EdgeParser());

  // every vertex starts in a component identified by its own id
  DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

  // the same data set seeds both the solution set and the workset
  DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
      verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

  // step logic: join with the edges, select the minimum neighbor,
  // update if the component of the candidate is smaller
  DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
      .join(edges)
      .where(0)
      .equalTo(0)
      .with(new NeighborWithComponentIDJoin())
      .coGroup(iteration.getSolutionSet())
      .where(0)
      .equalTo(0)
      .with(new MinIdAndUpdate());

  // the delta and the next workset are the same data set
  DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

  // collect the result locally and run the program
  List<Tuple2<Long, Long>> collected = new ArrayList<>();
  result.output(new LocalCollectionOutputFormat<>(collected));
  env.execute();
}

/**
 * Checks that {@code withPartitioner} rejects a custom partitioner whose key type does not
 * match the coGroup key: the key fields are {@code Long} tuple fields while the partitioner
 * is a {@code Partitioner<Integer>}, so an {@link InvalidProgramException} is expected.
 */
@Test
public void testCoGroupWithTuplesWrongType() {
  try {
    final Partitioner<Integer> partitioner = new TestPartitionerInt();

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

    try {
      input1
        .coGroup(input2)
        .where(1).equalTo(0)
        .withPartitioner(partitioner);
      fail("should throw an exception");
    }
    catch (InvalidProgramException e) {
      // expected
    }
  }
  catch (Exception e) {
    // Keep the cause attached to the failure; fail(e.getMessage()) loses the exception
    // type and stack trace and passes null when the exception has no message.
    throw new AssertionError("Unexpected exception: " + e, e);
  }
}

/**
 * Updates vertex values by matching the vertices of this graph against a Tuple2 DataSet.
 * The match is implemented as a coGroup on the vertex ID (field 0) and the first field of
 * the input tuples; the user-defined function is applied to the values of matched records.
 *
 * @param inputDataSet the Tuple2 DataSet to join with. Its first field is the join key and
 * its second field is handed to the transformation function.
 * @param vertexJoinFunction the transformation function to apply. It receives the current
 * vertex value first and the value of the matched Tuple2 second.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph whose vertex values have been updated according to the result of the
 * vertexJoinFunction; edges and context are taken over from this graph.
*/
public <T> Graph<K, VV, EV> joinWithVertices(DataSet<Tuple2<K, T>> inputDataSet,
    final VertexJoinFunction<VV, T> vertexJoinFunction) {
  DataSet<Vertex<K, VV>> currentVertices = this.getVertices();
  DataSet<Vertex<K, VV>> updatedVertices = currentVertices
      .coGroup(inputDataSet)
      .where(0)
      .equalTo(0)
      .with(new ApplyCoGroupToVertexValues<>(vertexJoinFunction))
      .name("Join with vertices");
  return new Graph<>(updatedVertices, edges, context);
}

/**
 * Checks that {@code withPartitioner} throws an {@link InvalidProgramException} when a
 * {@code Partitioner<Long>} is combined with the keys produced by {@code Pojo2KeySelector}
 * and {@code Pojo3KeySelector}.
 * NOTE(review): presumably those selectors return a non-Long key type; their definitions
 * are not visible here, so confirm against them.
 */
@Test
public void testCoGroupWithKeySelectorsWrongType() {
  try {
    final Partitioner<Long> partitioner = new TestPartitionerLong();

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
    DataSet<Pojo3> input2 = env.fromElements(new Pojo3());

    try {
      input1
        .coGroup(input2)
        .where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector())
        .withPartitioner(partitioner);

      fail("should throw an exception");
    }
    catch (InvalidProgramException e) {
      // expected
    }
  }
  catch (Exception e) {
    // Keep the cause attached to the failure; fail(e.getMessage()) loses the exception
    // type and stack trace and passes null when the exception has no message.
    throw new AssertionError("Unexpected exception: " + e, e);
  }
}

/**
 * Updates edge values by matching the edges of this graph against a Tuple2 DataSet.
 * The match is implemented as a coGroup on the edge target ID (field 1 of the edge) and the
 * first field of the input tuples; the user-defined function is applied to the values of
 * matched records.
 *
 * @param inputDataSet the DataSet to join with. Its first field is the join key and its
 * second field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply. It receives the current
 * edge value first and the value of the matched Tuple2 second.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph whose edge values have been updated according to the result of the
 * edgeJoinFunction; vertices and context are taken over from this graph.
*/
public <T> Graph<K, VV, EV> joinWithEdgesOnTarget(DataSet<Tuple2<K, T>> inputDataSet,
    final EdgeJoinFunction<EV, T> edgeJoinFunction) {
  DataSet<Edge<K, EV>> currentEdges = this.getEdges();
  DataSet<Edge<K, EV>> updatedEdges = currentEdges
      .coGroup(inputDataSet)
      .where(1)
      .equalTo(0)
      .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
      .name("Join with edges on target");
  return new Graph<>(vertices, updatedEdges, context);
}

/**
 * Updates edge values by matching the edges of this graph against a Tuple2 DataSet.
 * The match is implemented as a coGroup on the edge source ID (field 0 of the edge) and the
 * first field of the input tuples; the user-defined function is applied to the values of
 * matched records.
 *
 * @param inputDataSet the DataSet to join with. Its first field is the join key and its
 * second field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply. It receives the current
 * edge value first and the value of the matched Tuple2 second.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph whose edge values have been updated according to the result of the
 * edgeJoinFunction; vertices and context are taken over from this graph.
*/
public <T> Graph<K, VV, EV> joinWithEdgesOnSource(DataSet<Tuple2<K, T>> inputDataSet,
    final EdgeJoinFunction<EV, T> edgeJoinFunction) {
  DataSet<Edge<K, EV>> currentEdges = this.getEdges();
  DataSet<Edge<K, EV>> updatedEdges = currentEdges
      .coGroup(inputDataSet)
      .where(0)
      .equalTo(0)
      .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
      .name("Join with edges on source");
  return new Graph<>(vertices, updatedEdges, context);
}

/**
 * Updates edge values by matching the edges of this graph against a Tuple3 DataSet on the
 * composite key of source and target ID. The match is implemented as a coGroup on fields
 * 0 and 1 of both inputs; the user-defined function is applied to the values of matched
 * records.
 *
 * @param inputDataSet the DataSet to join with. Its first two fields form the composite
 * join key and its third field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply. It receives the current
 * edge value first and the value of the matched Tuple3 second.
 * @param <T> the type of the third field of the input Tuple3 DataSet.
 * @return a new Graph whose edge values have been updated according to the result of the
 * edgeJoinFunction; vertices and context are taken over from this graph.
*/
public <T> Graph<K, VV, EV> joinWithEdges(DataSet<Tuple3<K, K, T>> inputDataSet,
    final EdgeJoinFunction<EV, T> edgeJoinFunction) {
  DataSet<Edge<K, EV>> currentEdges = this.getEdges();
  DataSet<Edge<K, EV>> updatedEdges = currentEdges
      .coGroup(inputDataSet)
      .where(0, 1)
      .equalTo(0, 1)
      .with(new ApplyCoGroupToEdgeValues<>(edgeJoinFunction))
      .name("Join with edges");
  return new Graph<>(vertices, updatedEdges, context);
}

/**
 * Checks that {@code withPartitioner} throws an {@link InvalidProgramException} when a
 * {@code Partitioner<Long>} is combined with a coGroup keyed on field {@code "a"} of
 * {@code Pojo2} and field {@code "b"} of {@code Pojo3}.
 * NOTE(review): presumably those fields are not of type Long; the POJO definitions are not
 * visible here, so confirm against them.
 */
@Test
public void testCoGroupWithPojosWrongType() {
  try {
    final Partitioner<Long> partitioner = new TestPartitionerLong();

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
    DataSet<Pojo3> input2 = env.fromElements(new Pojo3());

    try {
      input1
        .coGroup(input2)
        .where("a").equalTo("b")
        .withPartitioner(partitioner);

      fail("should throw an exception");
    }
    catch (InvalidProgramException e) {
      // expected
    }
  }
  catch (Exception e) {
    // Keep the cause attached to the failure; fail(e.getMessage()) loses the exception
    // type and stack trace and passes null when the exception has no message.
    throw new AssertionError("Unexpected exception: " + e, e);
  }
}

/**
 * Range-partitions both coGroup inputs on field 0, each with its own
 * {@code TestDistribution(3)} instance, compiles the plan and asserts that both inputs of
 * the coGroup node use the FORWARD ship strategy.
 */
@Test
public void CoGroupWithSameDistributionTest() throws Exception {
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Integer, Integer, Integer>> firstSource =
      env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
  DataSet<Tuple3<Integer, Integer, Integer>> secondSource =
      env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

  TestDistribution firstDistribution = new TestDistribution(3);
  TestDistribution secondDistribution = new TestDistribution(3);

  DataSet<Tuple3<Integer, Integer, Integer>> firstRanged =
      DataSetUtils.partitionByRange(firstSource, firstDistribution, 0);
  DataSet<Tuple3<Integer, Integer, Integer>> secondRanged =
      DataSetUtils.partitionByRange(secondSource, secondDistribution, 0);

  DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = firstRanged
      .coGroup(secondRanged)
      .where(0)
      .equalTo(0)
      .with(new CoGroupFunc());
  coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

  // compile the plan and walk back from the only sink to the coGroup node
  OptimizedPlan optimized = compileWithStats(env.createProgramPlan());
  SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
  Channel sinkInput = sinkNode.getInput();
  DualInputPlanNode coGroupNode = (DualInputPlanNode) sinkInput.getSource();

  assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
  assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput2().getShipStrategy());
}

/**
 * Checks that a coGroup accepts mixed key definitions: a tuple field position (field 3 of
 * the Tuple5 input) on the first input and a {@link KeySelector} returning {@code Long} on
 * the {@code CustomType} input. Only the program construction is exercised.
 */
@Test
public void testCoGroupKeyMixing2() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<CustomType> ds2 = env.fromCollection(customTypeData);
  // should work
  try {
    ds1.coGroup(ds2)
    .where(3)
    .equalTo(
        new KeySelector<CustomType, Long>() {
            @Override
            public Long getKey(CustomType value) {
              return value.myLong;
            }
          }
        );
  } catch (Exception e) {
    // A bare Assert.fail() hides why the valid program was rejected; report the cause.
    throw new AssertionError("coGroup mixing a field position and a KeySelector should be accepted: " + e, e);
  }
}

/**
 * Checks that a coGroup accepts mixed key definitions: a {@link KeySelector} returning
 * {@code Long} on the {@code CustomType} input and a tuple field position (field 3 of the
 * Tuple5 input) on the second input. Only the program construction is exercised.
 */
@Test
public void testCoGroupKeyMixing1() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.coGroup(ds2)
    .where(
        new KeySelector<CustomType, Long>() {
            @Override
            public Long getKey(CustomType value) {
              return value.myLong;
            }
          }
        )
    .equalTo(3);
  } catch (Exception e) {
    // A bare Assert.fail() hides why the valid program was rejected; report the cause.
    throw new AssertionError("coGroup mixing a KeySelector and a field position should be accepted: " + e, e);
  }
}

/**
 * Builds a plan in which both coGroup inputs are hash-partitioned before the coGroup
 * (first input on fields 0 and 2, second input on field 1) and then mapped with
 * forwarded-field annotations. The coGroup node of the compiled plan is handed to
 * {@code checkValidCoGroupInputProperties}.
 */
@Test
public void reuseBothPartitioningCoGroup4() {
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Integer, Integer, Integer>> firstSource =
      env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
  DataSet<Tuple3<Integer, Integer, Integer>> secondSource =
      env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

  DataSet<Tuple3<Integer, Integer, Integer>> firstHashed = firstSource.partitionByHash(0, 2);
  DataSet<Tuple3<Integer, Integer, Integer>> secondHashed = secondSource.partitionByHash(1);

  DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = firstHashed
      .map(new MockMapper())
      .withForwardedFields("0;2")
      .coGroup(secondHashed.map(new MockMapper()).withForwardedFields("1"))
      .where(0, 2)
      .equalTo(2, 1)
      .with(new MockCoGroup());
  coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

  // compile the plan and walk back from the only sink to the coGroup node
  OptimizedPlan optimized = compileWithStats(env.createProgramPlan());
  SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
  DualInputPlanNode coGroupNode = (DualInputPlanNode) sinkNode.getInput().getSource();
  checkValidCoGroupInputProperties(coGroupNode);
}

Most used methods

Popular in Java

Making http requests using okhttp
onRequestPermissionsResult (Fragment)
getSharedPreferences (Context)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
BufferedInputStream (java.io)
A BufferedInputStream adds functionality to another input stream — namely, the ability to buffer the i
MessageDigest (java.security)
Uses a one-way hash function to turn an arbitrary number of bytes into a fixed-length byte sequence.
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
Timer (java.util)
Timers schedule one-shot or recurring TimerTask for execution. Prefer java.util.concurrent.Scheduled
TreeMap (java.util)
Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
Top plugins for WebStorm

How to use CoGroupOperator$CoGroupOperatorSets$CoGroupOperatorSetsPredicate in org.apache.flink.api.java.operators

Best Java code snippets using org.apache.flink.api.java.operators.CoGroupOperator$CoGroupOperatorSets$CoGroupOperatorSetsPredicate (Showing top 20 results out of 315)

How to use
CoGroupOperator$CoGroupOperatorSets$CoGroupOperatorSetsPredicate
in
org.apache.flink.api.java.operators