/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method-reference form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(Integer::doubleValue);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Lambda form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(value -> 1.0 * value);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Lambda form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(value -> 1.0 * value);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method-reference form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(Integer::doubleValue);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method-reference form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(Integer::doubleValue);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
/**
 * Zips an RDD of integers with a double-valued mapping of itself and calls {@code count()}
 * on the resulting pair RDD. The returned count is not asserted.
 */
@Test
public void zip() {
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Lambda form of the Integer -> double conversion.
  JavaDoubleRDD asDoubles = ints.mapToDouble(value -> 1.0 * value);
  JavaPairRDD<Integer, Double> pairs = ints.zip(asDoubles);
  pairs.count();
}
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@Test public void groupByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, Boolean> areOdd = x -> (x._1() % 2 == 0) && (x._2() % 2 == 0); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens = pairRDD.groupBy(areOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = pairRDD.groupBy(areOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@Test public void groupByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, Boolean> areOdd = x -> (x._1() % 2 == 0) && (x._2() % 2 == 0); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens = pairRDD.groupBy(areOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = pairRDD.groupBy(areOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, Boolean> areOdd = x -> (x._1() % 2 == 0) && (x._2() % 2 == 0); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens = pairRDD.groupBy(areOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = pairRDD.groupBy(areOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
/**
 * Zips this stream with {@code other} by delegating to {@code zip} on the backing RDD.
 *
 * <p>When {@code other} is a {@code SparkStream}, its RDD is used directly. Otherwise the
 * other stream is collected and re-parallelized with as many partitions as this stream's
 * RDD before zipping.
 *
 * <p>NOTE(review): Spark's RDD zip is documented to expect matching partition counts and
 * matching element counts per partition; confirm the collect/parallelize path satisfies
 * that for the streams used here.
 *
 * @param other the stream whose elements become the second component of each pair
 * @param <U>   element type of {@code other}
 * @return a pair stream wrapping the zipped RDD
 */
@Override
public <U> SparkPairStream<T, U> zip(@NonNull MStream<U> other) {
  if (!(other instanceof SparkStream)) {
    // Not Spark-backed: materialize the other stream and distribute it like this RDD.
    JavaSparkContext localContext = new JavaSparkContext(rdd.context());
    return new SparkPairStream<>(
        rdd.zip(localContext.parallelize(other.collect(), rdd.partitions().size())));
  }
  SparkStream<U> sparkOther = Cast.<SparkStream<U>>as(other);
  return new SparkPairStream<>(rdd.zip(sparkOther.rdd));
}
.map(org.apache.spark.mllib.linalg.Vector::asML); JavaRDD<VectorPair> featuresExpected = dataRDD.zip(expected).map(pair -> { VectorPair featuresExpected1 = new VectorPair(); featuresExpected1.setFeatures(pair._1());
.map(org.apache.spark.mllib.linalg.Vector::asML); JavaRDD<VectorPair> featuresExpected = dataRDD.zip(expected).map(pair -> { VectorPair featuresExpected1 = new VectorPair(); featuresExpected1.setFeatures(pair._1());
.map(org.apache.spark.mllib.linalg.Vector::asML); JavaRDD<VectorPair> featuresExpected = dataRDD.zip(expected).map(pair -> { VectorPair featuresExpected1 = new VectorPair(); featuresExpected1.setFeatures(pair._1());