// Builds a queue-backed DStream over a single EMPTY batch of
// ((String, Long, Object), (Long, Object[])) tuples — useful as a minimal
// skeleton for testing queueStream wiring without real data.
// NOTE(review): `list` is empty here; presumably real elements would be added
// for a meaningful test — confirm against the surrounding example.
List<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>> list = Arrays.asList(); // Random batch time, for the sake of the example. JavaStreamingContext jssc = new JavaStreamingContext("local[*]", "TestApp", new Duration(4000)); Queue<JavaRDD<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>>> rddQueue = new LinkedList<>(); rddQueue.add(jssc.sparkContext().parallelize(list)); JavaDStream<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>> dStream = jssc.queueStream(rddQueue);
// Create the context JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create the queue through which RDDs can be pushed to // a QueueInputDStream Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>(); // Create and push some RDDs into the queue List<Integer> list = Lists.newArrayList(); for (int i = 0; i < 1000; i++) { list.add(i); } for (int i = 0; i < 30; i++) { rddQueue.add(ssc.sparkContext().parallelize(list)); } // Create the QueueInputDStream and use it do some processing JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue); JavaPairDStream<Integer, Integer> mappedStream = inputStream.mapToPair( new PairFunction<Integer, Integer, Integer>() { @Override public Tuple2<Integer, Integer> call(Integer i) { return new Tuple2<Integer, Integer>(i % 10, 1); } });
@SuppressWarnings("unchecked") @Test public void testQueueStream() { ssc.stop(); // Create a new JavaStreamingContext without checkpointing SparkConf conf = new SparkConf() .setMaster("local[2]") .setAppName("test") .set("spark.streaming.clock", "org.apache.spark.util.ManualClock"); ssc = new JavaStreamingContext(conf, new Duration(1000)); List<List<Integer>> expected = Arrays.asList( Arrays.asList(1,2,3), Arrays.asList(4,5,6), Arrays.asList(7,8,9)); JavaSparkContext jsc = new JavaSparkContext(ssc.ssc().sc()); JavaRDD<Integer> rdd1 = jsc.parallelize(Arrays.asList(1, 2, 3)); JavaRDD<Integer> rdd2 = jsc.parallelize(Arrays.asList(4, 5, 6)); JavaRDD<Integer> rdd3 = jsc.parallelize(Arrays.asList(7,8,9)); Queue<JavaRDD<Integer>> rdds = new LinkedList<>(); rdds.add(rdd1); rdds.add(rdd2); rdds.add(rdd3); JavaDStream<Integer> stream = ssc.queueStream(rdds); JavaTestUtils.attachTestOutputStream(stream); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); }
@SuppressWarnings("unchecked") @Test public void testQueueStream() { ssc.stop(); // Create a new JavaStreamingContext without checkpointing SparkConf conf = new SparkConf() .setMaster("local[2]") .setAppName("test") .set("spark.streaming.clock", "org.apache.spark.util.ManualClock"); ssc = new JavaStreamingContext(conf, new Duration(1000)); List<List<Integer>> expected = Arrays.asList( Arrays.asList(1,2,3), Arrays.asList(4,5,6), Arrays.asList(7,8,9)); JavaSparkContext jsc = new JavaSparkContext(ssc.ssc().sc()); JavaRDD<Integer> rdd1 = jsc.parallelize(Arrays.asList(1, 2, 3)); JavaRDD<Integer> rdd2 = jsc.parallelize(Arrays.asList(4, 5, 6)); JavaRDD<Integer> rdd3 = jsc.parallelize(Arrays.asList(7,8,9)); Queue<JavaRDD<Integer>> rdds = new LinkedList<>(); rdds.add(rdd1); rdds.add(rdd2); rdds.add(rdd3); JavaDStream<Integer> stream = ssc.queueStream(rdds); JavaTestUtils.attachTestOutputStream(stream); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); }