/** * Method to get total traffic counts of different type of vehicles for each route. * * @param filteredIotDataStream IoT data stream */ public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) { // We need to get count of vehicle group by routeId and vehicleType JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L)) .reduceByKey((a, b) -> a + b); // Need to keep state for total count JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair .mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600)));//maintain state for one hour // Transform to dstream of TrafficData JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2); JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc); // Map Cassandra table column Map<String, String> columnNameMappings = new HashMap<String, String>(); columnNameMappings.put("routeId", "routeid"); columnNameMappings.put("vehicleType", "vehicletype"); columnNameMappings.put("totalCount", "totalcount"); columnNameMappings.put("timeStamp", "timestamp"); columnNameMappings.put("recordDate", "recorddate"); // call CassandraStreamingJavaUtil function to save in DB javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic", CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra(); }
.mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));//maintain state for one hour
// Run mapWithState on streamOfPairs with a StateSpec built from mappingFunction only
// (no further StateSpec options are set here); this statement does not capture the
// returned stream.
streamOfPairs.mapWithState(StateSpec.function(mappingFunction));
/**
 * Drives a {@code mapWithState} operation over the given input batches and checks both the
 * mapped outputs and the state snapshots collected from each batch.
 *
 * <p>Every input element is paired with the value {@code 1} before the spec is applied. The
 * manual clock is advanced by {@code batchDuration * numBatches + 1}, after which the method
 * waits for the batches to complete and compares what was collected with the expectations.
 *
 * @param input input elements, one inner list per batch
 * @param mapWithStateSpec spec handed to {@code mapWithState}
 * @param expectedOutputs expected mapped outputs; its size is the number of batches to run
 * @param expectedStateSnapshots expected state snapshots
 */
private <K, S, T> void testOperation(
    List<List<K>> input,
    StateSpec<K, Integer, S, T> mapWithStateSpec,
    List<Set<T>> expectedOutputs,
    List<Set<Tuple2<K, S>>> expectedStateSnapshots) {
  final int numBatches = expectedOutputs.size();

  // Build the pipeline: element -> (element, 1) -> mapWithState.
  JavaDStream<K> source = JavaTestUtils.attachTestInputStream(ssc, input, 2);
  JavaDStream<Tuple2<K, Integer>> pairedWithOne = source.map(element -> new Tuple2<>(element, 1));
  JavaPairDStream<K, Integer> keyedStream = JavaPairDStream.fromJavaDStream(pairedWithOne);
  JavaMapWithStateDStream<K, Integer, S, T> mapWithStateDStream =
      keyedStream.mapWithState(mapWithStateSpec);

  // Collect the mapped output of every batch.
  List<Set<T>> collectedOutputs = Collections.synchronizedList(new ArrayList<>());
  mapWithStateDStream.foreachRDD(rdd -> {
    collectedOutputs.add(Sets.newHashSet(rdd.collect()));
  });

  // Collect the state snapshot of every batch.
  List<Set<Tuple2<K, S>>> collectedStateSnapshots =
      Collections.synchronizedList(new ArrayList<>());
  mapWithStateDStream.stateSnapshots().foreachRDD(rdd -> {
    collectedStateSnapshots.add(Sets.newHashSet(rdd.collect()));
  });

  BatchCounter batchCounter = new BatchCounter(ssc.ssc());
  ssc.start();

  // Advance the manual clock just past the last batch boundary.
  ManualClock clock = (ManualClock) ssc.ssc().scheduler().clock();
  long advanceBy = ssc.ssc().progressListener().batchDuration() * numBatches + 1;
  clock.advance(advanceBy);
  batchCounter.waitUntilBatchesCompleted(numBatches, 10000);

  Assert.assertEquals(expectedOutputs, collectedOutputs);
  Assert.assertEquals(expectedStateSnapshots, collectedStateSnapshots);
}
}
/**
 * Applies the given {@code mapWithState} spec to a test input stream and asserts that the
 * per-batch outputs and per-batch state snapshots match the expected values.
 *
 * <p>Each input element becomes the key of a pair whose value is {@code 1}. After the
 * streaming context is started, the manual clock is moved forward by
 * {@code batchDuration * numBatches + 1} and the method waits for {@code numBatches} batches
 * before asserting.
 *
 * @param input input elements, one inner list per batch
 * @param mapWithStateSpec spec handed to {@code mapWithState}
 * @param expectedOutputs expected mapped outputs; its size is the number of batches to run
 * @param expectedStateSnapshots expected state snapshots
 */
private <K, S, T> void testOperation(
    List<List<K>> input,
    StateSpec<K, Integer, S, T> mapWithStateSpec,
    List<Set<T>> expectedOutputs,
    List<Set<Tuple2<K, S>>> expectedStateSnapshots) {
  final int numBatches = expectedOutputs.size();

  // Wire up the stream under test: key every element with the value 1, then apply the spec.
  JavaDStream<K> testInput = JavaTestUtils.attachTestInputStream(ssc, input, 2);
  JavaDStream<Tuple2<K, Integer>> onesByKey = testInput.map(key -> new Tuple2<>(key, 1));
  JavaPairDStream<K, Integer> pairStream = JavaPairDStream.fromJavaDStream(onesByKey);
  JavaMapWithStateDStream<K, Integer, S, T> statefulStream =
      pairStream.mapWithState(mapWithStateSpec);

  // One set of mapped outputs is recorded per batch.
  List<Set<T>> actualOutputs = Collections.synchronizedList(new ArrayList<>());
  statefulStream.foreachRDD(batch -> {
    actualOutputs.add(Sets.newHashSet(batch.collect()));
  });

  // One set of state entries is recorded per batch.
  List<Set<Tuple2<K, S>>> actualStateSnapshots =
      Collections.synchronizedList(new ArrayList<>());
  statefulStream.stateSnapshots().foreachRDD(batch -> {
    actualStateSnapshots.add(Sets.newHashSet(batch.collect()));
  });

  BatchCounter completedBatches = new BatchCounter(ssc.ssc());
  ssc.start();

  // Push the manual clock one tick beyond the final batch.
  ManualClock manualClock = (ManualClock) ssc.ssc().scheduler().clock();
  long elapsed = ssc.ssc().progressListener().batchDuration() * numBatches + 1;
  manualClock.advance(elapsed);
  completedBatches.waitUntilBatchesCompleted(numBatches, 10000);

  Assert.assertEquals(expectedOutputs, actualOutputs);
  Assert.assertEquals(expectedStateSnapshots, actualStateSnapshots);
}
}
wordsDstream.mapWithState( StateSpec.function(mappingFunc) .initialState(initialRDD) wordsDstream.mapWithState( StateSpec.function(mappingFunc2) .initialState(initialRDD)
wordsDstream.mapWithState( StateSpec.function(mapFn) .initialState(initialRDD) wordsDstream.mapWithState( StateSpec.function(mapFn2) .initialState(initialRDD)
wordsDstream.mapWithState( StateSpec.function(mappingFunc) .initialState(initialRDD) wordsDstream.mapWithState( StateSpec.function(mappingFunc2) .initialState(initialRDD)
wordsDstream.mapWithState( StateSpec.function(mapFn) .initialState(initialRDD) wordsDstream.mapWithState( StateSpec.function(mapFn2) .initialState(initialRDD)