// Build two keyed streams from the access logs: one through Functions.IpTuple and one
// through Functions.IpContentTuple (presumably request count and content size per IP --
// confirm against the Functions class).
JavaPairDStream<String, Long> ipDStream =
    accessLogsDStream.mapToPair(new Functions.IpTuple());
JavaPairDStream<String, Long> ipBytesDStream =
    accessLogsDStream.mapToPair(new Functions.IpContentTuple());

// Collapse each keyed stream to one value per key with Functions.LongSumReducer.
JavaPairDStream<String, Long> ipCountsDStream =
    ipDStream.reduceByKey(new Functions.LongSumReducer());
JavaPairDStream<String, Long> ipBytesSumDStream =
    ipBytesDStream.reduceByKey(new Functions.LongSumReducer());

// Join on the key; each joined value is (value from ipBytesSumDStream, value from
// ipCountsDStream), in that order.
JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream =
    ipBytesSumDStream.join(ipCountsDStream);
requestCountRBW.print(); JavaPairDStream<String, Long> ipAddressPairDStream = accessLogsDStream.mapToPair( new PairFunction<ApacheAccessLog, String, Long>() { public Tuple2<String, Long> call(ApacheAccessLog entry) {
/**
 * Produces a stream of (word, occurrences) pairs from a stream of text.
 *
 * <p>Each input string is split on the single-space character, every resulting piece is
 * paired with the count 1, and the counts are summed per piece.
 *
 * @param input stream of strings to split into words
 * @return stream of pairs mapping each word to the sum of its counts
 */
@Override
public JavaPairDStream<String, Integer> process(JavaDStream<String> input) {
  // Splits one input string into its space-separated pieces.
  FlatMapFunction<String, String> splitOnSpace = new FlatMapFunction<String, String>() {
    @Override
    public Iterable<String> call(String line) {
      return Arrays.asList(line.split(" "));
    }
  };

  // Tags every piece with an initial count of one.
  PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(String word) {
      return new Tuple2<String, Integer>(word, 1);
    }
  };

  // Adds two partial counts that share a key.
  Function2<Integer, Integer, Integer> addCounts = new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer left, Integer right) {
      return left + right;
    }
  };

  return input.flatMap(splitOnSpace).mapToPair(pairWithOne).reduceByKey(addCounts);
}
.mapToPair((String s) -> new Tuple2<>(s, 1)) .reduceByKey((Integer i1, Integer i2)-> i1 + i2);
static void streamSpansToStorage( JavaDStream<byte[]> stream, ReadSpans readSpans, AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId ) { JavaDStream<Span> spans = stream.flatMap(readSpans); // TODO: plug in some filter to drop spans regardless of trace ID // spans = spans.filter(spanFilter); JavaPairDStream<String, Iterable<Span>> tracesById = spans .mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s)) .groupByKey(); tracesById.foreachRDD(rdd -> { rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId); }); }
/** * Method to get window traffic counts of different type of vehicles for each route. * Window duration = 30 seconds and Slide interval = 10 seconds * * @param filteredIotDataStream IoT data stream */ public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) { // reduce by key and window (30 sec window and 10 sec slide). JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L)) .reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10)); // Transform to dstream of TrafficData JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc); // Map Cassandra table column Map<String, String> columnNameMappings = new HashMap<String, String>(); columnNameMappings.put("routeId", "routeid"); columnNameMappings.put("vehicleType", "vehicletype"); columnNameMappings.put("totalCount", "totalcount"); columnNameMappings.put("timeStamp", "timestamp"); columnNameMappings.put("recordDate", "recorddate"); // call CassandraStreamingJavaUtil function to save in DB javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "window_traffic", CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)).saveToCassandra(); }
/** Checks that filtering a pair stream keeps only the pairs whose key contains "a". */
@Test
public void testPairFilter() {
  List<List<String>> inputData = Arrays.asList(
      Arrays.asList("giants", "dodgers"),
      Arrays.asList("yankees", "red sox"));

  // Pair every name with its length, then keep only names containing the letter "a".
  JavaDStream<String> names = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
  JavaPairDStream<String, Integer> nameLengths =
      names.mapToPair(name -> new Tuple2<>(name, name.length()));
  JavaPairDStream<String, Integer> namesWithA =
      nameLengths.filter(pair -> pair._1().contains("a"));
  JavaTestUtils.attachTestOutputStream(namesWithA);

  List<List<Tuple2<String, Integer>>> actual = JavaTestUtils.runStreams(ssc, 2, 2);

  // Only one name per input batch contains "a".
  List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
      Arrays.asList(new Tuple2<>("giants", 6)),
      Arrays.asList(new Tuple2<>("yankees", 7)));
  Assert.assertEquals(expected, actual);
}
/** Checks that filtering a pair stream keeps only the pairs whose key contains "a". */
@Test
public void testPairFilter() {
  List<List<String>> inputData = Arrays.asList(
      Arrays.asList("giants", "dodgers"),
      Arrays.asList("yankees", "red sox"));

  // Pair every name with its length, then keep only names containing the letter "a".
  JavaDStream<String> names = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
  JavaPairDStream<String, Integer> nameLengths =
      names.mapToPair(name -> new Tuple2<>(name, name.length()));
  JavaPairDStream<String, Integer> namesWithA =
      nameLengths.filter(pair -> pair._1().contains("a"));
  JavaTestUtils.attachTestOutputStream(namesWithA);

  List<List<Tuple2<String, Integer>>> actual = JavaTestUtils.runStreams(ssc, 2, 2);

  // Only one name per input batch contains "a".
  List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
      Arrays.asList(new Tuple2<>("giants", 6)),
      Arrays.asList(new Tuple2<>("yankees", 7)));
  Assert.assertEquals(expected, actual);
}
/** Verifies that {@code filter} on a pair stream drops pairs whose key lacks the letter "a". */
@SuppressWarnings("unchecked")
@Test
public void testPairFilter() {
  // Two input batches of team names.
  List<List<String>> inputData = Arrays.asList(
      Arrays.asList("giants", "dodgers"),
      Arrays.asList("yankees", "red sox"));

  // Per batch, only the single name containing "a" should survive, paired with its length.
  List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
      Arrays.asList(new Tuple2<>("giants", 6)),
      Arrays.asList(new Tuple2<>("yankees", 7)));

  JavaDStream<String> teamNames = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
  JavaPairDStream<String, Integer> keptPairs = teamNames
      .mapToPair(team -> new Tuple2<>(team, team.length()))
      .filter(entry -> entry._1().contains("a"));
  JavaTestUtils.attachTestOutputStream(keptPairs);

  List<List<Tuple2<String, Integer>>> observed = JavaTestUtils.runStreams(ssc, 2, 2);
  Assert.assertEquals(expected, observed);
}
/** Verifies that {@code filter} on a pair stream drops pairs whose key lacks the letter "a". */
@SuppressWarnings("unchecked")
@Test
public void testPairFilter() {
  // Two input batches of team names.
  List<List<String>> inputData = Arrays.asList(
      Arrays.asList("giants", "dodgers"),
      Arrays.asList("yankees", "red sox"));

  // Per batch, only the single name containing "a" should survive, paired with its length.
  List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
      Arrays.asList(new Tuple2<>("giants", 6)),
      Arrays.asList(new Tuple2<>("yankees", 7)));

  JavaDStream<String> teamNames = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
  JavaPairDStream<String, Integer> keptPairs = teamNames
      .mapToPair(team -> new Tuple2<>(team, team.length()))
      .filter(entry -> entry._1().contains("a"));
  JavaTestUtils.attachTestOutputStream(keptPairs);

  List<List<Tuple2<String, Integer>>> observed = JavaTestUtils.runStreams(ssc, 2, 2);
  Assert.assertEquals(expected, observed);
}
/** * Method to get total traffic counts of different type of vehicles for each route. * * @param filteredIotDataStream IoT data stream */ public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) { // We need to get count of vehicle group by routeId and vehicleType JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L)) .reduceByKey((a, b) -> a + b); // Need to keep state for total count JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair .mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600)));//maintain state for one hour // Transform to dstream of TrafficData JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2); JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc); // Map Cassandra table column Map<String, String> columnNameMappings = new HashMap<String, String>(); columnNameMappings.put("routeId", "routeid"); columnNameMappings.put("vehicleType", "vehicletype"); columnNameMappings.put("totalCount", "totalcount"); columnNameMappings.put("timeStamp", "timestamp"); columnNameMappings.put("recordDate", "recorddate"); // call CassandraStreamingJavaUtil function to save in DB javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic", CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra(); }
// Key each hashtag by its text minus the first character, lower-cased, and pair it with 1.
// Lower-casing uses Locale.ROOT so the keys do not depend on the JVM default locale
// (under a Turkish locale, for example, "I" would otherwise map to a dotless "ı").
JavaPairDStream<String, Integer> tuples = hashTags.mapToPair(
    l -> new Tuple2<>(l.substring(1).toLowerCase(java.util.Locale.ROOT), 1));
.mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));
JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override
// Key every record by its vehicle id, then collapse records sharing an id to a single one:
// the reducer always returns its first argument and discards the second.
JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream
    .mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(), iot))
    .reduceByKey((kept, discarded) -> kept);
// Pair each element with its category as the key; the element itself is the value.
streamOfItems.mapToPair(
    streamItem -> new Tuple2<>(streamItem.getCategory(), streamItem));
// Pair each element with its category as the key; the element itself is the value.
streamOfItems.mapToPair(
    streamItem -> new Tuple2<>(streamItem.getCategory(), streamItem));