// Four separate output actions registered on the same DStream
// (constructor argument lists truncated in the original snippet):
pairDStream.foreachRDD(
    new BatchUpdateFunction<>(getConfig(), keyClass, ...));
pairDStream.foreachRDD(new SaveToHDFSFunction<>(
    dataDirString + "/oryx", "data", ...));
pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
    dataDirString, Pattern.compile("-(\\d+)\\."), ...));
pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
    modelDirString, Pattern.compile("(\\d+)"), ...));
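// A minimal, self-contained sketch of the same pattern: several foreachRDD calls on one
// DStream. Each call registers an independent output operation, so Spark runs one job per
// registered action for every batch. Everything below (local master, socket source, the
// printed labels) is an assumption for illustration, not from the snippet above.
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;

public class MultiOutputActions {
  public static void main(String[] args) throws InterruptedException {
    JavaStreamingContext jssc =
        new JavaStreamingContext("local[2]", "MultiOutputActions", Durations.seconds(5));
    JavaPairDStream<String, String> pairDStream = jssc.socketTextStream("localhost", 9999)
        .mapToPair(line -> new Tuple2<>(line, line));
    // Two independent output actions; both see the same batch RDD.
    pairDStream.foreachRDD(rdd -> System.out.println("action 1: " + rdd.count() + " records"));
    pairDStream.foreachRDD(rdd -> System.out.println("action 2: " + rdd.count() + " records"));
    jssc.start();
    jssc.awaitTermination();
  }
}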
pairDStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));
// Tail of a chain that maintains a running sum per key (head truncated in the original):
    .updateStateByKey(new Functions.ComputeRunningSum());

responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {
  public Void call(JavaPairRDD<Integer, Long> rdd) {
    currentResponseCodeCounts = rdd.take(100);
    return null;
  }
});

final Comparator<Long> cmp = (Comparator<Long>) ordering;

endpointCountsDStream.foreachRDD(new Function<JavaPairRDD<String, Long>, Void>() {
  public Void call(JavaPairRDD<String, Long> rdd) {
    currentTopEndpoints = rdd.takeOrdered(...);  // arguments truncated in the original
    return null;
  }
});
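// takeOrdered returns the smallest elements under the supplied comparator, so "top
// endpoints" means ordering by count descending. A sketch completing the truncated call
// under that assumption (Spark 1.6+ lambda form); the count of 10 and the comparator are
// illustrative, not from the original. The comparator must be serializable to ship to
// executors, hence the intersection cast:
endpointCountsDStream.foreachRDD(rdd -> {
  currentTopEndpoints = rdd.takeOrdered(10,
      (Comparator<Tuple2<String, Long>> & java.io.Serializable)
          (a, b) -> Long.compare(b._2(), a._2()));
});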
@SuppressWarnings("deprecation") public static void persists(JavaPairDStream<Integer, Iterable<Long>> partitonOffset, Properties props) { partitonOffset.foreachRDD(new VoidFunction<JavaPairRDD<Integer,Iterable<Long>>>() { @Override public void call(JavaPairRDD<Integer, Iterable<Long>> po) throws Exception { List<Tuple2<Integer, Iterable<Long>>> poList = po.collect(); doPersists(poList, props); } }); }
stringStringJavaPairDStream.foreachRDD(r -> {
  System.out.println("========================");
  System.out.println(r);  // prints the RDD's toString, not its elements
});
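// A sketch of printing a sample of each batch's contents instead of the RDD object;
// take(10) ships at most ten records to the driver, which keeps this safe for large
// batches (the sample size is an assumption):
stringStringJavaPairDStream.foreachRDD(rdd -> {
  System.out.println("========================");
  for (Tuple2<String, String> record : rdd.take(10)) {
    System.out.println(record);
  }
});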
static void streamSpansToStorage(
    JavaDStream<byte[]> stream,
    ReadSpans readSpans,
    AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId) {
  JavaDStream<Span> spans = stream.flatMap(readSpans);

  // TODO: plug in some filter to drop spans regardless of trace ID
  // spans = spans.filter(spanFilter);

  JavaPairDStream<String, Iterable<Span>> tracesById = spans
      .mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s))
      .groupByKey();

  tracesById.foreachRDD(rdd -> {
    rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId);
  });
}
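// The consumer type above is from the snippet; the body below is an assumption, shown to
// illustrate why foreachPartition is used: it hands the function an Iterator over all
// grouped traces in one partition, so per-partition resources (e.g. a storage connection)
// are opened once rather than once per trace.
VoidFunction<Iterator<Iterable<Span>>> consumeByPartition = traces -> {
  // open a storage connection here, once per partition (hypothetical)
  while (traces.hasNext()) {
    Iterable<Span> spansSharingTraceId = traces.next();
    // adjust and write all spans of one trace together (hypothetical)
  }
  // close the connection
};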
sortedCounts.foreachRDD(rdd -> {
  // body truncated in the original snippet
});
words.countByValue().foreachRDD(new Function<JavaPairRDD<String, Long>, Void>() {
  @Override
  public Void call(JavaPairRDD<String, Long> rdd) {
    // body truncated in the original snippet
    return null;
  }
});
words.countByValue().foreachRDD(new VoidFunction<JavaPairRDD<String, Long>>() {
  @Override
  public void call(JavaPairRDD<String, Long> rdd) {
    // body truncated in the original snippet
  }
});
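// Note the API difference from the previous snippet: since Spark 1.6, foreachRDD takes a
// VoidFunction (the Function<R, Void> overload was deprecated), so no "return null" is
// needed and a lambda can be used. The printing body here is an assumption:
words.countByValue().foreachRDD(rdd -> {
  for (Tuple2<String, Long> wordCount : rdd.take(10)) {
    System.out.println(wordCount._1() + ": " + wordCount._2());
  }
});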
if (saveToCassandraActionExecutionFunction.check()) {
  log.info("Cassandra is configured properly");
  groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_CASSANDRA))
      .foreachRDD(saveToCassandraActionExecutionFunction);
} else if (saveToMongoActionExecutionFunction.check()) {
  log.info("MongoDB is configured properly");
  groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_MONGO))
      .foreachRDD(saveToMongoActionExecutionFunction);
} else if (saveToElasticSearchActionExecutionFunction.check()) {
  log.info("ElasticSearch is configured properly");
  groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_ELASTICSEARCH))
      .foreachRDD(saveToElasticSearchActionExecutionFunction);
} else {
  log.warn("ElasticSearch is NOT configured properly");
  if (saveToSolrActionExecutionFunction.check()) {
    log.info("Solr is configured properly");
    groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_SOLR))
        .foreachRDD(saveToSolrActionExecutionFunction);
  } else {
    groupedDataDstream.filter(new FilterDataFunction(StreamAction.LISTEN))
        .foreachRDD(new SendToKafkaActionExecutionFunction(
            configurationContext.getKafkaHostsQuorum()));
  }
}
// the enclosing try { ... } catch (Exception e) { ... } is truncated in the original snippet
    new HashPartitioner(4));  // tail of a preceding statement, truncated in the original

streamOfCategoryCounts.foreachRDD(rdd -> {
  System.out.println("Batch size: " + rdd.count());
  rdd.foreach(e -> System.out.println(e));
});
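// rdd.count() is an action whose result comes back to the driver, so that println shows
// up in the driver log; rdd.foreach(...) runs on the executors, so its output lands in
// executor logs (only in local mode do both hit the same console). A variant that keeps
// all output on the driver by sampling the batch (the sample size is an assumption):
streamOfCategoryCounts.foreachRDD(rdd -> {
  System.out.println("Batch size: " + rdd.count());
  rdd.take(20).forEach(e -> System.out.println(e));
});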
private <K, S, T> void testOperation(
    List<List<K>> input,
    StateSpec<K, Integer, S, T> mapWithStateSpec,
    List<Set<T>> expectedOutputs,
    List<Set<Tuple2<K, S>>> expectedStateSnapshots) {
  int numBatches = expectedOutputs.size();
  JavaDStream<K> inputStream = JavaTestUtils.attachTestInputStream(ssc, input, 2);
  JavaMapWithStateDStream<K, Integer, S, T> mapWithStateDStream =
      JavaPairDStream.fromJavaDStream(
          inputStream.map(x -> new Tuple2<>(x, 1))).mapWithState(mapWithStateSpec);

  List<Set<T>> collectedOutputs =
      Collections.synchronizedList(new ArrayList<Set<T>>());
  mapWithStateDStream.foreachRDD(rdd -> collectedOutputs.add(Sets.newHashSet(rdd.collect())));

  List<Set<Tuple2<K, S>>> collectedStateSnapshots =
      Collections.synchronizedList(new ArrayList<Set<Tuple2<K, S>>>());
  mapWithStateDStream.stateSnapshots().foreachRDD(
      rdd -> collectedStateSnapshots.add(Sets.newHashSet(rdd.collect())));

  BatchCounter batchCounter = new BatchCounter(ssc.ssc());
  ssc.start();
  ((ManualClock) ssc.ssc().scheduler().clock())
      .advance(ssc.ssc().progressListener().batchDuration() * numBatches + 1);
  batchCounter.waitUntilBatchesCompleted(numBatches, 10000);

  Assert.assertEquals(expectedOutputs, collectedOutputs);
  Assert.assertEquals(expectedStateSnapshots, collectedStateSnapshots);
}
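// Why the synchronizedList matters above: the foreachRDD callbacks run on Spark's batch
// processing threads while the test thread advances the manual clock and waits, so the
// collection lists are mutated and read concurrently. A stripped-down sketch of the same
// collect-then-assert pattern, assuming a test-fed JavaDStream<String> `inputStream`:
List<List<String>> results = Collections.synchronizedList(new ArrayList<>());
inputStream.foreachRDD(rdd -> results.add(rdd.collect()));
ssc.start();
// advance the clock / wait for the batches to complete, then:
// Assert.assertEquals(expected, results);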