JavaStreamingContext jssc = new JavaStreamingContext(conf, Flags.getInstance().getSlideInterval());
jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());

JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());
JavaDStream<ApacheAccessLog> accessLogsDStream =
    logData.map(new Functions.ParseFromLogLine()).cache();

accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> stats = // ...
    // ...
  }
});

JavaPairDStream<Integer, Long> responseCodeCountDStream = accessLogsDStream
    .transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {
      public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> rdd) {
        return Functions.responseCodeCount(rdd);
      }
    })
    .updateStateByKey(new Functions.ComputeRunningSum());

responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {
  public Void call(JavaPairRDD<Integer, Long> rdd) {
    currentResponseCodeCounts = rdd.take(100);
    // ...
  }
});

JavaPairDStream<String, Long> ipRawDStream = accessLogsDStream
    .transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {
      public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
        // ...
      }
    });

JavaPairDStream<String, Long> ipDStream =
    accessLogsDStream.mapToPair(new Functions.IpTuple());
JavaPairDStream<String, Long> ipCountsDStream =
    ipDStream.reduceByKey(new Functions.LongSumReducer());

JavaPairDStream<String, Long> ipBytesDStream =
    accessLogsDStream.mapToPair(new Functions.IpContentTuple());
JavaPairDStream<String, Long> ipBytesSumDStream =
    ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream =
    ipBytesSumDStream.join(ipCountsDStream);

ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {
  public Void call(JavaRDD<String> rdd) {
    List<String> currentIPAddresses = rdd.take(100);
    // ...
  }
});
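The Functions.ComputeRunningSum used with updateStateByKey above is not included in this excerpt. A minimal sketch of what such an update function typically looks like, written as a lambda and assuming Spark 2.x's org.apache.spark.api.java.Optional (Spark 1.x used Guava's Optional here); the name is illustrative, not the original project's code:

import java.util.List;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function2;

// Adds the per-batch counts for a key to the running total kept in state.
Function2<List<Long>, Optional<Long>, Optional<Long>> computeRunningSum =
    (newCounts, currentTotal) -> {
      long sum = currentTotal.isPresent() ? currentTotal.get() : 0L;
      for (Long count : newCounts) {
        sum += count;
      }
      return Optional.of(sum);
    };

With an update function like this, updateStateByKey keeps a running count per response code across batches; the checkpoint directory set on the context above is required for that.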
public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    public Boolean call(String line) {
      return line.contains("error");
    }
  });
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds then exit. To run forever, call without a timeout.
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
}
public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
  JavaDStream<ApacheAccessLog> windowDStream = accessLogsDStream.window(
      Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());

  JavaDStream<String> ip = accessLogsDStream.map(
      new Function<ApacheAccessLog, String>() {
        public String call(ApacheAccessLog entry) {
          return entry.getIpAddress();
        }
      });

  JavaDStream<Long> requestCountRBW = accessLogsDStream.map(
      new Function<ApacheAccessLog, Long>() {
        public Long call(ApacheAccessLog entry) {
          return 1L;
        }
      }).reduceByWindow(
          new Function2<Long, Long, Long>() {
            public Long call(Long v1, Long v2) {
              return v1 + v2;
            }
          },
          new Function2<Long, Long, Long>() {
            public Long call(Long v1, Long v2) {
              return v1 - v2;
            }
          },
          Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
  requestCountRBW.print();

  JavaPairDStream<String, Long> ipAddressPairDStream = accessLogsDStream.mapToPair(
      new PairFunction<ApacheAccessLog, String, Long>() {
        public Tuple2<String, Long> call(ApacheAccessLog entry) {
          return new Tuple2<>(entry.getIpAddress(), 1L);
        }
      });
  JavaPairDStream<String, Long> ipCountDStream = ipAddressPairDStream.reduceByKeyAndWindow(
      // ...
      );

  JavaDStream<Long> requestCount = accessLogsDStream.countByWindow(
      Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
  JavaPairDStream<String, Long> ipAddressRequestCount = ip.countByValueAndWindow(
      Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
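The reduceByKeyAndWindow call above is cut off before its arguments. As a rough sketch of how such a call is typically completed: both the window length and the slide interval must be multiples of the batch interval, and the inverse-reduce variant requires checkpointing. The Duration values below are stand-ins, not whatever the project's Flags helper actually returns:

Duration windowLength  = new Duration(30 * 1000);   // look back over the last 30 seconds (assumed value)
Duration slideInterval = new Duration(10 * 1000);   // recompute every 10 seconds (assumed value)

JavaPairDStream<String, Long> ipCountDStream = ipAddressPairDStream.reduceByKeyAndWindow(
    (a, b) -> a + b,   // add counts as new batches enter the window
    (a, b) -> a - b,   // subtract counts as old batches leave the window (needs checkpointing)
    windowLength,
    slideInterval);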
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));
// ...
JavaDStream<Tuple2<Integer, Integer>> countAndSum = nums.reduce(
    new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
      @Override
      public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> a, Tuple2<Integer, Integer> b) {
        // ...
      }
    });
countAndSum.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Integer>>, Void>() {
  @Override
  public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception {
    // ...
  }
});
ssc.start();
ssc.awaitTermination();
JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(1000));
// ...
        return tuple2._2();
      }
    }).filter(new Function<String, Boolean>() {
      private static final long serialVersionUID = 7786877762996470593L;
      // ...
    });
// ...
JavaDStream<Long> nums = lines.count();
nums.foreachRDD(new Function<JavaRDD<Long>, Void>() {
  // ...
});
ssc.start();
ssc.awaitTermination();
JavaStreamingContext ssc = new JavaStreamingContext("local[2]", "test", new Duration(200));
JavaReceiverInputDStream<String> input =
    ssc.receiverStream(new JavaSocketReceiver("localhost", server.port()));
JavaDStream<String> mapped = input.map((Function<String, String>) v1 -> v1 + ".");
mapped.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
  long count = rdd.count();
  dataCounter.addAndGet(count);
});
ssc.start();

long startTime = System.currentTimeMillis();
long timeout = 10000;
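The JavaSocketReceiver passed to receiverStream above is a user-defined receiver whose implementation is not part of this excerpt. A minimal sketch of such a receiver against Spark's public Receiver API, reading newline-delimited text from a socket; the class body here is illustrative, not the original test's code:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;

// Reads lines from a socket and hands each one to Spark via store().
public class JavaSocketReceiver extends Receiver<String> {
  private final String host;
  private final int port;

  public JavaSocketReceiver(String host, int port) {
    super(StorageLevel.MEMORY_AND_DISK_2());
    this.host = host;
    this.port = port;
  }

  @Override
  public void onStart() {
    // Receive on a separate thread so onStart() returns immediately.
    new Thread(this::receive).start();
  }

  @Override
  public void onStop() {
    // Nothing to do: the receive thread stops when the socket closes or isStopped() is true.
  }

  private void receive() {
    try (Socket socket = new Socket(host, port);
         BufferedReader reader = new BufferedReader(
             new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while (!isStopped() && (line = reader.readLine()) != null) {
        store(line);
      }
      restart("Trying to connect again");
    } catch (Exception e) {
      restart("Error receiving data", e);
    }
  }
}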
// ...
).map(new Function<Tuple2<String, String>, String>() {
  @Override
  // ...
});

JavaDStream<String> unifiedStream = stream1.union(stream2);
unifiedStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
  @Override
  public void call(JavaRDD<String> rdd) {
    // ...
  }
});

ssc.start();
long startTime = System.currentTimeMillis();
boolean matches = false;
// ...
ssc.stop();
JavaStreamingContext sc = new JavaStreamingContext(sparkcontext, new Duration(5000));
// ...
JavaDStream<String> words = statuses.flatMap(l -> Arrays.asList(l.split(" ")).iterator());
JavaDStream<String> hashTags =
    words.filter((Function<String, Boolean>) word -> word.startsWith("#"));
JavaPairDStream<String, Integer> tuples =
    hashTags.mapToPair(l -> new Tuple2<>(l.substring(1).toLowerCase(), 1));
JavaPairDStream<String, Integer> counts = tuples.reduceByKeyAndWindow(
    (Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2,
    (Function2<Integer, Integer, Integer>) (i1, i2) -> i1 - i2,
    new Duration(60 * 5 * 1000), /* window length */
    new Duration(60 * 5 * 1000)  /* sliding interval */
);
JavaPairDStream<Integer, String> swappedCounts = counts.mapToPair(
    (PairFunction<Tuple2<String, Integer>, Integer, String>) in -> in.swap());
JavaPairDStream<Integer, String> sortedCounts = swappedCounts.transformToPair(
    (Function<JavaPairRDD<Integer, String>, JavaPairRDD<Integer, String>>) in -> in.sortByKey(false));
// ...
sc.checkpoint("./hdfs/");
sc.start();
sc.awaitTermination();
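The statuses DStream above (one tweet text per record) is created earlier in the original program and is not shown here. A rough sketch of how it is typically built with the external spark-streaming-twitter module, assuming Twitter4J OAuth credentials are supplied via the usual twitter4j.oauth.* system properties:

import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.twitter.TwitterUtils;
import twitter4j.Status;

// Uses the twitter4j.oauth.* system properties for credentials.
JavaReceiverInputDStream<Status> tweets = TwitterUtils.createStream(sc);
JavaDStream<String> statuses = tweets.map(Status::getText);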
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
checkpointDir.mkdir();
checkpointDir.deleteOnExit();
ssc.checkpoint(checkpointPath);

JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
streamOfRecords.foreachRDD(rdd -> {
  long records = rdd.count();
  System.out.println("[1] Records in this RDD: " + records);
  // ...
});
private void start() {
  // Create a local StreamingContext with two working threads and a 5 second batch interval
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
}
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
checkpointDir.mkdir();
checkpointDir.deleteOnExit();
ssc.checkpoint(checkpointPath);

JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaPairDStream<StreamingItem.Category, StreamingItem> streamOfPairs =
    streamOfItems.mapToPair(si -> new Tuple2<>(si.getCategory(), si));
streamOfPairs.mapWithState(StateSpec.function(mappingFunction));
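The mappingFunction handed to StateSpec.function above is not shown. A minimal sketch that keeps a running item count per category and emits the updated total, assuming the Function3-based overload of StateSpec.function; all names apart from StreamingItem are illustrative:

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;
import org.apache.spark.streaming.StateSpec;
import org.apache.spark.streaming.api.java.JavaMapWithStateDStream;
import scala.Tuple2;

// For each (category, item) pair, bump the per-category count held in state
// and emit the new total for that category. No state timeout is configured here.
Function3<StreamingItem.Category, Optional<StreamingItem>, State<Long>,
    Tuple2<StreamingItem.Category, Long>> mappingFunction =
    (category, item, state) -> {
      long newCount = (state.exists() ? state.get() : 0L) + 1L;
      state.update(newCount);
      return new Tuple2<>(category, newCount);
    };

JavaMapWithStateDStream<StreamingItem.Category, StreamingItem, Long,
    Tuple2<StreamingItem.Category, Long>> countsWithState =
    streamOfPairs.mapWithState(StateSpec.function(mappingFunction));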
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaDStream<StreamingItem> streamOfMediumEntries =
    streamOfItems.filter(item -> item.getCategory() == StreamingItem.Category.MEDIUM);
streamOfMediumEntries.foreachRDD(rdd -> System.out.println("Item count = " + rdd.count()));
ssc.start();
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());

streamOfRecords.foreachRDD((rdd, timeStamp) -> {
  // ...
});
streamOfRecords.count().foreachRDD((rdd, timeStamp) ->
    rdd.foreach(countValue ->
        // ...
    ));

JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
streamOfItems.foreachRDD(rdd -> {
  // ...
});
streamOfItems.foreachRDD(rdd -> {
  // ...
});
ssc.start();
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaPairDStream<StreamingItem.Category, StreamingItem> streamOfPairs =
    streamOfItems.mapToPair(si -> new Tuple2<>(si.getCategory(), si));
JavaPairDStream<StreamingItem.Category, Long> streamOfCategoryCounts =  // combined value assumed to be a count
    streamOfPairs.combineByKey(createCombinerFunction, mergeValueFunction,
        mergeCombinersFunction, new HashPartitioner(4));
streamOfCategoryCounts.foreachRDD(rdd -> {
  System.out.println("Batch size: " + rdd.count());
  rdd.foreach(e -> System.out.println(e));
});
ssc.start();
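The three combiner functions given to combineByKey above are not shown in this excerpt. A minimal sketch under the assumption that the combined value is a simple per-category count; only the names mirror the snippet, the bodies are illustrative:

import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

// Start a count of 1 for the first item seen for a category in a partition.
Function<StreamingItem, Long> createCombinerFunction = item -> 1L;

// Add one for every further item merged into an existing per-partition count.
Function2<Long, StreamingItem, Long> mergeValueFunction = (count, item) -> count + 1L;

// Add the per-partition counts together.
Function2<Long, Long, Long> mergeCombinersFunction = (c1, c2) -> c1 + c2;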
// ...
    .set("spark.cassandra.connection.keep_alive_ms",
        prop.getProperty("com.iot.app.cassandra.keep_alive"));
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir"));
// ...
JavaPairDStream<String, IoTData> iotDataPairStream = nonFilteredIotDataStream
    .mapToPair(iot -> new Tuple2<String, IoTData>(iot.getVehicleId(), iot))
    .reduceByKey((a, b) -> a);
// ...
    .mapWithState(StateSpec.function(processedVehicleFunc)
        .timeout(Durations.seconds(3600)))  // maintain state for one hour
    .filter(tuple -> tuple._2.equals(Boolean.FALSE));
JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1);
filteredIotDataStream.cache();
// ...
Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues =
    jssc.sparkContext().broadcast(new Tuple3<>(poiData, "Route-37", "Truck"));
Duration batchInterval = new Duration(2000);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);
// ...
  unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
} else {
  // ...
}
JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
  @Override
  public Iterator<String> call(byte[] line) {
    // ...
  }
});
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
    new PairFunction<String, String, Integer>() {
      @Override
      // ...
    }
).reduceByKey(
    new Function2<Integer, Integer, Integer>() {
      @Override
      // ...
    }
);
wordCounts.print();
jssc.start();
jssc.awaitTermination();
Duration batchInterval = new Duration(2000);
JavaStreamingContext sc = new JavaStreamingContext(master, "KafkaEventCount", batchInterval,
    System.getenv("SPARK_HOME"),
    JavaStreamingContext.jarOfClass(SparkStreamingFromKafkaToHBaseExample.class));
final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
// ...
JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
  @Override
  public String call(Tuple2<String, String> tuple2) {
    // ...
  }
});
JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
  @Override
  public Iterable<String> call(String x) {
    // ...
  }
});
JavaPairDStream<String, Integer> wordCounts = words.map(
    new PairFunction<String, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(String s) {
        return new Tuple2<String, Integer>(s, 1);
      }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) {
        // ...
      }
    });
wordCounts.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
  // ...
});
JavaStreamingContext jsc = new JavaStreamingContext(_sparkConf, Durations.seconds(30));
// ...
unionStreams.foreachRDD(new VoidFunction<JavaRDD<MessageAndMetadata<byte[]>>>() {
  @Override
  public void call(JavaRDD<MessageAndMetadata<byte[]>> rdd) throws Exception {
    // ...
  }
});
jsc.start();
jsc.awaitTermination();
// ...
} catch (Exception ex) {
  jsc.ssc().sc().cancelAllJobs();
  // ...
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: JavaFlumeEventCount <host> <port>");
    System.exit(1);
  }
  String host = args[0];
  int port = Integer.parseInt(args[1]);

  Duration batchInterval = new Duration(2000);
  SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
  JavaReceiverInputDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(ssc, host, port);

  flumeStream.count();
  flumeStream.count().map(in -> "Received " + in + " flume events.").print();

  ssc.start();
  ssc.awaitTermination();
}
}