JavaDStream

How to use JavaDStream in org.apache.spark.streaming.api.java

Best Java code snippets using org.apache.spark.streaming.api.java.JavaDStream (showing top 20 results out of 369)

Related classes:

  • JavaPairDStream
  • Duration
  • JavaStreamingContext
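
Before the individual snippets, a minimal, self-contained sketch of the typical JavaDStream lifecycle may help; the socket source, host, port, and batch interval below are arbitrary placeholders, not taken from any snippet on this page:

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class MinimalDStreamExample {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("MinimalDStreamExample");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));

    // A DStream of text lines from a socket (host and port are placeholders)
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);

    // Transformations are lazy and apply once per batch
    JavaDStream<Integer> lengths = lines.map(String::length);

    // An output operation forces each batch to be computed
    lengths.print();

    jssc.start();            // start receiving data
    jssc.awaitTermination(); // block until stopped
  }
}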
origin: databricks/learning-spark

JavaStreamingContext jssc = new JavaStreamingContext(conf,
  Flags.getInstance().getSlideInterval());
jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());
JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());
JavaDStream<ApacheAccessLog> accessLogsDStream =
  logData.map(new Functions.ParseFromLogLine()).cache();
accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> rdd) {
    // process each batch of parsed access logs (body truncated in the original snippet)
    return null;
  }});
origin: databricks/learning-spark

accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> stats =
      Functions.contentSizeStats(accessLogs); // stats helper from the repo's Functions class; the call is truncated in the snippet
    return null;
  }});

JavaPairDStream<Integer, Long> responseCodeCountDStream = accessLogsDStream.transformToPair(
 new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {
  public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> rdd) {
   return Functions.responseCodeCount(rdd);
  }})
 .updateStateByKey(new Functions.ComputeRunningSum());

responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {
  public Void call(JavaPairRDD<Integer, Long> rdd) {
   currentResponseCodeCounts = rdd.take(100);
   return null;
  }});

JavaPairDStream<String, Long> ipRawDStream = accessLogsDStream.transformToPair(
 new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>(){
  public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
   return Functions.ipAddressCount(rdd); // hypothetical helper; the original body is truncated
  }});

JavaPairDStream<String, Long> ipDStream = accessLogsDStream.mapToPair(new Functions.IpTuple());
JavaPairDStream<String, Long> ipCountsDStream = ipDStream.reduceByKey(new Functions.LongSumReducer());
JavaPairDStream<String, Long> ipBytesDStream = accessLogsDStream.mapToPair(new Functions.IpContentTuple());
JavaPairDStream<String, Long> ipBytesSumDStream = ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream = ipBytesSumDStream.join(ipCountsDStream);

// ipAddressDStream is a JavaDStream<String> of IP addresses derived upstream (derivation truncated in the snippet)
ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {
  public Void call(JavaRDD<String> rdd) {
   List<String> currentIPAddresses = rdd.take(100);
   return null;
  }});
origin: databricks/learning-spark

public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    public Boolean call(String line) {
      return line.contains("error");
    }});
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds then exit; to run forever, call without a timeout
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
  }
}
origin: databricks/learning-spark

public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
 JavaDStream<ApacheAccessLog> windowDStream = accessLogsDStream.window(
   Flags.getInstance().getWindowLength(),
   Flags.getInstance().getSlideInterval());
 JavaDStream<String> ip = accessLogsDStream.map(
  new Function<ApacheAccessLog, String>() {
   public String call(ApacheAccessLog entry) {
    return entry.getIpAddress();
   }});
 // reduceByWindow takes a reduce function plus an inverse ("subtract") function,
 // so the window can be updated incrementally as old batches slide out of it
 JavaDStream<Long> requestCountRBW = accessLogsDStream.map(new Function<ApacheAccessLog, Long>() {
   public Long call(ApacheAccessLog entry) {
    return 1L;
   }}).reduceByWindow(new Function2<Long, Long, Long>() {
     public Long call(Long v1, Long v2) {
      return v1 + v2;
     }}, new Function2<Long, Long, Long>() {
     public Long call(Long v1, Long v2) {
      return v1 - v2;
     }}, Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
 requestCountRBW.print();
 JavaPairDStream<String, Long> ipAddressPairDStream = accessLogsDStream.mapToPair(
  new PairFunction<ApacheAccessLog, String, Long>() {
   public Tuple2<String, Long> call(ApacheAccessLog entry) {
    return new Tuple2<>(entry.getIpAddress(), 1L);
   }});
 JavaPairDStream<String, Long> ipCountDStream = ipAddressPairDStream.reduceByKeyAndWindow(
  // reduce and inverse-reduce arguments; the second name is hypothetical, the originals are truncated
  new Functions.LongSumReducer(), new Functions.LongSubtractReducer(),
  Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
 JavaDStream<Long> requestCount = accessLogsDStream.countByWindow(
  Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
 JavaPairDStream<String, Long> ipAddressRequestCount = ip.countByValueAndWindow(
  Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
}
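
The windowed calls above lean on the repo's Flags helpers. Here is a self-contained sketch of the same incremental-window pattern, assuming a JavaStreamingContext jssc and a JavaDStream<String> lines built as in the other snippets; the durations and checkpoint path are placeholders. Note that the inverse-function variants require checkpointing:

jssc.checkpoint("/tmp/checkpoint"); // required by the inverse-function variants
JavaPairDStream<String, Long> pairs = lines.mapToPair(line -> new Tuple2<>(line, 1L));
// 30-second window recomputed every 10 seconds; the second function "subtracts"
// the batches that slide out of the window instead of re-reducing everything
JavaPairDStream<String, Long> windowedCounts = pairs.reduceByKeyAndWindow(
    (a, b) -> a + b,
    (a, b) -> a - b,
    new Duration(30000),
    new Duration(10000));
windowedCounts.print();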
origin: gwenshap/kafka-examples

JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));
// nums is a JavaDStream<Tuple2<Integer, Integer>> of (count, sum) pairs built upstream (not shown)
JavaDStream<Tuple2<Integer, Integer>> countAndSum = nums.reduce(
  new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
   @Override
   public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> a, Tuple2<Integer, Integer> b) {
    return new Tuple2<>(a._1() + b._1(), a._2() + b._2()); // hypothetical body; truncated in the snippet
   }});
countAndSum.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Integer>>, Void>() {
  @Override
  public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception {
   // e.g. compute and log the running average (body truncated in the snippet)
   return null;
  }});
ssc.start();
ssc.awaitTermination();
origin: wankunde/logcount

JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(1000));
// messages is a JavaPairDStream<String, String> from Kafka (created upstream, not shown)
JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
   public String call(Tuple2<String, String> tuple2) { return tuple2._2(); }
 }).filter(new Function<String, Boolean>() {
   private static final long serialVersionUID = 7786877762996470593L;
   public Boolean call(String s) { return !s.isEmpty(); } // hypothetical predicate; truncated in the snippet
 });
JavaDStream<Long> nums = lines.count();
nums.foreachRDD(new Function<JavaRDD<Long>, Void>() {
  public Void call(JavaRDD<Long> rdd) { return null; } // body truncated in the snippet
});
ssc.start();
ssc.awaitTermination();
origin: org.apache.spark/spark-streaming_2.11

JavaStreamingContext ssc = new JavaStreamingContext("local[2]", "test", new Duration(200));
JavaReceiverInputDStream<String> input =
 ssc.receiverStream(new JavaSocketReceiver("localhost", server.port()));
JavaDStream<String> mapped = input.map((Function<String, String>) v1 -> v1 + ".");
mapped.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
 long count = rdd.count();
 dataCounter.addAndGet(count);
});
ssc.start();
long startTime = System.currentTimeMillis();
long timeout = 10000;
// poll dataCounter until data arrives or the timeout elapses (loop truncated in the snippet)
origin: org.apache.spark/spark-streaming-kafka-0-8

// stream1 and stream2 are Kafka string streams; the source call is truncated, so kafkaStream is hypothetical
JavaDStream<String> stream1 = kafkaStream.map(
  new Function<Tuple2<String, String>, String>() {
   @Override
   public String call(Tuple2<String, String> kv) { return kv._2(); } // body truncated in the snippet
  });
JavaDStream<String> unifiedStream = stream1.union(stream2);
unifiedStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
   @Override
   public void call(JavaRDD<String> rdd) {
    // collect each batch for verification (body truncated in the snippet)
   }});
ssc.start();
long startTime = System.currentTimeMillis();
boolean matches = false;
// poll until the collected output matches the expected set or a timeout elapses (elided)
ssc.stop();
origin: ypriverol/spark-java8

JavaStreamingContext sc = new JavaStreamingContext(sparkcontext, new Duration(5000));
// statuses is a JavaDStream<String> of tweet texts (created upstream, not shown)
JavaDStream<String> words = statuses.flatMap(l -> Arrays.asList(l.split(" ")).iterator());
JavaDStream<String> hashTags = words.filter((Function<String, Boolean>) word -> word.startsWith("#"));
JavaPairDStream<String, Integer> tuples = hashTags.mapToPair(l -> new Tuple2<>(l.substring(1).toLowerCase(), 1));
JavaPairDStream<String, Integer> counts = tuples.reduceByKeyAndWindow(
    (Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2,
    (Function2<Integer, Integer, Integer>) (i1, i2) -> i1 - i2,
    new Duration(60 * 5 * 1000),    /* window length */
    new Duration(60 * 5 * 1000)     /* sliding interval */
);
JavaPairDStream<Integer, String> swappedCounts = counts.mapToPair(
    (PairFunction<Tuple2<String, Integer>, Integer, String>) in -> in.swap()
);
JavaPairDStream<Integer, String> sortedCounts = swappedCounts.transformToPair(
    (Function<JavaPairRDD<Integer, String>, JavaPairRDD<Integer, String>>) in -> in.sortByKey(false)
);
sortedCounts.foreachRDD(rdd ->
    System.out.println("Top hashtags: " + rdd.take(10))); // hypothetical output step; the original block is truncated
sc.checkpoint("./hdfs/");
sc.start();
sc.awaitTermination();
origin: spirom/learning-spark-with-java

JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
checkpointDir.mkdir();
checkpointDir.deleteOnExit();
ssc.checkpoint(checkpointPath);
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
streamOfRecords.foreachRDD(rdd -> {
 long records = rdd.count();
 System.out.println("[1] Records in this RDD: " + records);
});
ssc.start();
origin: jgperrin/net.jgp.labs.spark

 private void start() {
  // Create a local StreamingContext with two worker threads
  // and a batch interval of 5 seconds
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
    "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf,
    Durations.seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
    .getInputDirectory());

  msgDataStream.print();
  // Create a JavaRDD<Row> from each batch
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
   jssc.awaitTermination();
  } catch (InterruptedException e) {
   e.printStackTrace();
  }
 }
}
origin: spirom/learning-spark-with-java

JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
checkpointDir.mkdir();
checkpointDir.deleteOnExit();
ssc.checkpoint(checkpointPath);
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaPairDStream<StreamingItem.Category, StreamingItem> streamOfPairs =
  streamOfItems.mapToPair(si -> new Tuple2<>(si.getCategory(), si));
// mappingFunction is defined elsewhere in the example; the resulting state stream
// is consumed further down in the original code
streamOfPairs.mapWithState(StateSpec.function(mappingFunction));
origin: spirom/learning-spark-with-java

JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaDStream<StreamingItem> streamOfMediumEntries =
  streamOfItems.filter(item -> item.getCategory() == StreamingItem.Category.MEDIUM);
streamOfMediumEntries.foreachRDD(rdd -> System.out.println("Item count = " + rdd.count()));
ssc.start();
origin: spirom/learning-spark-with-java

JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
// foreachRDD also comes in a two-argument form that receives the batch timestamp
streamOfRecords.foreachRDD((rdd, timeStamp) -> {
 System.out.println("Batch at " + timeStamp + ": " + rdd.count() + " records"); // hypothetical body
});
streamOfRecords.count().foreachRDD((rdd, timeStamp) ->
 rdd.foreach(countValue ->
  System.out.println("Count at " + timeStamp + " = " + countValue))); // hypothetical body
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
streamOfItems.foreachRDD(rdd -> {
 // two further per-batch blocks appear here in the original, both truncated
});
ssc.start();
origin: spirom/learning-spark-with-java

JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(1000));
JavaDStream<String> streamOfRecords = ssc.textFileStream(fm.getDestination().getAbsolutePath());
JavaDStream<StreamingItem> streamOfItems = streamOfRecords.map(s -> new StreamingItem(s));
JavaPairDStream<StreamingItem.Category, StreamingItem> streamOfPairs =
  streamOfItems.mapToPair(si -> new Tuple2<>(si.getCategory(), si));
// the three combiner functions are defined elsewhere in the example; the combined
// type is assumed here to be a per-category count
JavaPairDStream<StreamingItem.Category, Long> streamOfCategoryCounts =
  streamOfPairs.combineByKey(createCombinerFunction, mergeValueFunction, mergeCombinersFunction,
   new HashPartitioner(4));
streamOfCategoryCounts.foreachRDD(rdd -> {
 System.out.println("Batch size: " + rdd.count());
 rdd.foreach(e -> System.out.println(e));
});
ssc.start();
origin: baghelamit/iot-traffic-monitor

    .set("spark.cassandra.connection.keep_alive_ms", prop.getProperty("com.iot.app.cassandra.keep_alive"));		 
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));	
jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir"));
JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream.mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(),iot)).reduceByKey((a, b) -> a );
         .mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));//maintain state for one hour
         .filter(tuple -> tuple._2.equals(Boolean.FALSE));
JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1);
filteredIotDataStream.cache();
Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues = jssc.sparkContext().broadcast(new Tuple3<>(poiData,"Route-37","Truck"));
origin: org.apache.spark/spark-streaming-kinesis-asl

Duration batchInterval = new Duration(2000);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);
JavaDStream<byte[]> unionStreams;
if (streamsList.size() > 1) {
 unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
} else {
 unionStreams = streamsList.get(0);
}
JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
 @Override
 public Iterator<String> call(byte[] line) {
  return Arrays.asList(new String(line).split(" ")).iterator(); // body truncated in the snippet; reconstructed
 }});
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
  new PairFunction<String, String, Integer>() {
   @Override
   public Tuple2<String, Integer> call(String s) {
    return new Tuple2<>(s, 1); // body truncated in the snippet; reconstructed
   }
  }).reduceByKey(
  new Function2<Integer, Integer, Integer>() {
   @Override
   public Integer call(Integer i1, Integer i2) {
    return i1 + i2; // body truncated in the snippet; reconstructed
   }
  });
wordCounts.print();
jssc.start();
jssc.awaitTermination();
origin: mvalleavila/Kafka-Spark-Hbase-Example

Duration batchInterval = new Duration(2000);
JavaStreamingContext sc = new JavaStreamingContext(master,
    "KafkaEventCount", batchInterval,
    System.getenv("SPARK_HOME"), JavaStreamingContext.jarOfClass(SparkStreamingFromKafkaToHBaseExample.class));
final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
// messages is a JavaPairDStream<String, String> from Kafka (created upstream, not shown)
JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
 @Override
 public String call(Tuple2<String, String> tuple2) {
  return tuple2._2(); // body truncated in the snippet; reconstructed
 }});
JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
  @Override
  public Iterable<String> call(String x) {
   return Arrays.asList(x.split(" ")); // body truncated in the snippet; reconstructed
  }});
// note: the original used map(...) here; mapToPair is the equivalent call in the current Java API
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
     new PairFunction<String, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(String s) {
       return new Tuple2<String, Integer>(s, 1);
      }
     }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) {
       return i1 + i2; // body truncated in the snippet; reconstructed
      }
     });
// note: the original's wordCounts.foreach(...) is foreachRDD in later APIs
wordCounts.foreachRDD(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
 @Override
 public Void call(JavaPairRDD<String, Integer> rdd, Time time) {
  // write each batch of counts to HBase (body truncated in the snippet)
  return null;
 }});
origin: dibbhatt/kafka-spark-consumer

JavaStreamingContext jsc = new JavaStreamingContext(_sparkConf, Durations.seconds(30));
unionStreams.foreachRDD(new VoidFunction<JavaRDD<MessageAndMetadata<byte[]>>>() {
 @Override
 public void call(JavaRDD<MessageAndMetadata<byte[]>> rdd) throws Exception {
  // process the batch of Kafka messages (body truncated in the snippet)
 }});
try {
 jsc.start();
 jsc.awaitTermination();
} catch (Exception ex) {
 jsc.ssc().sc().cancelAllJobs();
}
origin: org.apache.spark/spark-streaming-flume

 public static void main(String[] args) throws Exception {
  if (args.length != 2) {
   System.err.println("Usage: JavaFlumeEventCount <host> <port>");
   System.exit(1);
  }

  String host = args[0];
  int port = Integer.parseInt(args[1]);

  Duration batchInterval = new Duration(2000);
  SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
  JavaReceiverInputDStream<SparkFlumeEvent> flumeStream =
   FlumeUtils.createStream(ssc, host, port);

  flumeStream.count();

  flumeStream.count().map(in -> "Received " + in + " flume events.").print();

  ssc.start();
  ssc.awaitTermination();
 }
}
org.apache.spark.streaming.api.java.JavaDStream

Most used methods

  • foreachRDD
  • map
  • mapToPair
  • union
  • filter
  • flatMap
  • dstream
  • countByValue
  • cache
  • transformToPair
  • window
  • count
  • transform
  • countByValueAndWindow
  • flatMapToPair
  • print
  • reduceByWindow
  • repartition
  • glom
  • mapPartitions
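
Several of these methods do not appear in the snippets above. A brief sketch, assuming a JavaDStream<String> lines as elsewhere on this page:

// transform: run arbitrary RDD-to-RDD code once per batch
JavaDStream<String> distinctLines = lines.transform(rdd -> rdd.distinct());
// countByValue: per-batch (value, occurrence-count) pairs
JavaPairDStream<String, Long> valueCounts = lines.countByValue();
// repartition + glom: rebalance partitions, then expose each partition as a List
JavaDStream<List<String>> partitions = lines.repartition(4).glom();
partitions.foreachRDD(rdd ->
    rdd.collect().forEach(p -> System.out.println("partition size: " + p.size())));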
