private static void initializeStreamingJob() {
    int batchMilliseconds = INSTANCE.config.getInt(BATCH_MILLISECONDS_PROPERTY);
    final Duration batchDuration = Durations.milliseconds(batchMilliseconds);
    JavaStreamingContext jsc = new JavaStreamingContext(
            new JavaSparkContext(getSparkSession().sparkContext()), batchDuration);
    INSTANCE.jsc = jsc;
}
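For context, a minimal sketch of how the initialized context might be driven; the runStreamingJob wrapper is hypothetical, while INSTANCE and its jsc field are taken from the snippet above.

// Hypothetical driver: start the job initialized above and block the
// driver thread until the streaming context is stopped or fails.
public static void runStreamingJob() throws InterruptedException {
    initializeStreamingJob();
    INSTANCE.jsc.start();            // begin scheduling micro-batches
    INSTANCE.jsc.awaitTermination(); // block until the context terminates
}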
/**
 * Method to get windowed traffic counts of different types of vehicles for each route.
 * Window duration = 30 seconds and slide interval = 10 seconds.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
    // Reduce by key and window (30-second window, 10-second slide)
    JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
            .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
            .reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));

    // Transform to a DStream of WindowTrafficData
    JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);

    // Map bean fields to Cassandra table columns
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    // Call the CassandraStreamingJavaUtil function to save to the database
    javaFunctions(trafficDStream)
            .writerBuilder("traffickeyspace", "window_traffic",
                    CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings))
            .saveToCassandra();
}
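The pairing above keys the stream by AggregateKey, which must have value semantics for reduceByKeyAndWindow to group correctly. The class is referenced but not shown here, so the following is a hypothetical sketch, not the original implementation.

import java.io.Serializable;
import java.util.Objects;

// Hypothetical composite key for (routeId, vehicleType) aggregation.
// reduceByKey/reduceByKeyAndWindow require correct equals() and hashCode(),
// and Spark requires the key to be serializable.
public class AggregateKey implements Serializable {
    private final String routeId;
    private final String vehicleType;

    public AggregateKey(String routeId, String vehicleType) {
        this.routeId = routeId;
        this.vehicleType = vehicleType;
    }

    public String getRouteId() { return routeId; }
    public String getVehicleType() { return vehicleType; }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof AggregateKey)) return false;
        AggregateKey that = (AggregateKey) o;
        return Objects.equals(routeId, that.routeId)
                && Objects.equals(vehicleType, that.vehicleType);
    }

    @Override
    public int hashCode() {
        return Objects.hash(routeId, vehicleType);
    }
}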
@Test
public void testMinutes() {
    Assert.assertEquals(new Duration(2 * 60 * 1000), Durations.minutes(2));
}
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(5));
@Before
public void setUp() {
    kafkaTestUtils = new KafkaTestUtils();
    kafkaTestUtils.setup();
    SparkConf sparkConf = new SparkConf()
            .setMaster("local[4]")
            .setAppName(this.getClass().getSimpleName());
    ssc = new JavaStreamingContext(sparkConf, Durations.milliseconds(200));
}
JavaStreamingContext jsc = new JavaStreamingContext(_sparkConf, Durations.seconds(30));
/**
 * Load the data using TwitterUtils: we obtain a DStream of tweets.
 *
 * More about TwitterUtils:
 * https://spark.apache.org/docs/1.4.0/api/java/index.html?org/apache/spark/streaming/twitter/TwitterUtils.html
 */
public JavaDStream<Status> loadData() {
    // Create the Spark configuration and Spark context
    SparkConf conf = new SparkConf()
            .setAppName("Spark Streaming")
            .set("spark.driver.allowMultipleContexts", "true")
            .setMaster("local[*]");

    // Create a Java streaming context with a 2-second batch interval
    jssc = new JavaStreamingContext(conf, Durations.seconds(2));

    System.out.println("Initializing Twitter stream...");

    // Create a DStream (sequence of RDDs). The object tweetsStream is a DStream of tweet statuses:
    // - the Status class contains all the information of a tweet
    // See http://twitter4j.org/javadoc/twitter4j/Status.html
    // and fill in the keys and tokens in the StreamUtils class!
    JavaDStream<Status> tweetsStream = TwitterUtils.createStream(jssc, StreamUtils.getAuth());

    return tweetsStream;
}
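A minimal sketch of how loadData() might be driven; the run() wrapper is hypothetical, while jssc is the field assigned in the snippet above.

// Hypothetical driver for loadData(): print incoming tweet text each batch.
public void run() throws InterruptedException {
    JavaDStream<Status> tweets = loadData();
    tweets.map(Status::getText).print(); // print the first tweets of every 2-second batch
    jssc.start();                        // start receiving from the Twitter stream
    jssc.awaitTermination();             // block until the context is stopped
}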
private void start() {
    // Create a local StreamingContext with two working threads and a
    // batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    // Monitor the input directory for new files and print each batch
    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
@Override
public SparkCollection<T> window(StageSpec stageSpec, Windower windower) {
    String stageName = stageSpec.getName();
    return wrap(stream
            .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.in", null))
            .window(Durations.seconds(windower.getWidth()), Durations.seconds(windower.getSlideInterval()))
            .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.out",
                    sec.getDataTracer(stageName))));
}
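For reference, the width/slide pair passed above follows standard DStream window semantics; a standalone illustration (the socket source, host, and port are hypothetical):

// Each emitted RDD covers the last 60 seconds of input, recomputed every 10 seconds.
// Both durations must be multiples of the context's batch interval.
JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
JavaDStream<Long> counts = lines.window(Durations.seconds(60), Durations.seconds(10)).count();
counts.print();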
/**
 * Method to get total traffic counts of different types of vehicles for each route.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
    // We need the count of vehicles grouped by routeId and vehicleType
    JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
            .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
            .reduceByKey((a, b) -> a + b);

    // Keep state for the total count; maintain state for one hour
    JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair =
            countDStreamPair.mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600)));

    // Transform to a DStream of TotalTrafficData
    JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2);
    JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);

    // Map bean fields to Cassandra table columns
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    // Call the CassandraStreamingJavaUtil function to save to the database
    javaFunctions(trafficDStream)
            .writerBuilder("traffickeyspace", "total_traffic",
                    CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings))
            .saveToCassandra();
}
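The mapWithState call above references totalSumFunc without showing it; the following is a hypothetical sketch of such a state-update function, assuming the Spark Java Function3 signature accepted by StateSpec.function.

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;
import scala.Tuple2;

// Hypothetical state function: add each batch's count to the running total
// held in Spark state and emit the updated (key, total) pair.
private static final Function3<AggregateKey, Optional<Long>, State<Long>, Tuple2<AggregateKey, Long>>
        totalSumFunc = (key, currentCount, state) -> {
    long batchCount = currentCount.isPresent() ? currentCount.get() : 0L;
    long total = batchCount + (state.exists() ? state.get() : 0L);
    state.update(total);             // persist the new running total
    return new Tuple2<>(key, total); // emitted downstream as Tuple2<AggregateKey, Long>
};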
@Test
public void testSeconds() {
    Assert.assertEquals(new Duration(30 * 1000), Durations.seconds(30));
}
@Test
public void testMilliseconds() {
    Assert.assertEquals(new Duration(100), Durations.milliseconds(100));
}