JavaDStream.map

How to use the map method in org.apache.spark.streaming.api.java.JavaDStream

Best Java code snippets using org.apache.spark.streaming.api.java.JavaDStream.map (Showing top 20 results out of 315)
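For context, map applies a function to every element of the stream and returns a new JavaDStream of the results. A minimal, self-contained sketch (the local master, socket source, host, port, and batch interval are placeholder choices, not taken from any snippet below):

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class JavaDStreamMapExample {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setAppName("JavaDStreamMapExample").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    // Placeholder socket source; any DStream source works the same way.
    JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999);

    // map: transform each element, here a line into its length.
    JavaDStream<Integer> lineLengths = lines.map(String::length);

    lineLengths.print();
    ssc.start();
    ssc.awaitTermination();
  }
}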

origin: databricks/learning-spark

  Flags.getInstance().getWindowLength(),
  Flags.getInstance().getSlideInterval());
JavaDStream<String> ip = accessLogsDStream.map(
  new Function<ApacheAccessLog, String>() {
    public String call(ApacheAccessLog entry) {
      return entry.getIpAddress(); // body assumed; the indexed snippet was truncated here
    }});
JavaDStream<Long> requestCountRBW = accessLogsDStream.map(new Function<ApacheAccessLog, Long>() {
  public Long call(ApacheAccessLog entry) {
    return 1L;
  }});
origin: databricks/learning-spark

JavaDStream<ApacheAccessLog> accessLogsDStream = // left-hand side assumed; the indexed snippet is truncated
    logData.map(new Functions.ParseFromLogLine()).cache();
origin: org.apache.beam/beam-runners-spark

final Iterable<WindowedValue<T>> windowedValues =
    StreamSupport.stream(timestampedValues.spliterator(), false)
        .map(timestampedValue ->
            WindowedValue.of(
...
jssc.sparkContext()
    .parallelize(CoderHelpers.toByteArrays(windowedValues, windowCoder))
    .map(CoderHelpers.fromByteFunction(windowCoder));
origin: baghelamit/iot-traffic-monitor

/**
 * Method to get total traffic counts of different types of vehicles for each route.
 * 
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
  // We need vehicle counts grouped by routeId and vehicleType
  JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
      .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
      .reduceByKey((a, b) -> a + b);
  
  // Need to keep state for total count
  JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair
      .mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600))); // maintain state for one hour
  // Transform to dstream of TrafficData
  JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2);
  JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);
  // Map Cassandra table column
  Map<String, String> columnNameMappings = new HashMap<String, String>();
  columnNameMappings.put("routeId", "routeid");
  columnNameMappings.put("vehicleType", "vehicletype");
  columnNameMappings.put("totalCount", "totalcount");
  columnNameMappings.put("timeStamp", "timestamp");
  columnNameMappings.put("recordDate", "recorddate");
  // call CassandraStreamingJavaUtil function to save in DB
  javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic",
      CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra();
}
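The totalSumFunc state function referenced above is not shown in the snippet. A hypothetical sketch of such a mapWithState function (a running sum per AggregateKey; the repository's actual implementation may differ):

// Hypothetical sketch only. Uses org.apache.spark.api.java.function.Function3,
// org.apache.spark.api.java.Optional and org.apache.spark.streaming.State.
Function3<AggregateKey, Optional<Long>, State<Long>, Tuple2<AggregateKey, Long>> totalSumFunc =
    (key, currentCount, state) -> {
      long newCount = currentCount.isPresent() ? currentCount.get() : 0L;
      long total = newCount + (state.exists() ? state.get() : 0L);
      state.update(total);
      return new Tuple2<>(key, total);
    };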
origin: jaibeermalik/searchanalytics-bigdata

public JavaPairDStream<Integer, String> topProductViewsCountInLastOneHour(
    JavaDStream<String> fileStream) {
  JavaDStream<String> onlyQueryStringStream = fileStream
      .filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String eventString) throws Exception {
          LOG.debug("Filtering the incoming event stream: {}",
              eventString);
          String productIdString = getProductIdString(eventString);
          if (productIdString != null && !productIdString.isEmpty()
              && !"null".equals(productIdString)) {
            LOG.debug("Valid productid found : {}",
                productIdString);
            return true;
          }
          return false;
        }
      });
  JavaPairDStream<String, Integer> productIdCountsStream = onlyQueryStringStream
      .map(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String eventString) {
          String productIdString = getProductIdString(eventString);
          return new Tuple2<String, Integer>(productIdString, 1);
        }
      });
  return getSortedTopCount(productIdCountsStream);
}
origin: jaibeermalik/searchanalytics-bigdata

public JavaPairDStream<Integer, String> topQueryStringsCountInLastOneHour(
    JavaDStream<String> fileStream) {
  JavaDStream<String> onlyQueryStringStream = fileStream
      .filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String eventString) throws Exception {
          LOG.debug("Filtering the incoming event stream: {}",
              eventString);
          String queryString = getQueryString(eventString);
          if (queryString != null && !queryString.isEmpty()
              && !"null".equals(queryString)) {
            LOG.debug("Valid querystring found : {}",
                queryString);
            return true;
          }
          return false;
        }
      });
  JavaPairDStream<String, Integer> queryStringStream = onlyQueryStringStream
      .map(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String eventString) {
          String queryString = getQueryString(eventString);
          return new Tuple2<String, Integer>(queryString, 1);
        }
      });
  return getSortedTopCount(queryStringStream);
}
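Both methods above delegate to a getSortedTopCount helper that the snippets do not include. A hypothetical sketch of such a helper (the window length and slide interval are placeholder values; the repository's actual implementation may differ):

// Hypothetical sketch only: aggregate counts over a sliding one-hour window,
// swap key and count, and sort descending so the highest counts come first.
private JavaPairDStream<Integer, String> getSortedTopCount(
    JavaPairDStream<String, Integer> countsStream) {
  return countsStream
      .reduceByKeyAndWindow((a, b) -> a + b, Durations.minutes(60), Durations.minutes(1))
      .mapToPair(tuple -> new Tuple2<>(tuple._2(), tuple._1()))
      .transformToPair(rdd -> rdd.sortByKey(false));
}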
origin: jaibeermalik/searchanalytics-bigdata

.map(new PairFunction<SparkFlumeEvent, String, Integer>() {
  @Override
  public Tuple2<String, Integer> call(SparkFlumeEvent event) {
origin: org.apache.beam/beam-runners-spark

@Override
@SuppressWarnings("unchecked")
public void cache(String storageLevel, Coder<?> coder) {
 // we "force" MEMORY storage level in streaming
 if (!StorageLevel.fromString(storageLevel).equals(StorageLevel.MEMORY_ONLY_SER())) {
  LOG.warn(
    "Provided StorageLevel: {} is ignored for streams, using the default level: {}",
    storageLevel,
    StorageLevel.MEMORY_ONLY_SER());
 }
 // Caching can cause serialization, so we need to encode to bytes
 // more details in https://issues.apache.org/jira/browse/BEAM-2669
 Coder<WindowedValue<T>> wc = (Coder<WindowedValue<T>>) coder;
 this.dStream =
   dStream.map(CoderHelpers.toByteFunction(wc)).cache().map(CoderHelpers.fromByteFunction(wc));
}
origin: org.apache.spark/spark-streaming-flume

 public static void main(String[] args) throws Exception {
  if (args.length != 2) {
   System.err.println("Usage: JavaFlumeEventCount <host> <port>");
   System.exit(1);
  }

  String host = args[0];
  int port = Integer.parseInt(args[1]);

  Duration batchInterval = new Duration(2000);
  SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
  JavaReceiverInputDStream<SparkFlumeEvent> flumeStream =
   FlumeUtils.createStream(ssc, host, port);

  flumeStream.count();

  flumeStream.count().map(in -> "Received " + in + " flume events.").print();

  ssc.start();
  ssc.awaitTermination();
 }
}
origin: org.apache.spark/spark-streaming_2.11

@SuppressWarnings("unchecked")
@Test
public void testMap() {
 List<List<String>> inputData = Arrays.asList(
   Arrays.asList("hello", "world"),
   Arrays.asList("goodnight", "moon"));
 List<List<Integer>> expected = Arrays.asList(
   Arrays.asList(5,5),
   Arrays.asList(9,4));
 JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaDStream<Integer> letterCount = stream.map(String::length);
 JavaTestUtils.attachTestOutputStream(letterCount);
 List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 assertOrderInvariantEquals(expected, result);
}
origin: org.apache.spark/spark-streaming_2.10

@Test
public void testMap() {
 List<List<String>> inputData = Arrays.asList(
  Arrays.asList("hello", "world"),
  Arrays.asList("goodnight", "moon"));
 List<List<Integer>> expected = Arrays.asList(
  Arrays.asList(5, 5),
  Arrays.asList(9, 4));
 JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaDStream<Integer> letterCount = stream.map(String::length);
 JavaTestUtils.attachTestOutputStream(letterCount);
 List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 assertOrderInvariantEquals(expected, result);
}
origin: org.apache.spark/spark-streaming_2.11

@SuppressWarnings("unchecked")
@Test
public void testCheckpointMasterRecovery() throws InterruptedException {
 List<List<String>> inputData = Arrays.asList(
   Arrays.asList("this", "is"),
   Arrays.asList("a", "test"),
   Arrays.asList("counting", "letters"));
 List<List<Integer>> expectedInitial = Arrays.asList(
   Arrays.asList(4,2));
 List<List<Integer>> expectedFinal = Arrays.asList(
   Arrays.asList(1,4),
   Arrays.asList(8,7));
 File tempDir = Files.createTempDir();
 tempDir.deleteOnExit();
 ssc.checkpoint(tempDir.getAbsolutePath());
 JavaDStream<String> stream = JavaCheckpointTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaDStream<Integer> letterCount = stream.map(String::length);
 JavaCheckpointTestUtils.attachTestOutputStream(letterCount);
 List<List<Integer>> initialResult = JavaTestUtils.runStreams(ssc, 1, 1);
 assertOrderInvariantEquals(expectedInitial, initialResult);
 Thread.sleep(1000);
 ssc.stop();
 ssc = new JavaStreamingContext(tempDir.getAbsolutePath());
 // Tweak to take into consideration that the last batch before failure
 // will be re-processed after recovery
 List<List<Integer>> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 3);
 assertOrderInvariantEquals(expectedFinal, finalResult.subList(1, 3));
 ssc.stop();
 Utils.deleteRecursively(tempDir);
}
origin: org.apache.spark/spark-streaming_2.11

 private <K, S, T> void testOperation(
   List<List<K>> input,
   StateSpec<K, Integer, S, T> mapWithStateSpec,
   List<Set<T>> expectedOutputs,
   List<Set<Tuple2<K, S>>> expectedStateSnapshots) {
  int numBatches = expectedOutputs.size();
  JavaDStream<K> inputStream = JavaTestUtils.attachTestInputStream(ssc, input, 2);
  JavaMapWithStateDStream<K, Integer, S, T> mapWithStateDStream = JavaPairDStream.fromJavaDStream(
   inputStream.map(x -> new Tuple2<>(x, 1))).mapWithState(mapWithStateSpec);

  List<Set<T>> collectedOutputs =
    Collections.synchronizedList(new ArrayList<Set<T>>());
  mapWithStateDStream.foreachRDD(rdd -> collectedOutputs.add(Sets.newHashSet(rdd.collect())));
  List<Set<Tuple2<K, S>>> collectedStateSnapshots =
    Collections.synchronizedList(new ArrayList<Set<Tuple2<K, S>>>());
  mapWithStateDStream.stateSnapshots().foreachRDD(rdd ->
    collectedStateSnapshots.add(Sets.newHashSet(rdd.collect())));
  BatchCounter batchCounter = new BatchCounter(ssc.ssc());
  ssc.start();
  ((ManualClock) ssc.ssc().scheduler().clock())
    .advance(ssc.ssc().progressListener().batchDuration() * numBatches + 1);
  batchCounter.waitUntilBatchesCompleted(numBatches, 10000);

  Assert.assertEquals(expectedOutputs, collectedOutputs);
  Assert.assertEquals(expectedStateSnapshots, collectedStateSnapshots);
 }
}
origin: org.apache.beam/beam-runners-spark

JavaDStream<WindowedValue<T>> readUnboundedStream = // left-hand side assumed; the indexed snippet is truncated here
    mapWithStateDStream
        .flatMap(new Tuple2byteFlatMapFunction())
        .map(CoderHelpers.fromByteFunction(coder));
return new UnboundedDataset<>(readUnboundedStream, Collections.singletonList(id));
origin: co.cask.cdap/cdap-data-streams

 .map(new WrapOutputTransformFunction<>(stageSpec.getName()));
return new DStreamCollection<>(sec, outputDStream);
org.apache.spark.streaming.api.java.JavaDStream.map

Popular methods of JavaDStream (a combined usage sketch follows the list)

  • foreachRDD
  • mapToPair
  • union
  • filter
  • flatMap
  • dstream
  • countByValue
  • cache
  • transformToPair
  • window
  • count
  • transform
  • countByValueAndWindow
  • flatMapToPair
  • print
  • reduceByWindow
  • repartition
  • glom
  • mapPartitions
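A hypothetical sketch combining several of the methods listed above (filter, window, countByValue, and foreachRDD); it assumes an existing JavaStreamingContext ssc, and the socket source and durations are placeholder values:

// Hypothetical sketch only.
JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999); // placeholder source
lines
    .filter(line -> !line.isEmpty())                      // keep non-empty lines
    .window(Durations.minutes(10), Durations.minutes(1))  // sliding ten-minute window
    .countByValue()                                       // JavaPairDStream<String, Long> of counts
    .foreachRDD(rdd ->
        rdd.take(10).forEach(t -> System.out.println(t._1() + " -> " + t._2())));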
