Tabnine Logo
JavaDStream.mapToPair
Code Index · Add Tabnine to your IDE (free)

How to use
mapToPair
method
in
org.apache.spark.streaming.api.java.JavaDStream

Best Java code snippets using org.apache.spark.streaming.api.java.JavaDStream.mapToPair (Showing top 17 results out of 315)

origin: databricks/learning-spark

// Requests per client IP: key each access-log record by IP and sum per key.
// NOTE(review): assumes Functions.IpTuple emits (ip, 1) — confirm in Functions.
JavaPairDStream<String, Long> ipDStream = accessLogsDStream.mapToPair(new Functions.IpTuple());
JavaPairDStream<String, Long> ipCountsDStream = ipDStream.reduceByKey(new Functions.LongSumReducer());
// Bytes per client IP: presumably (ip, contentSize) pairs, summed per key.
JavaPairDStream<String, Long> ipBytesDStream = accessLogsDStream.mapToPair(new Functions.IpContentTuple());
JavaPairDStream<String, Long> ipBytesSumDStream = ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
// Join the two aggregates so each IP carries (totalBytes, requestCount).
JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream = ipBytesSumDStream.join(ipCountsDStream);
origin: databricks/learning-spark

requestCountRBW.print();
JavaPairDStream<String, Long> ipAddressPairDStream = accessLogsDStream.mapToPair(
 new PairFunction<ApacheAccessLog, String, Long>() {
  public Tuple2<String, Long> call(ApacheAccessLog entry) {
origin: org.springframework.xd/spring-xd-spark-streaming

@Override
public JavaPairDStream<String, Integer> process(JavaDStream<String> input) {
  // Split every incoming line into space-delimited tokens.
  JavaDStream<String> tokens = input.flatMap(new FlatMapFunction<String, String>() {
    @Override
    public Iterable<String> call(String line) {
      return Arrays.asList(line.split(" "));
    }
  });
  // Pair each token with a count of one.
  JavaPairDStream<String, Integer> ones = tokens.mapToPair(new PairFunction<String, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(String token) {
      return new Tuple2<String, Integer>(token, 1);
    }
  });
  // Sum the per-token counts within each batch and return the word counts.
  return ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer left, Integer right) {
      return left + right;
    }
  });
}
origin: ebi-wp/kafka-streams-api-websockets

.mapToPair((String s) -> new Tuple2<>(s, 1))
.reduceByKey((Integer i1, Integer i2)-> i1 + i2);
origin: io.zipkin.sparkstreaming/zipkin-sparkstreaming

static void streamSpansToStorage(
  JavaDStream<byte[]> stream,
  ReadSpans readSpans,
  AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId
) {
  // Decode each raw payload into zero or more spans.
  JavaDStream<Span> decoded = stream.flatMap(readSpans);
  // TODO: plug in some filter to drop spans regardless of trace ID
  // decoded = decoded.filter(spanFilter);

  // Group spans by their lower-hex trace ID so a whole trace lands together.
  JavaPairDStream<String, Iterable<Span>> tracesById = decoded
    .mapToPair(span -> new Tuple2<>(Util.toLowerHex(span.traceIdHigh, span.traceId), span))
    .groupByKey();

  // Hand each partition of grouped traces to the consumer for adjustment/storage.
  tracesById.foreachRDD(rdd -> rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId));
}
origin: baghelamit/iot-traffic-monitor

/**
 * Computes windowed traffic counts of each vehicle type per route and writes
 * them to Cassandra.
 * Window duration = 30 seconds and Slide interval = 10 seconds.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
    // Count vehicles per (route, vehicle type) over a 30s window sliding every 10s.
    JavaPairDStream<AggregateKey, Long> windowedCounts = filteredIotDataStream
        .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
        .reduceByKeyAndWindow((total, increment) -> total + increment,
            Durations.seconds(30), Durations.seconds(10));

    // Convert each aggregate into a WindowTrafficData row.
    JavaDStream<WindowTrafficData> trafficRows = windowedCounts.map(windowTrafficDataFunc);

    // Map Java bean property names onto Cassandra column names.
    Map<String, String> columnMappings = new HashMap<>();
    columnMappings.put("routeId", "routeid");
    columnMappings.put("vehicleType", "vehicletype");
    columnMappings.put("totalCount", "totalcount");
    columnMappings.put("timeStamp", "timestamp");
    columnMappings.put("recordDate", "recorddate");

    // Persist the stream into the window_traffic table via the Cassandra connector.
    javaFunctions(trafficRows).writerBuilder("traffickeyspace", "window_traffic",
        CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnMappings)).saveToCassandra();
}
origin: org.apache.spark/spark-streaming_2.10

@Test
public void testPairFilter() {
 // Two input batches of team names.
 List<List<String>> batches = Arrays.asList(
  Arrays.asList("giants", "dodgers"),
  Arrays.asList("yankees", "red sox"));
 // Only names containing 'a' survive; values are the name lengths.
 List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
  Arrays.asList(new Tuple2<>("giants", 6)),
  Arrays.asList(new Tuple2<>("yankees", 7)));
 JavaDStream<String> names = JavaTestUtils.attachTestInputStream(ssc, batches, 1);
 JavaPairDStream<String, Integer> withLengths =
  names.mapToPair(name -> new Tuple2<>(name, name.length()));
 JavaPairDStream<String, Integer> filtered = withLengths.filter(pair -> pair._1().contains("a"));
 JavaTestUtils.attachTestOutputStream(filtered);
 List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 Assert.assertEquals(expected, result);
}
origin: org.apache.spark/spark-streaming_2.11

@Test
public void testPairFilter() {
 // Two input batches of team names.
 List<List<String>> batches = Arrays.asList(
  Arrays.asList("giants", "dodgers"),
  Arrays.asList("yankees", "red sox"));
 // Only names containing 'a' survive; values are the name lengths.
 List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
  Arrays.asList(new Tuple2<>("giants", 6)),
  Arrays.asList(new Tuple2<>("yankees", 7)));
 JavaDStream<String> names = JavaTestUtils.attachTestInputStream(ssc, batches, 1);
 JavaPairDStream<String, Integer> withLengths =
  names.mapToPair(name -> new Tuple2<>(name, name.length()));
 JavaPairDStream<String, Integer> filtered = withLengths.filter(pair -> pair._1().contains("a"));
 JavaTestUtils.attachTestOutputStream(filtered);
 List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 Assert.assertEquals(expected, result);
}
origin: org.apache.spark/spark-streaming_2.10

// NOTE: the @SuppressWarnings("unchecked") previously on this method was
// unnecessary — Arrays.asList is @SafeVarargs, so the generic-varargs calls
// below compile without unchecked warnings. Removing it keeps any future,
// genuine unchecked warnings visible.
@Test
public void testPairFilter() {
 // Two input batches of team names.
 List<List<String>> inputData = Arrays.asList(
   Arrays.asList("giants", "dodgers"),
   Arrays.asList("yankees", "red sox"));
 // Only names containing 'a' survive; values are the name lengths.
 List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
   Arrays.asList(new Tuple2<>("giants", 6)),
   Arrays.asList(new Tuple2<>("yankees", 7)));
 JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaPairDStream<String, Integer> pairStream =
   stream.mapToPair(in -> new Tuple2<>(in, in.length()));
 JavaPairDStream<String, Integer> filtered = pairStream.filter(in -> in._1().contains("a"));
 JavaTestUtils.attachTestOutputStream(filtered);
 List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 Assert.assertEquals(expected, result);
}
origin: org.apache.spark/spark-streaming_2.11

@SuppressWarnings("unchecked")
@Test
// NOTE: the @SuppressWarnings("unchecked") previously attached to this test
// was unnecessary — Arrays.asList is @SafeVarargs, so the generic-varargs
// calls below compile without unchecked warnings. Removing it keeps any
// future, genuine unchecked warnings visible.
@Test
public void testPairFilter() {
 // Two input batches of team names.
 List<List<String>> inputData = Arrays.asList(
   Arrays.asList("giants", "dodgers"),
   Arrays.asList("yankees", "red sox"));
 // Only names containing 'a' survive; values are the name lengths.
 List<List<Tuple2<String, Integer>>> expected = Arrays.asList(
   Arrays.asList(new Tuple2<>("giants", 6)),
   Arrays.asList(new Tuple2<>("yankees", 7)));
 JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaPairDStream<String, Integer> pairStream =
   stream.mapToPair(in -> new Tuple2<>(in, in.length()));
 JavaPairDStream<String, Integer> filtered = pairStream.filter(in -> in._1().contains("a"));
 JavaTestUtils.attachTestOutputStream(filtered);
 List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2);
 Assert.assertEquals(expected, result);
}
origin: baghelamit/iot-traffic-monitor

/**
 * Computes running total traffic counts of each vehicle type per route and
 * writes them to Cassandra.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
    // Count vehicles per (route, vehicle type) within the current batch.
    JavaPairDStream<AggregateKey, Long> batchCounts = filteredIotDataStream
        .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
        .reduceByKey((left, right) -> left + right);

    // Fold batch counts into long-running totals; idle state expires after one hour.
    JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> statefulCounts =
        batchCounts.mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600)));

    // Unwrap the stateful stream, then convert each aggregate into a TotalTrafficData row.
    JavaDStream<Tuple2<AggregateKey, Long>> totals = statefulCounts.map(pair -> pair);
    JavaDStream<TotalTrafficData> trafficRows = totals.map(totalTrafficDataFunc);

    // Map Java bean property names onto Cassandra column names.
    Map<String, String> columnMappings = new HashMap<>();
    columnMappings.put("routeId", "routeid");
    columnMappings.put("vehicleType", "vehicletype");
    columnMappings.put("totalCount", "totalcount");
    columnMappings.put("timeStamp", "timestamp");
    columnMappings.put("recordDate", "recorddate");

    // Persist the stream into the total_traffic table via the Cassandra connector.
    javaFunctions(trafficRows).writerBuilder("traffickeyspace", "total_traffic",
        CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnMappings)).saveToCassandra();
}
origin: ypriverol/spark-java8

// Pair each tag (first character dropped — presumably the leading '#' —
// and lower-cased) with a count of 1 for later aggregation.
JavaPairDStream<String, Integer> tuples = hashTags.mapToPair(l -> new Tuple2<>(l.substring(1).toLowerCase(), 1));
origin: baghelamit/iot-traffic-monitor

.mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));
origin: org.apache.spark/spark-streaming-kinesis-asl

JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
  new PairFunction<String, String, Integer>() {
   @Override
origin: baghelamit/iot-traffic-monitor

// Key each IoT record by vehicle id and keep a single record per vehicle in
// the batch — the reducer (a, b) -> a retains its first argument, discarding
// duplicates (which record survives is not order-guaranteed).
JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream.mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(),iot)).reduceByKey((a, b) -> a );
origin: spirom/learning-spark-with-java

// Re-key the stream by item category for downstream per-category processing.
// NOTE(review): the returned pair stream is unused here — likely assigned or
// chained in the full source; confirm against the original file.
streamOfItems.mapToPair(si ->
  new Tuple2<>(si.getCategory(), si));
origin: spirom/learning-spark-with-java

// Re-key the stream by item category for downstream per-category processing.
// NOTE(review): the returned pair stream is unused here — likely assigned or
// chained in the full source; confirm against the original file.
streamOfItems.mapToPair(si ->
  new Tuple2<>(si.getCategory(), si));
org.apache.spark.streaming.api.java.JavaDStream.mapToPair

Popular methods of JavaDStream

  • foreachRDD
  • map
  • union
  • filter
  • flatMap
  • dstream
  • countByValue
  • cache
  • transformToPair
  • window
  • count
  • transform
  • count,
  • transform,
  • countByValueAndWindow,
  • flatMapToPair,
  • print,
  • reduceByWindow,
  • repartition,
  • glom,
  • mapPartitions

Popular in Java

  • Parsing JSON documents to java classes using gson
  • getOriginalFilename (MultipartFile)
    Return the original filename in the client's filesystem.This may contain path information depending
  • setScale (BigDecimal)
  • putExtra (Intent)
  • NoSuchElementException (java.util)
    Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
  • TreeMap (java.util)
    Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
  • ReentrantLock (java.util.concurrent.locks)
    A reentrant mutual exclusion Lock with the same basic behavior and semantics as the implicit monitor
  • Collectors (java.util.stream)
  • Modifier (javassist)
    The Modifier class provides static methods and constants to decode class and member access modifiers
  • Reflections (org.reflections)
    Reflections one-stop-shop objectReflections scans your classpath, indexes the metadata, allows you t
  • Top PhpStorm plugins
Tabnine Logo
  • Products

    Search for Java code · Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA · WebStorm · Visual Studio · Android Studio · Eclipse · Visual Studio Code · PyCharm · Sublime Text · PhpStorm · Vim · GoLand · RubyMine · Emacs · Jupyter Notebook · Jupyter Lab · Rider · DataGrip · AppCode
  • Company

    About Us · Contact Us · Careers
  • Resources

    FAQ · Blog · Tabnine Academy · Terms of use · Privacy policy · Java Code Index · Javascript Code Index
Get Tabnine for your IDE now