JavaDStream.foreachRDD

How to use the foreachRDD method in org.apache.spark.streaming.api.java.JavaDStream

Best Java code snippets using org.apache.spark.streaming.api.java.JavaDStream.foreachRDD (Showing top 20 results out of 315)
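All of the results below follow the same pattern: foreachRDD registers an output operation whose function runs on the driver once per batch, receiving that batch's data as a JavaRDD. As a minimal, self-contained sketch of that pattern (assuming the Spark 2.x Java API; the socket source, host, port, and batch interval are placeholders, not taken from any project below):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class ForeachRDDSketch {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("foreachRDD-sketch");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    // Placeholder source: one record per line of text received on a local socket.
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
    JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

    // The two-argument overload, foreachRDD(VoidFunction2<JavaRDD<T>, Time>), also receives the batch time.
    // The function itself runs on the driver; RDD actions inside it (count, foreach, saveAsTextFile, ...)
    // run on the executors.
    words.foreachRDD((rdd, time) -> {
      long count = rdd.count();
      System.out.println("Batch at " + time + " contained " + count + " words");
    });

    jssc.start();
    jssc.awaitTermination();
  }
}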

origin: databricks/learning-spark

public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
  accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
    public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
      Tuple4<Long, Long, Long, Long> stats =

  ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {
    public Void call(JavaRDD<String> rdd) {
      List<String> currentIPAddresses = rdd.take(100);
origin: databricks/learning-spark

accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> rdd) {
origin: databricks/learning-spark

windowDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
 Tuple4<Long, Long, Long, Long> contentSizeStats =
origin: co.cask.cdap/hydrator-spark-core2

public static <T> void foreachRDD(JavaDStream<T> stream, final Function2<JavaRDD<T>, Time, Void> func) {
 stream.foreachRDD(new VoidFunction2<JavaRDD<T>, Time>() {
  @Override
  public void call(JavaRDD<T> v1, Time v2) throws Exception {
   func.call(v1, v2);
  }
 });
}
origin: org.springframework.xd/spring-xd-spark-streaming

@Override
public JavaDStream<String> process(JavaDStream<String> input) {
  input.foreachRDD(new Function<JavaRDD<String>, Void>() {
origin: org.apache.beam/beam-runners-spark

@Override
public void action() {
 // Force computation of DStream.
 dStream.foreachRDD(rdd -> rdd.foreach(TranslationUtils.<WindowedValue<T>>emptyVoidFunction()));
}
origin: XavientInformationSystems/Data-Ingestion-Platform

public <T> void compute(JavaDStream<T> twitterStream) {
  twitterStream.foreachRDD(rdd -> {
    List<Row> rows = new ArrayList<>();
    doMapToPair(rdd).top(topN,
        (Comparator<Tuple2<K, V>> & Serializable) (tuple1, tuple2) -> tuple1._2.compareTo(tuple2._2))
        .forEach(tuple -> rows.add(createRow(tuple)));
    
    rdbmsWriter.write(rows, schema, tableName);
  });
}
origin: jetoile/hadoop-unit

  public void run() {
    Map<String, Integer> topicMap = new HashMap<>();
    topicMap.put(topic, 1);

    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(
        scc,
        zkString,
        "groupId",
        topicMap);

    JavaDStream<String> messages = stream.map(r -> r._2());
    messages.foreachRDD(r -> {
        System.out.println("========================");
        System.out.println(r);
      });
  }
}
origin: dibbhatt/kafka-spark-consumer

@SuppressWarnings("deprecation")
public static void persists(DStream<Tuple2<Integer, Iterable<Long>>> partitonOffset, Properties props) {
 ClassTag<Tuple2<Integer, Iterable<Long>>> tuple2ClassTag = 
   ScalaUtil.<Integer, Iterable<Long>>getTuple2ClassTag();
 JavaDStream<Tuple2<Integer, Iterable<Long>>> jpartitonOffset = 
   new JavaDStream<Tuple2<Integer, Iterable<Long>>>(partitonOffset, tuple2ClassTag);
 jpartitonOffset.foreachRDD(new VoidFunction<JavaRDD<Tuple2<Integer, Iterable<Long>>>>() {
  @Override
  public void call(JavaRDD<Tuple2<Integer, Iterable<Long>>> po) throws Exception {
   List<Tuple2<Integer, Iterable<Long>>> poList = po.collect();
   doPersists(poList, props);
  }
 });
}
origin: XavientInformationSystems/Data-Ingestion-Platform

  public static <T> void write(JavaDStream<T> javaDStream, AppArgs appArgs) {
    javaDStream.foreachRDD(rdd -> {
      rdd.map(record -> {
        StringBuilder recordBuilder = new StringBuilder();
        
        for (Object e : (Object[]) record) {
          recordBuilder.append(e);
          recordBuilder.append(appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_DELIMITER));
        }
        return StringUtils.removeEnd(recordBuilder.toString(),
            appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_DELIMITER));
      }).saveAsTextFile(appArgs.getProperty(DiPConfiguration.CLUSTER_FS_URL)
          + appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_PATH) + System.currentTimeMillis());
    });
  }
}
origin: streampipes/streampipes-ce

@Override
public boolean execute(JavaDStream<Map<String, Object>>... convertedStream) {
  JavaDStream<Map<String, Object>> applicationLogic = getApplicationLogic(convertedStream);
  //applicationLogic.print();
  if (isOutputKafkaProtocol()) {
    applicationLogic.foreachRDD(SimpleKafkaSerializer.getInstance(kafkaParams, protocol().getTopicDefinition
        ().getActualTopicName()));
  }
  else {
    //TODO: JMS
  }
  thread = new Thread(this);
  thread.start();
  return true;
}
origin: gwenshap/kafka-examples

countAndSum.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Integer>>, Void>() {
  @Override
  public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception {
origin: aseigneurin/kafka-sandbox

public static void main(String[] args) {
  SparkConf conf = new SparkConf()
      .setAppName("kafka-sandbox")
      .setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));
  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "localhost:9092");
  JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
      String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);
  directKafkaStream
      .map(message -> recordInjection.invert(message._2).get())
      .foreachRDD(rdd -> {
        rdd.foreach(record -> {
          System.out.println("str1= " + record.get("str1")
              + ", str2= " + record.get("str2")
              + ", int1=" + record.get("int1"));
        });
      });
  ssc.start();
  ssc.awaitTermination();
}
origin: Stratio/Decision

private void configureDataContext(JavaStreamingContext context) {
  Map<String, Integer> baseTopicMap = new HashMap<>();
  configurationContext.getDataTopics().forEach( dataTopic -> baseTopicMap.put(dataTopic, 1));
  kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(), configurationContext
      .getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());
  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
  kafkaParams.put("group.id", configurationContext.getGroupId());
   /*
   groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one,
   consumer of the group. Decision topics have only one partition (by default), so if we have two or
   more Decision instances (consumers) reading the same topic with the same groupId, only one instance
   will be able to read from the topic.
   */
  JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
      kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams, baseTopicMap,
      StorageLevel.MEMORY_AND_DISK_SER());
  AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
  JavaDStream<StratioStreamingMessage>  insertRequests = messages.filter(
      new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
      .map(avroDeserializeMessageFunction);
  InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(streamOperationService,
      configurationContext.getZookeeperHostsQuorum());
  insertRequests.foreachRDD(insertIntoStreamFunction);
}
origin: jgperrin/net.jgp.labs.spark

 private void start() {
  // Create a local StreamingContext with two working threads and a batch
  // interval of 5 seconds
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
    "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
    .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
    .getInputDirectory());

  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
   jssc.awaitTermination();
  } catch (InterruptedException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
}
origin: org.apache.spark/spark-streaming_2.11

 ssc.receiverStream(new JavaSocketReceiver("localhost", server.port()));
JavaDStream<String> mapped = input.map((Function<String, String>) v1 -> v1 + ".");
mapped.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
 long count = rdd.count();
 dataCounter.addAndGet(count);
origin: org.apache.spark/spark-streaming_2.11

@SuppressWarnings("unchecked")
@Test
public void testForeachRDD() {
 final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator();
 final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator();
 List<List<Integer>> inputData = Arrays.asList(
   Arrays.asList(1,1,1),
   Arrays.asList(1,1,1));
 JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1);
 JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output
 stream.foreachRDD(rdd -> {
  accumRdd.add(1);
  rdd.foreach(i -> accumEle.add(1));
 });
 // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java
 stream.foreachRDD((rdd, time) -> {});
 JavaTestUtils.runStreams(ssc, 2, 2);
 Assert.assertEquals(2, accumRdd.value().intValue());
 Assert.assertEquals(6, accumEle.value().intValue());
}
origin: org.apache.spark/spark-streaming_2.10

The spark-streaming_2.10 artifact contains the same two snippets (the socket-receiver count and testForeachRDD) shown above for spark-streaming_2.11.

Popular methods of JavaDStream

  • map
  • mapToPair
  • union
  • filter
  • flatMap
  • dstream
  • countByValue
  • cache
  • transformToPair
  • window
  • count
  • transform
  • countByValueAndWindow
  • flatMapToPair
  • print
  • reduceByWindow
  • repartition
  • glom
  • mapPartitions
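
As a rough illustration of how several of these methods compose ahead of an output operation (again a sketch assuming the Spark 2.x Java API; the word-count example, window lengths, and class name are invented for illustration, not drawn from the snippets above):

import java.util.Arrays;

import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import scala.Tuple2;

public final class PopularMethodsSketch {

  // lines: any text stream, e.g. from jssc.textFileStream(...) or jssc.socketTextStream(...)
  public static void wire(JavaDStream<String> lines) {
    JavaDStream<String> words = lines
        .flatMap(line -> Arrays.asList(line.split("\\s+")).iterator()) // flatMap: one line -> many words
        .filter(word -> !word.isEmpty())                               // filter: drop empty tokens
        .cache();                                                      // cache: reuse across the operations below

    // window + mapToPair: word counts over a 30-second window, recomputed every 10 seconds.
    JavaPairDStream<String, Integer> counts = words
        .window(Durations.seconds(30), Durations.seconds(10))
        .mapToPair(word -> new Tuple2<>(word, 1))
        .reduceByKey(Integer::sum);

    counts.print(); // print: an output operation, like foreachRDD
  }
}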
