@Test
public void binaryFilesCaching() throws Exception {
  // Reusing the wholeText files example
  byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);

  String tempDirName = tempDir.getAbsolutePath();
  File file1 = new File(tempDirName + "/part-00000");
  FileOutputStream fos1 = new FileOutputStream(file1);
  FileChannel channel1 = fos1.getChannel();
  ByteBuffer bbuf = ByteBuffer.wrap(content1);
  channel1.write(bbuf);
  channel1.close();

  JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
  readRDD.foreach(pair -> pair._2().toArray()); // force the file to read
  List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
  for (Tuple2<String, PortableDataStream> res : result) {
    assertArrayEquals(content1, res._2().toArray());
  }
}
@Override
public Void call(JavaPairRDD<String, Integer> values, Time time) throws Exception {
  values.foreach(new VoidFunction<Tuple2<String, Integer>>() {
    @Override
    public void call(Tuple2<String, Integer> tuple) throws Exception {
      HBaseCounterIncrementor incrementor = HBaseCounterIncrementor.getInstance(
          broadcastTableName.value(), broadcastColumnFamily.value());
      incrementor.increment("Counter", tuple._1(), tuple._2());
      System.out.println("------------------------------- Counter:" + tuple._1() + "," + tuple._2());
    }
  });
  return null;
}});
deleteLocalFile(outputPath + "-" + RasterizationUtils.getImageTileName(zoomLevel, partitionOnX, partitionOnY, i), imageType); distributedImage.foreach(new VoidFunction<Tuple2<Integer, ImageSerializableWrapper>>()
deleteHadoopFile(outputPath + "-" + RasterizationUtils.getImageTileName(zoomLevel, partitionOnX, partitionOnY, i) + ".", imageType); distributedImage.foreach(new VoidFunction<Tuple2<Integer, ImageSerializableWrapper>>()
@Override
public void forEach(@NonNull SerializableBiConsumer<? super T, ? super U> consumer) {
  rdd.foreach(tuple -> {
    Configurator.INSTANCE.configure(configBroadcast.value());
    consumer.accept(tuple._1(), tuple._2());
  });
}
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("ImagenetSampler")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  JavaSparkContext sc = new JavaSparkContext(conf);

  int numExecutors = conf.getInt("spark.executor.instances", -1);
  System.out.println("number of executors = " + numExecutors);

  System.out.println("Data Loading...");
  JavaPairRDD<FloatWritable, ArrayPrimitiveWritable> train_seq =
      sc.sequenceFile("imagenet_sampled.hsf", FloatWritable.class, ArrayPrimitiveWritable.class);
  train_seq.foreach(new VoidFunction<Tuple2<FloatWritable, ArrayPrimitiveWritable>>() {
    @Override
    public void call(Tuple2<FloatWritable, ArrayPrimitiveWritable> arg0) throws Exception {
      System.out.println(arg0._1.get() + " " + ((float[]) arg0._2.get()).length);
    }
  });
  sc.close();
}
/**
 * The task body.
 */
public void run(String inputFilePath) {
  /*
   * This is the address of the Spark cluster. We will call the task from WordCountTest and we
   * use a local standalone cluster. [*] means use all the cores available.
   * See http://spark.apache.org/docs/latest/submitting-applications.html#master-urls.
   */
  String master = "local[*]";

  /*
   * Initialises a Spark context.
   */
  SparkConf conf = new SparkConf()
      .setAppName(WordCountTask.class.getName())
      .setMaster(master);
  JavaSparkContext context = new JavaSparkContext(conf);

  /*
   * Performs a word count sequence of tasks and prints the output with a logger.
   */
  context.textFile(inputFilePath)
      .flatMap(text -> Arrays.asList(text.split(" ")).iterator())
      .mapToPair(word -> new Tuple2<>(word, 1))
      .reduceByKey((a, b) -> a + b)
      .foreach(result -> LOGGER.info(
          String.format("Word [%s] count [%d].", result._1(), result._2)));
}
}
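For context, a minimal sketch of how run(String) might be driven from the WordCountTest mentioned in the comment above. The no-argument WordCountTask constructor, the JUnit 4 harness, and the resource path are assumptions made for illustration; the actual test may differ.

import java.nio.file.Paths;

import org.junit.Test;

public class WordCountTest {

  @Test
  public void countsWordsInASmallFile() {
    // Illustrative input; point this at any small local text file.
    String inputFilePath = Paths.get("src", "test", "resources", "loremipsum.txt").toString();

    // Assumes WordCountTask exposes a no-argument constructor and the run(String) shown above.
    new WordCountTask().run(inputFilePath);
  }
}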
extractSessionDetailRDD.foreach(new VoidFunction<Tuple2<String, Tuple2<String, Row>>>() {
s3Operator.deleteImage(bucketName, path + "-" + RasterizationUtils.getImageTileName(zoomLevel, partitionOnX, partitionOnY, i) + "." + imageType.getTypeName()); distributedImage.foreach(new VoidFunction<Tuple2<Integer, ImageSerializableWrapper>>()
public static void main(String[] args) {
  SparkConf conf = new SparkConf()
      .setAppName("kafka-sandbox")
      .setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "localhost:9092");

  JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
      String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

  directKafkaStream.foreachRDD(rdd -> {
    System.out.println("--- New RDD with " + rdd.partitions().size()
        + " partitions and " + rdd.count() + " records");
    rdd.foreach(record -> System.out.println(record._2));
  });

  ssc.start();
  ssc.awaitTermination();
}
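The snippet above uses the legacy Kafka 0.8 direct-stream API (StringDecoder, metadata.broker.list). As a point of comparison, here is a minimal sketch of the same consume-and-print loop against the spark-streaming-kafka-0-10 integration; the broker address, topic, and group id are the same illustrative values, and the choice of artifact/version is an assumption rather than something the original code implies.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class KafkaSandbox010 {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

    // Consumer configuration for the 0.10+ integration; all values are illustrative.
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "kafka-sandbox");
    kafkaParams.put("auto.offset.reset", "latest");

    JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
        ssc,
        LocationStrategies.PreferConsistent(),
        ConsumerStrategies.<String, String>Subscribe(Collections.singleton("mytopic"), kafkaParams));

    // Same structure as the snippet above: log each batch, then print every record value.
    stream.foreachRDD(rdd -> {
      System.out.println("--- New RDD with " + rdd.partitions().size()
          + " partitions and " + rdd.count() + " records");
      rdd.foreach(record -> System.out.println(record.value()));
    });

    ssc.start();
    ssc.awaitTermination();
  }
}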
sessionDetailRDD.foreach(new VoidFunction<Tuple2<String,Tuple2<String,Row>>>() {
resultRDD.foreach(countryPathPair ->
rdd.foreach(e -> System.out.println(e)); });
qapWitness.coefficientsH().filter(e -> e._1 >= qapWitness.degree() - 2).foreach(coeff -> { if (coeff._1 == qapWitness.degree() - 2) { assert (!coeff._2.equals(zero));