@Test
public void emptyRDD() {
  JavaRDD<String> rdd = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, rdd.count());
}
@Test
public void isEmpty() {
  assertTrue(sc.emptyRDD().isEmpty());
  assertTrue(sc.parallelize(new ArrayList<Integer>()).isEmpty());
  assertFalse(sc.parallelize(Arrays.asList(1)).isEmpty());
  assertTrue(sc.parallelize(Arrays.asList(1, 2, 3), 3).filter(i -> i < 0).isEmpty());
  assertFalse(sc.parallelize(Arrays.asList(1, 2, 3)).filter(i -> i > 1).isEmpty());
}
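A note on the assertions above: RDD.isEmpty() is implemented via take(1), so it stops as soon as one element is found rather than scanning the whole dataset the way count() == 0 would. A minimal standalone sketch of the same checks; the class name and local SparkConf settings are illustrative assumptions, not part of the original tests:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class IsEmptyDemo {
  public static void main(String[] args) {
    // Assumed local configuration, for illustration only
    SparkConf conf = new SparkConf().setAppName("is-empty-demo").setMaster("local[2]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // isEmpty() delegates to take(1), so it inspects at most one element
      System.out.println(sc.emptyRDD().isEmpty());                          // true
      System.out.println(sc.parallelize(Arrays.asList(1, 2, 3)).isEmpty()); // false
    }
  }
}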
@SuppressWarnings("unchecked") @Test public void intersection() { List<Integer> ints1 = Arrays.asList(1, 10, 2, 3, 4, 5); List<Integer> ints2 = Arrays.asList(1, 6, 2, 3, 7, 8); JavaRDD<Integer> s1 = sc.parallelize(ints1); JavaRDD<Integer> s2 = sc.parallelize(ints2); JavaRDD<Integer> intersections = s1.intersection(s2); assertEquals(3, intersections.count()); JavaRDD<Integer> empty = sc.emptyRDD(); JavaRDD<Integer> emptyIntersection = empty.intersection(s2); assertEquals(0, emptyIntersection.count()); List<Double> doubles = Arrays.asList(1.0, 2.0); JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles); JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles); JavaDoubleRDD dIntersection = d1.intersection(d2); assertEquals(2, dIntersection.count()); List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(1, 2)); pairs.add(new Tuple2<>(3, 4)); JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> pIntersection = p1.intersection(p2); assertEquals(2, pIntersection.count()); }
/**
 * Read a memoryRDD from the storage location.
 * The default implementation returns an empty RDD.
 *
 * @param configuration the configuration for the {@link org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer}
 * @param memoryKey     the memory key of the memoryRDD
 * @param sparkContext  the Spark context with the requisite methods for generating a {@link JavaPairRDD}
 * @param <K>           the key class of the memoryRDD
 * @param <V>           the value class of the memoryRDD
 * @return the memoryRDD with respective key/value pairs
 */
public default <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) {
    return sparkContext.<Tuple2<K, V>>emptyRDD().mapToPair(t -> t);
}
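JavaSparkContext has no emptyPairRDD(), which is why the default implementation above builds one from an empty JavaRDD of tuples and an identity mapToPair. A minimal standalone sketch of that idiom; the class name and local SparkConf settings are assumptions for illustration:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class EmptyPairRDDSketch {
  public static void main(String[] args) {
    // Assumed local configuration, for illustration only
    SparkConf conf = new SparkConf().setAppName("empty-pair-rdd").setMaster("local[1]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // The identity mapToPair converts JavaRDD<Tuple2<K, V>> into JavaPairRDD<K, V>
      JavaPairRDD<String, Long> empty = sc.<Tuple2<String, Long>>emptyRDD().mapToPair(t -> t);
      System.out.println(empty.count()); // 0
    }
  }
}

JavaPairRDD.fromJavaRDD, used by several snippets below, achieves the same wrapping without the mapToPair step.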
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) {
    if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
    Spark.create(sparkContext.sc());
    final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open());
    return graphLocation.isPresent()
            ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD())
            : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
}
@Override
public JavaRDD<T> toRDD(JavaSparkContext sc) {
    return sc.emptyRDD();
}
public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String sourceName, Class<K> keyClass, Class<V> valueClass) {
  Set<String> inputNames = sourceInputs.get(sourceName);
  if (inputNames == null || inputNames.isEmpty()) {
    // should never happen if validation happened correctly at pipeline configure time
    throw new IllegalArgumentException(
        sourceName + " has no input. Please check that the source calls setInput at some point.");
  }
  // seed with an empty pair RDD so every input can be folded in uniformly via union
  JavaPairRDD<K, V> inputRDD = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD());
  for (String inputName : inputNames) {
    inputRDD = inputRDD.union(createInputRDD(sec, jsc, inputName, keyClass, valueClass));
  }
  return inputRDD;
}
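Seeding the loop above with an empty pair RDD means the first input needs no special-casing: union with an empty RDD is an identity. A hedged generic sketch of the same fold; the class and helper names are hypothetical, not from the original code:

import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class RDDUnions {
  private RDDUnions() {}

  // Hypothetical helper illustrating the seed-and-union pattern above
  public static <K, V> JavaPairRDD<K, V> unionAll(JavaSparkContext jsc, List<JavaPairRDD<K, V>> inputs) {
    JavaPairRDD<K, V> result = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD());
    for (JavaPairRDD<K, V> input : inputs) {
      result = result.union(input); // union concatenates partitions; no shuffle is triggered
    }
    return result;
  }
}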
@Override
public <T> SparkStream<T> empty() {
    return new SparkStream<>(sparkContext().emptyRDD());
}
@Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config, String compactionInstantTime) throws IOException {
  if (compactionPlan == null || (compactionPlan.getOperations() == null) || (compactionPlan.getOperations().isEmpty())) {
    return jsc.emptyRDD();
  }
  HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
  // Compacting is very similar to applying updates to existing files
  HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
  List<CompactionOperation> operations = compactionPlan.getOperations().stream()
      .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
  log.info("Compactor compacting " + operations + " files");
  return jsc.parallelize(operations, operations.size())
      .map(s -> compact(table, metaClient, config, s, compactionInstantTime))
      .flatMap(List::iterator);
}
public JavaRDD<DI> getRDD(final int filterKey) {
    final long count = getCount(filterKey);
    log.info("#records for :{} = {}", filterKey, count);
    if (count > 0) {
        return getRDD(new FilterFunction<>(filterKey));
    } else {
        return (new JavaSparkContext(inputRDD.rdd().sparkContext())).emptyRDD();
    }
}
@Test
public void testTagLocationWithEmptyRDD() throws Exception {
  // An empty RDD of records to be tagged
  JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
  // Also create the metadata and config
  HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
  HoodieTable table = HoodieTable.getHoodieTable(metadata, config, jsc);
  // Let's tag
  HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
  try {
    bloomIndex.tagLocation(recordRDD, jsc, table);
  } catch (IllegalArgumentException e) {
    fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
  }
}