@Test
public void emptyRDD() {
  JavaRDD<String> rdd = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, rdd.count());
}
@Test
public void isEmpty() {
  assertTrue(sc.emptyRDD().isEmpty());
  assertTrue(sc.parallelize(new ArrayList<Integer>()).isEmpty());
  assertFalse(sc.parallelize(Arrays.asList(1)).isEmpty());
  assertTrue(sc.parallelize(Arrays.asList(1, 2, 3), 3).filter(i -> i < 0).isEmpty());
  assertFalse(sc.parallelize(Arrays.asList(1, 2, 3)).filter(i -> i > 1).isEmpty());
}
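A note on the assertions above: RDD.isEmpty() is implemented via take(1), so it stops as soon as one element is found rather than scanning the whole dataset the way count() == 0 would. A minimal standalone sketch of the same checks; the class name and local SparkConf settings are illustrative assumptions, not part of the original tests:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class IsEmptyDemo {
  public static void main(String[] args) {
    // Assumed local configuration, for illustration only
    SparkConf conf = new SparkConf().setAppName("is-empty-demo").setMaster("local[2]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // isEmpty() delegates to take(1), so it inspects at most one element
      System.out.println(sc.emptyRDD().isEmpty());                          // true
      System.out.println(sc.parallelize(Arrays.asList(1, 2, 3)).isEmpty()); // false
    }
  }
}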
@SuppressWarnings("unchecked") @Test public void intersection() { List<Integer> ints1 = Arrays.asList(1, 10, 2, 3, 4, 5); List<Integer> ints2 = Arrays.asList(1, 6, 2, 3, 7, 8); JavaRDD<Integer> s1 = sc.parallelize(ints1); JavaRDD<Integer> s2 = sc.parallelize(ints2); JavaRDD<Integer> intersections = s1.intersection(s2); assertEquals(3, intersections.count()); JavaRDD<Integer> empty = sc.emptyRDD(); JavaRDD<Integer> emptyIntersection = empty.intersection(s2); assertEquals(0, emptyIntersection.count()); List<Double> doubles = Arrays.asList(1.0, 2.0); JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles); JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles); JavaDoubleRDD dIntersection = d1.intersection(d2); assertEquals(2, dIntersection.count()); List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(1, 2)); pairs.add(new Tuple2<>(3, 4)); JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> pIntersection = p1.intersection(p2); assertEquals(2, pIntersection.count()); }
/**
 * Read a memoryRDD from the storage location.
 * The default implementation returns an empty RDD.
 *
 * @param configuration the configuration for the {@link org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer}
 * @param memoryKey     the memory key of the memoryRDD
 * @param sparkContext  the Spark context with the requisite methods for generating a {@link JavaPairRDD}
 * @param <K>           the key class of the memoryRDD
 * @param <V>           the value class of the memoryRDD
 * @return the memoryRDD with respective key/value pairs
 */
public default <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) {
    return sparkContext.<Tuple2<K, V>>emptyRDD().mapToPair(t -> t);
}
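JavaSparkContext has no emptyPairRDD(), which is why the default implementation above builds one from an empty JavaRDD of tuples and an identity mapToPair. A minimal standalone sketch of that idiom; the class name and local SparkConf settings are assumptions for illustration:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class EmptyPairRDDSketch {
  public static void main(String[] args) {
    // Assumed local configuration, for illustration only
    SparkConf conf = new SparkConf().setAppName("empty-pair-rdd").setMaster("local[1]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // The identity mapToPair converts JavaRDD<Tuple2<K, V>> into JavaPairRDD<K, V>
      JavaPairRDD<String, Long> empty = sc.<Tuple2<String, Long>>emptyRDD().mapToPair(t -> t);
      System.out.println(empty.count()); // 0
    }
  }
}

JavaPairRDD.fromJavaRDD, used by several snippets below, achieves the same wrapping without the mapToPair step.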
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) {
    if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
    Spark.create(sparkContext.sc());
    final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open());
    return graphLocation.isPresent()
            ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD())
            : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
}
@Override
public JavaRDD<T> toRDD(JavaSparkContext sc) {
    return sc.emptyRDD();
}
public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String sourceName, Class<K> keyClass, Class<V> valueClass) {
  Set<String> inputNames = sourceInputs.get(sourceName);
  if (inputNames == null || inputNames.isEmpty()) {
    // should never happen if validation happened correctly at pipeline configure time
    throw new IllegalArgumentException(
        sourceName + " has no input. Please check that the source calls setInput at some point.");
  }
  // seed with an empty pair RDD so every input can be folded in uniformly via union
  JavaPairRDD<K, V> inputRDD = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD());
  for (String inputName : inputNames) {
    inputRDD = inputRDD.union(createInputRDD(sec, jsc, inputName, keyClass, valueClass));
  }
  return inputRDD;
}
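Seeding the loop above with an empty pair RDD means the first input needs no special-casing: union with an empty RDD is an identity. A hedged generic sketch of the same fold; the class and helper names are hypothetical, not from the original code:

import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class RDDUnions {
  private RDDUnions() {}

  // Hypothetical helper illustrating the seed-and-union pattern above
  public static <K, V> JavaPairRDD<K, V> unionAll(JavaSparkContext jsc, List<JavaPairRDD<K, V>> inputs) {
    JavaPairRDD<K, V> result = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD());
    for (JavaPairRDD<K, V> input : inputs) {
      result = result.union(input); // union concatenates partitions; no shuffle is triggered
    }
    return result;
  }
}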
@Override
public <T> SparkStream<T> empty() {
    return new SparkStream<>(sparkContext().emptyRDD());
}
@Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config, String compactionInstantTime) throws IOException {
  if (compactionPlan == null || (compactionPlan.getOperations() == null) || (compactionPlan.getOperations().isEmpty())) {
    return jsc.emptyRDD();
  }
  HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
  // Compacting is very similar to applying updates to existing files
  HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
  List<CompactionOperation> operations = compactionPlan.getOperations().stream()
      .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
  log.info("Compactor compacting " + operations + " files");
  return jsc.parallelize(operations, operations.size())
      .map(s -> compact(table, metaClient, config, s, compactionInstantTime))
      .flatMap(List::iterator);
}
public JavaRDD<DI> getRDD(final int filterKey) {
    final long count = getCount(filterKey);
    log.info("#records for :{} = {}", filterKey, count);
    if (count > 0) {
        return getRDD(new FilterFunction<>(filterKey));
    } else {
        return (new JavaSparkContext(inputRDD.rdd().sparkContext())).emptyRDD();
    }
}
@Test
public void testTagLocationWithEmptyRDD() throws Exception {
  // An empty RDD of records to be tagged
  JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
  // Also create the metadata and config
  HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
  HoodieTable table = HoodieTable.getHoodieTable(metadata, config, jsc);
  // Let's tag
  HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
  try {
    bloomIndex.tagLocation(recordRDD, jsc, table);
  } catch (IllegalArgumentException e) {
    fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
  }
}