Refine search
/**
 * Checks every accessor of an {@code Optional} created through {@code of} with a
 * non-null value: it reports presence and always yields the wrapped value, ignoring
 * any supplied fallback.
 */
@Test
public void testOf() {
    final Integer wrapped = Integer.valueOf(1);

    // Presence checks.
    Assert.assertTrue(Optional.of(1).isPresent());
    Assert.assertNotNull(Optional.of(1).orNull());

    // The wrapped value wins over any fallback.
    Assert.assertEquals(wrapped, Optional.of(1).get());
    Assert.assertEquals(wrapped, Optional.of(1).or(2));
    Assert.assertEquals(wrapped, Optional.of(1).orElse(2));
}
/**
 * Checks {@code ofNullable} for both a non-null argument (behaves like a present
 * value) and a {@code null} argument (behaves like an absent value that falls back
 * to the supplied default).
 */
@Test
public void testOfNullable() {
    final Integer wrapped = Integer.valueOf(1);
    final Integer fallback = Integer.valueOf(2);

    // Non-null argument: the optional is present and the fallback is ignored.
    Assert.assertTrue(Optional.ofNullable(1).isPresent());
    Assert.assertNotNull(Optional.ofNullable(1).orNull());
    Assert.assertEquals(wrapped, Optional.ofNullable(1).get());
    Assert.assertEquals(wrapped, Optional.ofNullable(1).or(2));
    Assert.assertEquals(wrapped, Optional.ofNullable(1).orElse(2));

    // Null argument: the optional is absent and the fallback is returned.
    Assert.assertFalse(Optional.ofNullable(null).isPresent());
    Assert.assertNull(Optional.ofNullable(null).orNull());
    Assert.assertEquals(fallback, Optional.<Integer>ofNullable(null).or(2));
    Assert.assertEquals(fallback, Optional.<Integer>ofNullable(null).orElse(2));
}
final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration)); final Storage sparkContextStorage = SparkContextStorage.open(); if (loadedGraphRDD.partitioner().isPresent()) this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: " + loadedGraphRDD.partitioner().get()); else { if (!skipPartitioner) { final Partitioner partitioner = new HashPartitioner(this.workersSet ? this.workers : loadedGraphRDD.partitions().size()); this.logger.debug("Partitioning the loaded graphRDD: " + partitioner); loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner); partitioned = true; assert loadedGraphRDD.partitioner().isPresent(); } else { assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent(); // no easy way to test this with a test case this.logger.debug("Partitioning has been skipped for the loaded graphRDD via " + GREMLIN_SPARK_SKIP_PARTITIONER); sparkContext.cancelAllJobs(); throw new TraversalInterruptedException();
final Configuration vertexProgramConfiguration) { // has the VertexProgram.loadState() information boolean partitionedGraphRDD = graphRDD.partitioner().isPresent(); assert graphRDD.partitioner().get().equals(viewIncomingRDD.partitioner().get()); final JavaPairRDD<Object, ViewOutgoingPayload<M>> viewOutgoingRDD = ((null == viewIncomingRDD) ? graphRDD.mapValues(vertexWritable -> new Tuple2<>(vertexWritable, Optional.<ViewIncomingPayload<M>>absent())) : // first iteration will not have any views or messages graphRDD.leftOuterJoin(viewIncomingRDD)) // every other iteration may have views and messages final boolean hasViewAndMessages = vertexViewIncoming._2()._2().isPresent(); // if this is the first iteration, then there are no views or messages final List<DetachedVertexProperty<Object>> previousView = hasViewAndMessages ? vertexViewIncoming._2()._2().get().getView() : memory.isInitialIteration() ? new ArrayList<>() : Collections.emptyList(); final List<M> incomingMessages = hasViewAndMessages ? vertexViewIncoming._2()._2().get().getIncomingMessages() : Collections.emptyList(); assert graphRDD.partitioner().get().equals(viewOutgoingRDD.partitioner().get()); viewOutgoingRDD.flatMapToPair(messageFunction).reduceByKey(graphRDD.partitioner().get(), reducerFunction) : viewOutgoingRDD.flatMapToPair(messageFunction).reduceByKey(reducerFunction)) .mapValues(payload -> { // handle various corner cases of when views don't exist, messages don't exist, or neither exists. assert graphRDD.partitioner().get().equals(newViewIncomingRDD.partitioner().get()); newViewIncomingRDD .foreachPartition(partitionIterator -> {
/**
 * Left-outer-joins two small pair RDDs and checks the number of joined rows as well
 * as the key of the first row that has no match on the right-hand side.
 */
@Test
public void leftOuterJoin() {
    List<Tuple2<Integer, Integer>> leftPairs = Arrays.asList(
        new Tuple2<>(1, 1),
        new Tuple2<>(1, 2),
        new Tuple2<>(2, 1),
        new Tuple2<>(3, 1));
    List<Tuple2<Integer, Character>> rightPairs = Arrays.asList(
        new Tuple2<>(1, 'x'),
        new Tuple2<>(2, 'y'),
        new Tuple2<>(2, 'z'),
        new Tuple2<>(4, 'w'));

    JavaPairRDD<Integer, Integer> rdd1 = sc.parallelizePairs(leftPairs);
    JavaPairRDD<Integer, Character> rdd2 = sc.parallelizePairs(rightPairs);

    // Keys 1 and 2 fan out against their matches; key 3 survives with an absent right side.
    List<Tuple2<Integer, Tuple2<Integer, Optional<Character>>>> joined =
        rdd1.leftOuterJoin(rdd2).collect();
    Assert.assertEquals(5, joined.size());

    // Only key 3 has no counterpart in rdd2.
    Tuple2<Integer, Tuple2<Integer, Optional<Character>>> firstUnmatched =
        rdd1.leftOuterJoin(rdd2)
            .filter(row -> !row._2()._2().isPresent())
            .first();
    Integer unmatchedKey = firstUnmatched._1();
    Assert.assertEquals(3, unmatchedKey.intValue());
}
@Test public void checkpointAndRestore() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); sc.setCheckpointDir(tempDir.getAbsolutePath()); assertFalse(rdd.isCheckpointed()); rdd.checkpoint(); rdd.count(); // Forces the DAG to cause a checkpoint assertTrue(rdd.isCheckpointed()); assertTrue(rdd.getCheckpointFile().isPresent()); JavaRDD<Integer> recovered = sc.checkpointFile(rdd.getCheckpointFile().get()); assertEquals(Arrays.asList(1, 2, 3, 4, 5), recovered.collect()); }
new Tuple2<>("new york", 2)); JavaRDD<Tuple2<String, Integer>> tmpRDD = ssc.sparkContext().parallelize(initial); JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD); if (state.isPresent()) { out += state.get(); return Optional.of(out); }, new HashPartitioner(1), initialRDD); JavaTestUtils.attachTestOutputStream(updated);
.setName(operator.getName() + "::extract-left") .flatMapToPair( t -> { new Tuple2<>( t._1, new Tuple2<>(opt(t._2), Optional.<SparkElement>empty()))) .iterator(); key -> context.getExecutionEnvironment() .broadcast( toBroadcast( left new Tuple2<>( t._1, new Tuple2<>(Optional.<SparkElement>empty(), opt(t._2)))) .iterator(); final SparkElement first = t._2._1.orNull(); final SparkElement second = t._2._2.orNull(); final Window window = first == null ? second.getWindow() : first.getWindow(); final long maxTimestamp =
pairs.add(new Tuple2<>(1, 3)); JavaPairRDD<Integer, Integer> rdd = sc.parallelizePairs(pairs); rdd.repartitionAndSortWithinPartitions(partitioner); assertTrue(repartitioned.partitioner().isPresent()); assertEquals(repartitioned.partitioner().get(), partitioner); List<List<Tuple2<Integer, Integer>>> partitions = repartitioned.glom().collect(); assertEquals(partitions.get(0),
public static <M> JavaPairRDD<Object, VertexWritable> prepareFinalGraphRDD( final JavaPairRDD<Object, VertexWritable> graphRDD, final JavaPairRDD<Object, ViewIncomingPayload<M>> viewIncomingRDD, final Set<VertexComputeKey> vertexComputeKeys) { // the graphRDD and the viewRDD must have the same partitioner if (graphRDD.partitioner().isPresent()) assert (graphRDD.partitioner().get().equals(viewIncomingRDD.partitioner().get())); final String[] vertexComputeKeysArray = VertexProgramHelper.vertexComputeKeysAsArray(vertexComputeKeys); // the compute keys as an array return graphRDD.leftOuterJoin(viewIncomingRDD) .mapValues(tuple -> { final StarGraph.StarVertex vertex = tuple._1().get(); vertex.dropVertexProperties(vertexComputeKeysArray); // drop all existing compute keys // attach the final computed view to the cached graph final List<DetachedVertexProperty<Object>> view = tuple._2().isPresent() ? tuple._2().get().getView() : Collections.emptyList(); for (final DetachedVertexProperty<Object> property : view) { if (!VertexProgramHelper.isTransientVertexComputeKey(property.key(), vertexComputeKeys)) property.attach(Attachable.Method.create(vertex)); } return tuple._1(); }); }
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys, JavaSparkContext jsc, HoodieTable<T> hoodieTable) { JavaPairRDD<String, String> partitionRecordKeyPairRDD = hoodieKeys .mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey())); // Lookup indexes for all the partition/recordkey pair JavaPairRDD<String, String> rowKeyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable); JavaPairRDD<String, HoodieKey> rowKeyHoodieKeyPairRDD = hoodieKeys .mapToPair(key -> new Tuple2<>(key.getRecordKey(), key)); return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).mapToPair(keyPathTuple -> { Optional<String> recordLocationPath; if (keyPathTuple._2._2.isPresent()) { String fileName = keyPathTuple._2._2.get(); String partitionPath = keyPathTuple._2._1.getPartitionPath(); recordLocationPath = Optional .of(new Path(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath), fileName) .toUri().getPath()); } else { recordLocationPath = Optional.absent(); } return new Tuple2<>(keyPathTuple._2._1, recordLocationPath); }); }
/** * Tag the <rowKey, filename> back to the original HoodieRecord RDD. */ private JavaRDD<HoodieRecord<T>> tagLocationBacktoRecords( JavaPairRDD<String, String> rowKeyFilenamePairRDD, JavaRDD<HoodieRecord<T>> recordRDD) { JavaPairRDD<String, HoodieRecord<T>> rowKeyRecordPairRDD = recordRDD .mapToPair(record -> new Tuple2<>(record.getRecordKey(), record)); // Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null), // so we do left outer join. return rowKeyRecordPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).values().map(v1 -> { HoodieRecord<T> record = v1._1(); if (v1._2().isPresent()) { String filename = v1._2().get(); if (filename != null && !filename.isEmpty()) { // When you have a record in multiple files in the same partition, then rowKeyRecordPairRDD will have 2 // entries with the same exact in memory copy of the HoodieRecord and the 2 separate filenames that the // record is found in. This will result in setting currentLocation 2 times and it will fail the second time. // This check will create a new in memory copy of the hoodie record. if (record.getCurrentLocation() != null) { record = new HoodieRecord<T>(record.getKey(), record.getData()); } record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename))); } } return record; }); }
/**
 * Left-outer-joins this stream with {@code stream} on the key.
 *
 * @param stream the stream to join with
 * @param <V>    the value type of the other stream
 * @return a stream of (key, (left value, right value)) where the right value is
 *         {@code null} for keys that have no match in {@code stream}
 */
@Override
public <V> MPairStream<T, Map.Entry<U, V>> leftOuterJoin(@NonNull MPairStream<? extends T, ? extends V> stream) {
   // orNull() is the dedicated accessor for "value or null"; or(null) relies on the
   // Optional implementation accepting a null default.
   return new SparkPairStream<>(rdd.leftOuterJoin(toPairRDD(stream))
                                   .mapToPair(t -> Cast.as(
                                      new scala.Tuple2<>(t._1(), Tuple2.of(t._2()._1(), t._2()._2().orNull())))));
}
/**
 * Checks {@code fromNullable} for both a non-null argument (behaves like a present
 * value) and a {@code null} argument (behaves like an absent value that falls back
 * to the supplied default).
 */
@Test
public void testFromNullable() {
    final Integer wrapped = Integer.valueOf(1);
    final Integer fallback = Integer.valueOf(2);

    // Non-null argument: the optional is present and the fallback is ignored.
    Assert.assertTrue(Optional.fromNullable(1).isPresent());
    Assert.assertNotNull(Optional.fromNullable(1).orNull());
    Assert.assertEquals(wrapped, Optional.fromNullable(1).get());
    Assert.assertEquals(wrapped, Optional.fromNullable(1).or(2));
    Assert.assertEquals(wrapped, Optional.fromNullable(1).orElse(2));

    // Null argument: the optional is absent and the fallback is returned.
    Assert.assertFalse(Optional.fromNullable(null).isPresent());
    Assert.assertNull(Optional.fromNullable(null).orNull());
    Assert.assertEquals(fallback, Optional.<Integer>fromNullable(null).or(2));
    Assert.assertEquals(fallback, Optional.<Integer>fromNullable(null).orElse(2));
}
/**
 * Right-outer-joins this stream with {@code stream} on the key.
 *
 * @param stream the stream to join with
 * @param <V>    the value type of the other stream
 * @return a stream of (key, (left value, right value)) where the left value is
 *         {@code null} for keys that exist only in {@code stream}
 */
@Override
public <V> MPairStream<T, Map.Entry<U, V>> rightOuterJoin(@NonNull MPairStream<? extends T, ? extends V> stream) {
   // orNull() is the dedicated accessor for "value or null"; or(null) relies on the
   // Optional implementation accepting a null default.
   return new SparkPairStream<>(rdd.rightOuterJoin(toPairRDD(stream))
                                   .mapToPair(t -> Cast.as(
                                      new scala.Tuple2<>(t._1(), Tuple2.of(t._2()._1().orNull(), t._2()._2())))));
}