@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient) {
    SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
    Class clazz = m.getEntityClazz();

    // Read the mapped Cassandra table into an RDD of entity objects,
    // then register it as a temporary table so it can be queried via Spark SQL.
    JavaRDD cassandraRowsRDD =
        functions.cassandraTable(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapRowTo(clazz));
    sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz).registerTempTable(m.getTableName());
}
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient) {
    try {
        // Convert the Java list of entities into a Scala Seq and parallelize it
        // into a single-partition RDD of the entity class.
        Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
        ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
        JavaRDD entityRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();

        // Write the RDD back to the mapped Cassandra table.
        CassandraJavaUtil.javaFunctions(entityRDD)
            .writerBuilder(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapToRow(m.getEntityClazz()))
            .saveToCassandra();
        return true;
    } catch (Exception e) {
        throw new KunderaException("Cannot persist object(s)", e);
    }
}
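A minimal usage sketch, not part of the client code above: Kundera exposes this Spark-Cassandra client through the standard JPA API, so an entity persisted through an EntityManager would eventually reach persist()/registerTable(). The persistence-unit name and the Person entity below are hypothetical placeholders.

import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;

public class PersistExample {
    public static void main(String[] args) {
        // "spark_cassandra_pu" is a hypothetical persistence unit configured for
        // Kundera's spark-cassandra client in persistence.xml.
        EntityManagerFactory emf = Persistence.createEntityManagerFactory("spark_cassandra_pu");
        EntityManager em = emf.createEntityManager();

        Person p = new Person();   // hypothetical mapped @Entity
        p.setPersonId("1");
        p.setPersonName("John");
        em.persist(p);             // routed to the client's persist(...) shown above

        em.close();
        emf.close();
    }
}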
/**
 * Applies a function to each item, and groups consecutive items having the same value together.
 * Contrary to `groupBy`, items from the same group must already be next to each other in the
 * original collection. Works locally on each partition, so items from different partitions will
 * never be placed in the same group.
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    return CassandraJavaUtil.javaFunctions(rdd()).spanBy(f, keyClassTag);
}
/**
 * @see #spanBy(Function, ClassTag)
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, Class<K> keyClass) {
    return CassandraJavaUtil.javaFunctions(rdd()).spanBy(f, getClassTag(keyClass));
}
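A brief usage sketch for the spanBy overloads above. The keyspace, table, and column names ("ks", "events", "customer_id") are assumptions for illustration only; the point is that rows of one Cassandra partition are read consecutively, so spanBy can group them without a shuffle.

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import com.datastax.spark.connector.japi.CassandraRow;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;

public class SpanByExample {
    // Groups consecutive rows sharing the same customer_id value.
    // Works locally on each partition, exactly as the Javadoc above describes.
    static JavaPairRDD<Long, Iterable<CassandraRow>> eventsByCustomer(JavaSparkContext sc) {
        return javaFunctions(sc)
            .cassandraTable("ks", "events")                          // hypothetical keyspace/table
            .spanBy(row -> row.getLong("customer_id"), Long.class);  // hypothetical column name
    }
}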
private void store(JavaSparkContext sc, List<Dependency> links) {
    CassandraDependencies dependencies = new CassandraDependencies(links, day);
    javaFunctions(sc.parallelize(Collections.singletonList(dependencies)))
        .writerBuilder(keyspace, "dependencies", mapToRow(CassandraDependencies.class))
        .saveToCassandra();
}
@SuppressWarnings("serial") private void showResults(JavaSparkContext sc) { JavaPairRDD<Integer, Summary> summariesRdd = javaFunctions(sc) .cassandraTable("java_api", "summaries", summaryReader) .keyBy(new Function<Summary, Integer>() { @Override public Integer call(Summary summary) throws Exception { return summary.getProduct(); } }); JavaPairRDD<Integer, Product> productsRdd = javaFunctions(sc) .cassandraTable("java_api", "products", productReader) .keyBy(new Function<Product, Integer>() { @Override public Integer call(Product product) throws Exception { return product.getId(); } }); List<Tuple2<Product, Optional<Summary>>> results = productsRdd.leftOuterJoin(summariesRdd).values().toArray(); for (Tuple2<Product, Optional<Summary>> result : results) { System.out.println(result); } }
public void run() {
    long microsLower = day.toInstant().toEpochMilli() * 1000;
    long microsUpper = day.plus(Period.ofDays(1)).toInstant().toEpochMilli() * 1000 - 1;

    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp ≤ {}", day, microsLower, microsUpper);

    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        JavaPairRDD<String, Iterable<Span>> traces = javaFunctions(sc)
            .cassandraTable(keyspace, "traces", mapRowTo(CassandraSpan.class))
            .where("start_time < ? AND start_time > ?", microsUpper, microsLower)
            .mapToPair(span -> new Tuple2<>(span.getTraceId(), span))
            .mapValues(span -> (Span) span)
            .groupByKey();

        List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces);
        store(sc, dependencyLinks);
        log.info("Done, {} dependency objects created", dependencyLinks.size());
    } finally {
        sc.stop();
    }
}
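For context, a hedged sketch of how the `conf` used by the jobs above might be assembled. The property key spark.cassandra.connection.host is the standard spark-cassandra-connector setting; the app name, master URL, and contact point are placeholders.

import org.apache.spark.SparkConf;

public class ConfExample {
    // Minimal SparkConf for a Cassandra-backed job; values are illustrative only.
    static SparkConf buildConf() {
        return new SparkConf()
            .setAppName("zipkin-dependencies")                      // placeholder app name
            .setMaster("local[*]")                                  // or a real cluster master URL
            .set("spark.cassandra.connection.host", "127.0.0.1");   // connector contact point
    }
}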
javaFunctions(productsRDD).writerBuilder("java_api", "products", productWriter).saveToCassandra();
javaFunctions(salesRDD).writerBuilder("java_api", "sales", saleWriter).saveToCassandra();
@SuppressWarnings("serial") private void compute(JavaSparkContext sc) { JavaPairRDD<Integer, Product> productsRDD = javaFunctions(sc) .cassandraTable("java_api", "products", productReader) .keyBy(new Function<Product, Integer>() { JavaPairRDD<Integer, Sale> salesRDD = javaFunctions(sc) .cassandraTable("java_api", "sales", saleReader) .keyBy(new Function<Sale, Integer>() { javaFunctions(summariesRDD).writerBuilder("java_api", "summaries", summaryWriter).saveToCassandra();
public void run() {
    long microsLower = day * 1000;
    long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp ≤ {}", dateStamp, microsLower, microsUpper);

    SparkContext sc = new SparkContext(conf);

    List<DependencyLink> links = javaFunctions(sc)
        .cassandraTable(keyspace, "traces")
        .spanBy(ROW_TRACE_ID, Long.class)
        .flatMapValues(new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper))
        .values()
        .mapToPair(LINK_TO_PAIR)
        .reduceByKey(MERGE_LINK)
        .values()
        .collect();

    sc.stop();

    saveToCassandra(links);
}
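The helper constants used above (ROW_TRACE_ID, LINK_TO_PAIR, MERGE_LINK) are not shown in this snippet. As one illustration, the grouping key passed to spanBy could look like the sketch below; the trace_id column name is an assumption, not taken from the original code.

import org.apache.spark.api.java.function.Function;
import com.datastax.spark.connector.japi.CassandraRow;

// Hypothetical sketch of the spanBy key: extracts the trace id so that
// consecutive rows belonging to one trace form a single group.
static final Function<CassandraRow, Long> ROW_TRACE_ID =
    new Function<CassandraRow, Long>() {
        @Override
        public Long call(CassandraRow row) {
            return row.getLong("trace_id"); // column name is an assumption
        }
    };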
try {
    JavaRDD<DependencyLink> links = flatMapToLinksByTraceId(
            javaFunctions(sc).cassandraTable(keyspace, "span"), microsUpper, microsLower, inTest)
        .values()
        .mapToPair(LINK_TO_PAIR)
        .reduceByKey(MERGE_LINK)   // merge duplicate links, mirroring the traces-based job above
        .values();
    // The remainder of this snippet (persisting the links) is not shown in the original.
} finally {
    sc.stop();                     // assumed cleanup, mirroring the other run() methods above
}