/**
 * Registers the Cassandra table described by {@code m} as a temporary Spark SQL table.
 *
 * <p>Reads the table {@code m.getSchema()}.{@code m.getTableName()} into a {@code JavaRDD}
 * of the entity class and exposes it under the table's own name via
 * {@code sqlContext.createDataFrame(...).registerTempTable(...)}.
 *
 * @param m           metadata carrying the keyspace, table name, and entity class to map rows to
 * @param sparkClient supplies the underlying {@code sparkContext} and {@code sqlContext}
 */
@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient) {
    SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
    // Delegate to a generic helper so the entity type is captured once as T instead of
    // flowing through raw Class/JavaRDD references (the original used raw types).
    registerTable(functions, m, sparkClient, m.getEntityClazz());
}

/**
 * Type-safe worker: loads the Cassandra table as a {@code JavaRDD<T>} and registers it
 * as a temp table named after the Cassandra table.
 */
private <T> void registerTable(
        SparkContextJavaFunctions functions, EntityMetadata m, SparkClient sparkClient, Class<T> clazz) {
    JavaRDD<T> cassandraRowsRDD =
        functions.cassandraTable(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapRowTo(clazz));
    sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz).registerTempTable(m.getTableName());
}
/**
 * Runs the dependency-link derivation job for a single day.
 *
 * <p>Selects every span whose {@code start_time} (epoch microseconds) falls within the
 * inclusive range {@code [microsLower, microsUpper]} covering {@code day}, groups the spans
 * by trace id, derives {@code Dependency} links from each trace, and stores the result.
 * The {@code JavaSparkContext} is always stopped, even on failure.
 */
public void run() {
    // Inclusive bounds in epoch microseconds: start of `day` through the last microsecond
    // before the next day starts.
    long microsLower = day.toInstant().toEpochMilli() * 1000;
    long microsUpper = day.plus(Period.ofDays(1)).toInstant().toEpochMilli() * 1000 - 1;
    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", day, microsLower, microsUpper);
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // BUG FIX: the bounds are inclusive (see the log line above), but the original
        // predicate used strict `<` / `>`, silently dropping spans timestamped exactly at
        // microsLower or microsUpper. Use `<=` / `>=` to match the intended range.
        JavaPairRDD<String, Iterable<Span>> traces = javaFunctions(sc)
            .cassandraTable(keyspace, "traces", mapRowTo(CassandraSpan.class))
            .where("start_time <= ? AND start_time >= ?", microsUpper, microsLower)
            .mapToPair(span -> new Tuple2<>(span.getTraceId(), span))
            .mapValues(span -> (Span) span) // widen CassandraSpan to the Span interface
            .groupByKey();
        List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces);
        store(sc, dependencyLinks);
        log.info("Done, {} dependency objects created", dependencyLinks.size());
    } finally {
        sc.stop(); // always release the Spark context
    }
}