/**
 * Registers the Cassandra table described by {@code m} as a Spark SQL temp table,
 * so it can be queried by its table name through {@code sparkClient.sqlContext}.
 *
 * @param m           metadata naming the keyspace, table, and mapped entity class
 * @param sparkClient supplies the SparkContext and SQLContext used for registration
 */
@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient) {
    SparkContextJavaFunctions functions =
            CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
    // Delegate to a generic helper: the type parameter captures the entity class's
    // wildcard so the RDD and the DataFrame bean class are typed consistently
    // (original used raw Class and raw JavaRDD).
    registerTempTable(functions, sparkClient, m, m.getEntityClazz());
}

/** Loads the table as a typed RDD of {@code clazz} and registers it as a temp table. */
private <T> void registerTempTable(
        SparkContextJavaFunctions functions, SparkClient sparkClient, EntityMetadata m, Class<T> clazz) {
    JavaRDD<T> cassandraRowsRDD = functions.cassandraTable(
            m.getSchema(), m.getTableName(), CassandraJavaUtil.mapRowTo(clazz));
    sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz)
            .registerTempTable(m.getTableName());
}
// Expose the Spark context's Cassandra functions.
SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkContext);
// Scan "keyspace"."table_name", mapping each Cassandra row to a DataItem POJO.
JavaRDD<DataItem> cassandraRowsRDD =
        functions.cassandraTable("keyspace", "table_name", CassandraJavaUtil.mapRowTo(DataItem.class));
// Materialize the required list of POJOs on the driver.
// Fix: the original statement was missing the variable name ("List<DataItem> = ...").
List<DataItem> dataItems = cassandraRowsRDD.collect();
/**
 * Exposes a Cassandra table as a
 * {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD} whose elements are
 * generic {@code CassandraRow} objects, one per Cassandra row.
 *
 * <p>Example:
 * <pre>
 * CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
 * CREATE TABLE test.words (word text PRIMARY KEY, count int);
 * INSERT INTO test.words (word, count) VALUES ('foo', 20);
 * INSERT INTO test.words (word, count) VALUES ('bar', 20);
 * ...
 *
 * // Obtaining RDD of CassandraRow objects:
 * CassandraJavaRDD<CassandraRow> rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
 * rdd.first().getString("word"); // foo
 * rdd.first().getInt("count"); // 20
 * </pre>
 * </p>
 *
 * @param keyspace keyspace containing the table
 * @param table    table to scan
 * @since 1.0.0
 */
public CassandraTableScanJavaRDD<CassandraRow> cassandraTable(String keyspace, String table) {
    // Delegate to the factory-based overload using the generic row reader.
    return cassandraTable(keyspace, table, GenericJavaRowReaderFactory.instance);
}
/**
 * Returns the given Cassandra table as a
 * {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD} of {@code CassandraRow}
 * elements (each fetched row becomes one {@code CassandraRow}).
 *
 * <p>Example:
 * <pre>
 * CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
 * CREATE TABLE test.words (word text PRIMARY KEY, count int);
 * INSERT INTO test.words (word, count) VALUES ('foo', 20);
 * INSERT INTO test.words (word, count) VALUES ('bar', 20);
 * ...
 *
 * // Obtaining RDD of CassandraRow objects:
 * CassandraJavaRDD<CassandraRow> rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
 * rdd.first().getString("word"); // foo
 * rdd.first().getInt("count"); // 20
 * </pre>
 * </p>
 *
 * @param keyspace keyspace containing the table
 * @param table    table to scan
 * @since 1.0.0
 */
public CassandraTableScanJavaRDD<CassandraRow> cassandraTable(String keyspace, String table) {
    // Generic reader that converts every row into a CassandraRow.
    RowReaderFactory<CassandraRow> rowReaderFactory = GenericJavaRowReaderFactory.instance;
    return cassandraTable(keyspace, table, rowReaderFactory);
}
/**
 * Provides a view of a Cassandra table as a
 * {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD}; every row is
 * surfaced as a {@code CassandraRow} object.
 *
 * <p>Example:
 * <pre>
 * CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
 * CREATE TABLE test.words (word text PRIMARY KEY, count int);
 * INSERT INTO test.words (word, count) VALUES ('foo', 20);
 * INSERT INTO test.words (word, count) VALUES ('bar', 20);
 * ...
 *
 * // Obtaining RDD of CassandraRow objects:
 * CassandraJavaRDD<CassandraRow> rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
 * rdd.first().getString("word"); // foo
 * rdd.first().getInt("count"); // 20
 * </pre>
 * </p>
 *
 * @param keyspace keyspace containing the table
 * @param table    table to scan
 * @since 1.0.0
 */
public CassandraTableScanJavaRDD<CassandraRow> cassandraTable(String keyspace, String table) {
    // Hand off to the three-argument overload with the shared generic reader instance.
    return cassandraTable(keyspace, table, GenericJavaRowReaderFactory.instance);
}
/**
 * Scans a Cassandra table into a
 * {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD}, converting each
 * row to a generic {@code CassandraRow}.
 *
 * <p>Example:
 * <pre>
 * CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
 * CREATE TABLE test.words (word text PRIMARY KEY, count int);
 * INSERT INTO test.words (word, count) VALUES ('foo', 20);
 * INSERT INTO test.words (word, count) VALUES ('bar', 20);
 * ...
 *
 * // Obtaining RDD of CassandraRow objects:
 * CassandraJavaRDD<CassandraRow> rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
 * rdd.first().getString("word"); // foo
 * rdd.first().getInt("count"); // 20
 * </pre>
 * </p>
 *
 * @param keyspace keyspace containing the table
 * @param table    table to scan
 * @since 1.0.0
 */
public CassandraTableScanJavaRDD<CassandraRow> cassandraTable(String keyspace, String table) {
    // Use the shared generic row-reader factory and defer to the full overload.
    RowReaderFactory<CassandraRow> genericReader = GenericJavaRowReaderFactory.instance;
    return cassandraTable(keyspace, table, genericReader);
}
/**
 * Reads a Cassandra table as a
 * {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD} of
 * {@code CassandraRow} values.
 *
 * <p>Example:
 * <pre>
 * CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
 * CREATE TABLE test.words (word text PRIMARY KEY, count int);
 * INSERT INTO test.words (word, count) VALUES ('foo', 20);
 * INSERT INTO test.words (word, count) VALUES ('bar', 20);
 * ...
 *
 * // Obtaining RDD of CassandraRow objects:
 * CassandraJavaRDD<CassandraRow> rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
 * rdd.first().getString("word"); // foo
 * rdd.first().getInt("count"); // 20
 * </pre>
 * </p>
 *
 * @param keyspace keyspace containing the table
 * @param table    table to scan
 * @since 1.0.0
 */
public CassandraTableScanJavaRDD<CassandraRow> cassandraTable(String keyspace, String table) {
    // Same behavior as the factory overload; the generic reader yields CassandraRow.
    return cassandraTable(keyspace, table, GenericJavaRowReaderFactory.instance);
}
/**
 * Prints each product together with its (optional) sales summary.
 *
 * <p>Left-outer-joins products with summaries on product id, so products
 * without a summary still appear (with an absent Optional).</p>
 *
 * @param sc the Spark context used to read both Cassandra tables
 */
@SuppressWarnings("serial")
private void showResults(JavaSparkContext sc) {
    // Summaries keyed by product id so they can be joined with products.
    JavaPairRDD<Integer, Summary> summariesRdd = javaFunctions(sc)
        .cassandraTable("java_api", "summaries", summaryReader)
        .keyBy(new Function<Summary, Integer>() {
            @Override
            public Integer call(Summary summary) throws Exception {
                return summary.getProduct();
            }
        });
    // Products keyed by their own id.
    JavaPairRDD<Integer, Product> productsRdd = javaFunctions(sc)
        .cassandraTable("java_api", "products", productReader)
        .keyBy(new Function<Product, Integer>() {
            @Override
            public Integer call(Product product) throws Exception {
                return product.getId();
            }
        });
    // Fix: collect() replaces the deprecated JavaRDD.toArray().
    List<Tuple2<Product, Optional<Summary>>> results =
        productsRdd.leftOuterJoin(summariesRdd).values().collect();
    for (Tuple2<Product, Optional<Summary>> result : results) {
        System.out.println(result);
    }
}
/**
 * Runs the dependencies job for one day: loads that day's spans from Cassandra,
 * groups them by trace id, derives dependency links, and stores the result.
 */
public void run() {
    // Day window in epoch microseconds: [start-of-day, start-of-next-day - 1].
    long microsLower = day.toInstant().toEpochMilli() * 1000;
    long microsUpper = day.plus(Period.ofDays(1)).toInstant().toEpochMilli() * 1000 - 1;

    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", day, microsLower, microsUpper);

    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // NOTE(review): the log message advertises an inclusive lower bound (≤),
        // but the WHERE clause uses strict '>' and '<' — spans timestamped exactly
        // at microsLower or microsUpper are excluded. Confirm this is intended.
        JavaPairRDD<String, Iterable<Span>> traces = javaFunctions(sc)
            .cassandraTable(keyspace, "traces", mapRowTo(CassandraSpan.class))
            .where("start_time < ? AND start_time > ?", microsUpper, microsLower)
            // Key each span by its trace id, widen CassandraSpan to the Span
            // interface, then gather all spans of a trace together.
            .mapToPair(span -> new Tuple2<>(span.getTraceId(), span))
            .mapValues(span -> (Span) span)
            .groupByKey();

        List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces);
        store(sc, dependencyLinks);
        log.info("Done, {} dependency objects created", dependencyLinks.size());
    } finally {
        // Always release the Spark context, even when derivation or storage fails.
        sc.stop();
    }
}
/**
 * Runs the dependency-link job for the day starting at {@code day} (epoch millis):
 * scans the raw "traces" table, derives links within the day's time window, merges
 * duplicates, and writes the result back to Cassandra.
 */
public void run() {
    // Day window in epoch microseconds: [day, day + 1 day - 1].
    long microsLower = day * 1000;
    long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", dateStamp, microsLower, microsUpper);

    SparkContext sc = new SparkContext(conf);
    List<DependencyLink> links;
    try {
        links = javaFunctions(sc)
            .cassandraTable(keyspace, "traces")
            // Group rows that belong to the same trace (ROW_TRACE_ID presumably
            // extracts the trace-id column — confirm against the schema).
            .spanBy(ROW_TRACE_ID, Long.class)
            .flatMapValues(new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper))
            .values()
            // Merge links that describe the same dependency pair.
            .mapToPair(LINK_TO_PAIR)
            .reduceByKey(MERGE_LINK)
            .values()
            .collect();
    } finally {
        // Fix: the original only stopped the context on success, leaking it if
        // the pipeline threw. Always stop it.
        sc.stop();
    }
    saveToCassandra(links);
}
// NOTE(review): truncated fragment — the chain and the enclosing try block
// continue past this view (no terminal action or closing brace visible here).
try {
    // Derive dependency links from the raw "span" table; the helper presumably
    // groups rows by trace id and applies the (microsLower, microsUpper) window —
    // confirm in the full source. inTest looks like a test-mode toggle.
    JavaRDD<DependencyLink> links = flatMapToLinksByTraceId(
        javaFunctions(sc).cassandraTable(keyspace, "span"),
        microsUpper, microsLower, inTest
    ).values()
        // Key each link, presumably so duplicates can be merged downstream.
        .mapToPair(LINK_TO_PAIR)
// NOTE(review): garbled/truncated snippet — the body of the first keyBy Function
// and the start of the RDD reading "sales" are missing from this view.
// Tokens kept exactly as found; do not compile as-is.
@SuppressWarnings("serial")
private void compute(JavaSparkContext sc) {
    // Products read from java_api.products, keyed (presumably by product id)
    // for a join with sales — confirm against the complete source.
    JavaPairRDD<Integer, Product> productsRDD = javaFunctions(sc)
        .cassandraTable("java_api", "products", productReader)
        .keyBy(new Function<Product, Integer>() {
            @Override
        // Sales read from java_api.sales, keyed by a Sale-derived integer.
        .cassandraTable("java_api", "sales", saleReader)
        .keyBy(new Function<Sale, Integer>() {
            @Override