/**
 * Saves an RDD of bundles as a database, where each table has the resource
 * name. This offers a simple way to load and query bundles in a system,
 * although users with more sophisticated ETL operations may want to
 * explicitly write different entities.
 *
 * <p>
 * Note this will access the given RDD of bundles once per resource name,
 * so consumers with enough memory should consider calling
 * {@link JavaRDD#cache()} so that RDD is not recomputed for each.
 * </p>
 *
 * @param spark the spark session
 * @param bundles an RDD of FHIR Bundles
 * @param database the name of the database to write to
 * @param resourceNames names of resources to be extracted from the bundle and written
 * @throws IllegalArgumentException if the database name is not a simple identifier
 */
public void saveAsDatabase(SparkSession spark,
    JavaRDD<BundleContainer> bundles,
    String database,
    String... resourceNames) {

  java.util.Objects.requireNonNull(spark, "spark");
  java.util.Objects.requireNonNull(bundles, "bundles");
  java.util.Objects.requireNonNull(database, "database");

  // The database name is concatenated directly into a SQL statement below,
  // so restrict it to a simple identifier to rule out SQL injection and
  // names that are invalid in the metastore.
  if (!database.matches("[A-Za-z0-9_]+")) {
    throw new IllegalArgumentException("Invalid database name: " + database);
  }

  spark.sql("create database if not exists " + database);

  for (String resourceName : resourceNames) {

    // Extract the resources of this type from the bundles into their own
    // dataset; note this triggers a pass over the RDD per resource name.
    Dataset<?> ds = extractEntry(spark, bundles, resourceName);

    // Locale-independent lowercase so the table name is stable regardless of
    // the JVM default locale (e.g. the Turkish dotless-i problem).
    ds.write().saveAsTable(
        database + "." + resourceName.toLowerCase(java.util.Locale.ROOT));
  }
}
/**
 * Saves an RDD of bundles as a database, where each table has the resource
 * name. This offers a simple way to load and query bundles in a system,
 * although users with more sophisticated ETL operations may want to
 * explicitly write different entities.
 *
 * <p>
 * Note this will access the given RDD of bundles once per resource name,
 * so consumers with enough memory should consider calling
 * {@link JavaRDD#cache()} so that RDD is not recomputed for each.
 * </p>
 *
 * @param spark the spark session
 * @param bundles an RDD of FHIR Bundles
 * @param database the name of the database to write to
 * @param resourceNames names of resources to be extracted from the bundle and written
 * @throws IllegalArgumentException if the database name is not a simple identifier
 */
public void saveAsDatabase(SparkSession spark,
    JavaRDD<BundleContainer> bundles,
    String database,
    String... resourceNames) {

  // Guard against SQL injection: the database name is interpolated directly
  // into the DDL statement that follows, so only a plain identifier is safe.
  if (database == null || !database.matches("\\w+")) {
    throw new IllegalArgumentException(
        "database must be a simple identifier, got: " + database);
  }

  spark.sql("create database if not exists " + database);

  for (String resourceName : resourceNames) {

    // One pass over the bundle RDD per resource type; callers can cache()
    // the RDD to avoid recomputation.
    Dataset<?> entries = extractEntry(spark, bundles, resourceName);

    // Use Locale.ROOT so table naming does not vary with the default locale.
    entries.write().saveAsTable(
        database + "." + resourceName.toLowerCase(java.util.Locale.ROOT));
  }
}
/** Verifies that a dataset saved as a managed table can be read back via SQL. */
@Test
public void saveTableAndQueryIt() {
  final Map<String, String> writeOptions = new HashMap<>();

  // Persist the dataframe as a JSON-backed managed table.
  df.write()
      .format("org.apache.spark.sql.json")
      .mode(SaveMode.Append)
      .options(writeOptions)
      .saveAsTable("javaSavedTable");

  // Selecting everything from the table should yield exactly the original rows.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());
}
}
/** Saving to a managed table and querying it should round-trip every row. */
@Test
public void saveTableAndQueryIt() {
  Map<String, String> emptyOptions = new HashMap<>();

  // Write the dataframe out as a JSON table; the builder setters below are
  // independent, so their ordering is immaterial.
  df.write()
      .options(emptyOptions)
      .mode(SaveMode.Append)
      .format("org.apache.spark.sql.json")
      .saveAsTable("javaSavedTable");

  // A full SELECT must return the same rows the dataframe holds.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());
}
}
.format("parquet") .partitionBy("timestamp") .saveAsTable(conceptMapTable);
/** Saves to a path-backed table and verifies both direct load and SQL access. */
@Test
public void saveExternalTableAndQueryIt() {
  final Map<String, String> tableOptions = new HashMap<>();
  tableOptions.put("path", path.toString());

  // Write the dataframe as a JSON table stored at an explicit path.
  df.write()
      .format("org.apache.spark.sql.json")
      .mode(SaveMode.Append)
      .options(tableOptions)
      .saveAsTable("javaSavedTable");

  // The saved table must contain exactly the original rows.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());

  // Registering the same path as an external table should expose the same
  // data, both through the returned dataset and through SQL.
  Dataset<Row> externalDF =
      sqlContext.createExternalTable("externalTable", "org.apache.spark.sql.json", tableOptions);

  checkAnswer(externalDF, df.collectAsList());
  checkAnswer(
      sqlContext.sql("SELECT * FROM externalTable"),
      df.collectAsList());
}
/** An external table created over the saved path must mirror the saved data. */
@Test
public void saveExternalTableAndQueryIt() {
  Map<String, String> pathOptions = new HashMap<>();
  pathOptions.put("path", path.toString());

  // Append the dataframe to a JSON table rooted at the given path. The
  // builder calls are order-independent setters.
  df.write()
      .options(pathOptions)
      .mode(SaveMode.Append)
      .format("org.apache.spark.sql.json")
      .saveAsTable("javaSavedTable");

  // Round-trip check on the saved table itself.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());

  // Point an external table at the same files and confirm the contents match
  // via the dataset handle as well as via SQL.
  Dataset<Row> mirrored =
      sqlContext.createExternalTable("externalTable", "org.apache.spark.sql.json", pathOptions);

  checkAnswer(mirrored, df.collectAsList());
  checkAnswer(
      sqlContext.sql("SELECT * FROM externalTable"),
      df.collectAsList());
}
.format("parquet") .partitionBy("timestamp") .saveAsTable(conceptMapTable);
/**
 * Creating an external table with an explicit (narrower) schema should project
 * the stored data down to just the declared columns.
 */
@Test
public void saveExternalTableWithSchemaAndQueryIt() {
  final Map<String, String> tableOptions = new HashMap<>();
  tableOptions.put("path", path.toString());

  // Persist the dataframe as a path-backed JSON table.
  df.write()
      .format("org.apache.spark.sql.json")
      .mode(SaveMode.Append)
      .options(tableOptions)
      .saveAsTable("javaSavedTable");

  // Sanity check: the saved table round-trips the original rows.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());

  // Declare a schema containing only column "b".
  List<StructField> schemaFields = new ArrayList<>();
  schemaFields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType projectedSchema = DataTypes.createStructType(schemaFields);

  // The external table should therefore expose only that column's values.
  Dataset<Row> externalDF = sqlContext.createExternalTable(
      "externalTable", "org.apache.spark.sql.json", projectedSchema, tableOptions);

  checkAnswer(
      externalDF,
      sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList());
  checkAnswer(
      sqlContext.sql("SELECT * FROM externalTable"),
      sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList());
}
.format("parquet") .partitionBy("timestamp") .saveAsTable(valueSetTable);
.format("parquet") .partitionBy("timestamp") .saveAsTable(valueSetTable);
/**
 * An external table declared with a single-column schema over the saved data
 * must behave like a projection of the underlying table.
 */
@Test
public void saveExternalTableWithSchemaAndQueryIt() {
  Map<String, String> pathOptions = new HashMap<>();
  pathOptions.put("path", path.toString());

  // Write out the dataframe as JSON at the given path; setter order on the
  // writer builder is immaterial.
  df.write()
      .options(pathOptions)
      .mode(SaveMode.Append)
      .format("org.apache.spark.sql.json")
      .saveAsTable("javaSavedTable");

  // The managed view of the data must match the source rows.
  checkAnswer(
      sqlContext.sql("SELECT * FROM javaSavedTable"),
      df.collectAsList());

  // Build a one-column schema selecting only "b".
  List<StructField> singleColumn = new ArrayList<>();
  singleColumn.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType narrowSchema = DataTypes.createStructType(singleColumn);

  Dataset<Row> projected = sqlContext.createExternalTable(
      "externalTable", "org.apache.spark.sql.json", narrowSchema, pathOptions);

  // Both the dataset handle and SQL over the external table should equal a
  // SELECT of column "b" from the saved table.
  checkAnswer(
      projected,
      sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList());
  checkAnswer(
      sqlContext.sql("SELECT * FROM externalTable"),
      sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList());
}