@Test
public void testSaveModeAPI() {
  spark
      .range(10)
      .write()
      .format("org.apache.spark.sql.test")
      .mode(SaveMode.ErrorIfExists)
      .save();
}
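// The SaveMode.ErrorIfExists used throughout these snippets is one of four
// write modes, and DataFrameWriter.mode(String) accepts case-insensitive
// string names as well. A minimal runnable sketch using only core Spark APIs;
// the local master and output path are illustrative, not from the original.
import org.apache.spark.sql.SparkSession;

public class SaveModeDemo {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[1]")           // illustrative local master
        .appName("save-mode-demo")
        .getOrCreate();

    // SaveMode.Append        -> "append"
    // SaveMode.Overwrite     -> "overwrite"
    // SaveMode.ErrorIfExists -> "error" (the default for save())
    // SaveMode.Ignore        -> "ignore"
    spark.range(10).write().mode("overwrite").json("/tmp/savemode-demo");

    spark.stop();
  }
}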
public void couchbase() {
  // dfw is a DataFrameWriter configured outside this fragment.
  dfw.save();
}
@Test
public void saveAndLoad() {
  Map<String, String> options = new HashMap<>();
  options.put("path", path.toString());
  // Round-trip: write the DataFrame as JSON, read it back, and compare.
  df.write().mode(SaveMode.ErrorIfExists).format("json").options(options).save();
  Dataset<Row> loadedDF = spark.read().format("json").options(options).load();
  checkAnswer(loadedDF, df.collectAsList());
}
@Test
public void saveAndLoadWithSchema() {
  Map<String, String> options = new HashMap<>();
  options.put("path", path.toString());
  df.write().format("json").mode(SaveMode.ErrorIfExists).options(options).save();

  // Read back with an explicit single-column schema instead of inferring it.
  List<StructField> fields = new ArrayList<>();
  fields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType schema = DataTypes.createStructType(fields);
  Dataset<Row> loadedDF = spark.read().format("json").schema(schema).options(options).load();
  checkAnswer(loadedDF, spark.sql("SELECT b FROM jsonTable").collectAsList());
}
@Test
public void testFormatAPI() {
  spark
      .read()
      .format("org.apache.spark.sql.test")
      .load()
      .write()
      .format("org.apache.spark.sql.test")
      .save();
}
public void couchbase(Map<String, String> options) {
  // Fold the caller's options into the pre-configured writer, then persist.
  prepare(options);
  dfw.save();
}
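// The two couchbase fragments above depend on a dfw writer field and a
// prepare(options) helper defined elsewhere in their class. A minimal
// self-contained sketch of the same pattern using only core Spark APIs;
// the format name "com.example.couchbase" is a placeholder, not the real
// connector identifier.
import java.util.Map;

import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

public class CouchbaseWriteSketch {
  private final DataFrameWriter<Row> dfw;

  public CouchbaseWriteSketch(Dataset<Row> df) {
    // Pre-build the writer, mirroring the fragments' use of a dfw field.
    this.dfw = df.write().format("com.example.couchbase").mode(SaveMode.Append);
  }

  // Counterpart of prepare(options): fold caller-supplied options into the writer.
  private void prepare(Map<String, String> options) {
    dfw.options(options);
  }

  public void couchbase(Map<String, String> options) {
    prepare(options);
    dfw.save();
  }
}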
@Test
public void testOptionsAPI() {
  HashMap<String, String> map = new HashMap<String, String>();
  map.put("e", "1");
  // option() has String, boolean, long, and double overloads; options() merges a map.
  spark
      .read()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .text()
      .write()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .format("org.apache.spark.sql.test")
      .save();
}
public static void writeDataToFolder(DataSpark data, String path, SQLContext sqlContext,
    String formatFile) throws Exception {
  // Write the DataFrame in the requested format, overwriting any existing output at path.
  data.getDataFrame(sqlContext).write().mode(SaveMode.Overwrite).format(formatFile).save(path);
}
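// A hypothetical call site for writeDataToFolder, assuming DataSpark is an
// interface whose only method is the getDataFrame(SQLContext) used above;
// the paths and format are illustrative.
SQLContext sqlContext = new SQLContext(sc);  // sc: an existing JavaSparkContext

DataSpark data = new DataSpark() {
  @Override
  public DataFrame getDataFrame(SQLContext ctx) {
    return ctx.read().json("/tmp/input.json");  // illustrative input
  }
};

writeDataToFolder(data, "/tmp/output", sqlContext, "parquet");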
public void compact(String[] args) throws IOException {
  this.setCompressionAndSerializationOptions(this.parseCli(args));
  this.outputCompressionProperties(this.outputCompression);

  // Defining Spark Context with a generic Spark Configuration.
  SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);

  if (this.outputSerialization.equals(TEXT)) {
    JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
    textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
  } else if (this.outputSerialization.equals(PARQUET)) {
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
    parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
  } else if (this.outputSerialization.equals(AVRO)) {
    // For this to work the files must end in .avro
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro")
        .load(this.concatInputPath(inputPath));
    avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
  } else {
    System.out.println("Did not match any serialization type: text, parquet, or avro. Received: "
        + this.outputSerialization);
  }
}
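// The compact method above is written against the Spark 1.x DataFrame and
// SQLContext API, where Avro support came from the external
// com.databricks.spark.avro package. A hedged sketch of the same Avro branch
// on Spark 2.4+, where format("avro") resolves to the built-in source once
// the spark-avro module is on the classpath; inputPath, splitSize, and
// outputPath mirror the fields used above.
SparkSession spark = SparkSession.builder().appName("Spark Compaction").getOrCreate();
Dataset<Row> avroFile = spark.read().format("avro").load(inputPath);
avroFile.coalesce(splitSize).write().format("avro").save(outputPath);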
private File buildPartitionedTable(String desc, PartitionSpec spec, String udf,
    String partitionColumn) {
  File location = new File(parent, desc);
  Table byId = TABLES.create(SCHEMA, spec, location.toString());

  // do not combine splits because the tests expect a split per partition
  byId.updateProperties().set("read.split.target-size", "1").commit();

  // copy the unpartitioned table into the partitioned table to produce the partitioned data
  Dataset<Row> allRows = spark.read()
      .format("iceberg")
      .load(unpartitioned.toString());

  allRows
      .coalesce(1) // ensure only 1 file per partition is written
      .withColumn("part", callUDF(udf, column(partitionColumn)))
      .sortWithinPartitions("part")
      .drop("part")
      .write()
      .format("iceberg")
      .mode("append")
      .save(byId.location());

  return location;
}
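// As a follow-up to buildPartitionedTable, a small sketch of reading the
// resulting table back with a partition-pruning filter. The helper name and
// the filter value are illustrative assumptions; format("iceberg") and the
// Dataset filter API match the usage above.
import static org.apache.spark.sql.functions.column;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

class ReadBackSketch {
  // Hypothetical helper: read the table written above and filter on the
  // partition source column; Iceberg prunes non-matching partitions via metadata.
  static Dataset<Row> readPartition(SparkSession spark, String location,
      String partitionColumn, Object value) {
    return spark.read()
        .format("iceberg")
        .load(location)
        .filter(column(partitionColumn).equalTo(value));
  }
}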