@Test
public void testLoadAPI() {
  spark.read().format("org.apache.spark.sql.test").load();
  spark.read().format("org.apache.spark.sql.test").load(input);
  spark.read().format("org.apache.spark.sql.test").load(input, input, input);
  spark.read().format("org.apache.spark.sql.test").load(new String[]{input, input});
}
// Variant 1: convert each loaded row to a JSON string with toJSON(), which
// works regardless of the schema the input format produces.
@Override
public Dataset<String> read(SparkSession spark, Properties profilerProps, Properties readerProps) {
  String inputPath = TELEMETRY_INPUT_PATH.get(profilerProps, String.class);
  if (inputFormat == null) {
    inputFormat = TELEMETRY_INPUT_FORMAT.get(profilerProps, String.class);
  }
  LOG.debug("Loading telemetry; inputPath={}, inputFormat={}", inputPath, inputFormat);
  return spark
      .read()
      .options(Maps.fromProperties(readerProps))
      .format(inputFormat)
      .load(inputPath)
      .toJSON();
}
// Variant 2: reinterpret the loaded rows as strings with Encoders.STRING(),
// which assumes the input is a single string column.
@Override
public Dataset<String> read(SparkSession spark, Properties profilerProps, Properties readerProps) {
  String inputPath = TELEMETRY_INPUT_PATH.get(profilerProps, String.class);
  if (inputFormat == null) {
    inputFormat = TELEMETRY_INPUT_FORMAT.get(profilerProps, String.class);
  }
  LOG.debug("Loading telemetry; inputPath={}, inputFormat={}", inputPath, inputFormat);
  return spark
      .read()
      .options(Maps.fromProperties(readerProps))
      .format(inputFormat)
      .load(inputPath)
      .as(Encoders.STRING());
}
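// The two read() variants above differ only in the final conversion to a
// Dataset<String>: toJSON() serializes every row into a JSON object, while
// as(Encoders.STRING()) reads the rows back verbatim and requires a single
// string column. A minimal, self-contained sketch of the difference (the
// class name and sample record are illustrative, not from the original code):
import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReaderVariantSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("reader-variant-sketch")
        .getOrCreate();

    // One string column, standing in for text-encoded telemetry.
    Dataset<Row> rows = spark
        .createDataset(Arrays.asList("{\"ip\":\"10.0.0.1\"}"), Encoders.STRING())
        .toDF("value");

    // Variant 1: each row becomes a JSON object keyed by its column names.
    Dataset<String> asJson = rows.toJSON();   // {"value":"{\"ip\":\"10.0.0.1\"}"}

    // Variant 2: the single string column is returned unchanged.
    Dataset<String> asString = rows.as(Encoders.STRING());   // {"ip":"10.0.0.1"}

    asJson.show(false);
    asString.show(false);
    spark.stop();
  }
}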
@Test
public void saveAndLoad() {
  Map<String, String> options = new HashMap<>();
  options.put("path", path.toString());
  df.write().mode(SaveMode.ErrorIfExists).format("json").options(options).save();
  Dataset<Row> loadedDF = spark.read().format("json").options(options).load();
  checkAnswer(loadedDF, df.collectAsList());
}
@Test
public void saveAndLoadWithSchema() {
  Map<String, String> options = new HashMap<>();
  options.put("path", path.toString());
  df.write().format("json").mode(SaveMode.ErrorIfExists).options(options).save();

  List<StructField> fields = new ArrayList<>();
  fields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType schema = DataTypes.createStructType(fields);

  Dataset<Row> loadedDF = spark.read().format("json").schema(schema).options(options).load();
  checkAnswer(loadedDF, spark.sql("SELECT b FROM jsonTable").collectAsList());
}
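// The save/load tests above depend on a shared df and path and on a
// "jsonTable" temp view, none of which appear in this section. A plausible
// reconstruction of that fixture (the class name, JSON payload, and the
// checkAnswer stand-in are assumptions, not the original code):
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.Assert;
import org.junit.Before;

public class JavaSaveLoadSketch {
  private SparkSession spark;
  private File path;
  private Dataset<Row> df;

  @Before
  public void setUp() throws IOException {
    spark = SparkSession.builder()
        .master("local[*]")
        .appName("save-load-sketch")
        .getOrCreate();

    // A fresh, non-existent target so SaveMode.ErrorIfExists can succeed.
    path = Files.createTempDirectory("datasource").resolve("json").toFile();

    // Ten small records providing the "a" and "b" columns the tests select.
    List<String> jsonObjects = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      jsonObjects.add("{\"a\": " + i + ", \"b\": \"str" + i + "\"}");
    }
    df = spark.read().json(spark.createDataset(jsonObjects, Encoders.STRING()));
    df.createOrReplaceTempView("jsonTable");
  }

  // Order-insensitive stand-in for the checkAnswer helper the tests call;
  // a real implementation would compare multisets rather than sets.
  private static void checkAnswer(Dataset<Row> actual, List<Row> expected) {
    Assert.assertEquals(new HashSet<>(expected), new HashSet<>(actual.collectAsList()));
  }
}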
@Test
public void testGenericLoad() {
  Dataset<Row> df1 = spark.read().format("text").load(getResource("test-data/text-suite.txt"));
  Assert.assertEquals(4L, df1.count());

  Dataset<Row> df2 = spark.read().format("text").load(
      getResource("test-data/text-suite.txt"),
      getResource("test-data/text-suite2.txt"));
  Assert.assertEquals(5L, df2.count());
}
@Test
public void testFormatAPI() {
  spark
      .read()
      .format("org.apache.spark.sql.test")
      .load()
      .write()
      .format("org.apache.spark.sql.test")
      .save();
}
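// testGenericLoad calls a getResource(...) helper that this section does not
// define. A plausible implementation (an assumption about its behavior):
// resolve a classpath resource to a filesystem path string that
// DataFrameReader.load(...) accepts.
import java.net.URISyntaxException;
import java.nio.file.Paths;

private String getResource(String resource) {
  try {
    // Throws NullPointerException if the resource is missing; going through
    // toURI() keeps characters such as spaces decoded correctly.
    return Paths.get(
        Thread.currentThread().getContextClassLoader().getResource(resource).toURI()
    ).toString();
  } catch (URISyntaxException e) {
    throw new IllegalArgumentException(e);
  }
}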
@Test
public void testBeanWithArrayFieldDeserialization() {
  Encoder<Record> encoder = Encoders.bean(Record.class);
  Dataset<Record> dataset = spark
      .read()
      .format("json")
      .schema("id int, intervals array<struct<startTime: bigint, endTime: bigint>>")
      .load("src/test/resources/test-data/with-array-fields.json")
      .as(encoder);
  List<Record> records = dataset.collectAsList();
  Assert.assertEquals(records, RECORDS);
}
// Same test, with the schema built programmatically via createSchema()
// instead of the DDL string above.
@Test
public void testBeanWithArrayFieldDeserialization() {
  Encoder<Record> encoder = Encoders.bean(Record.class);
  Dataset<Record> dataset = spark
      .read()
      .format("json")
      .schema(createSchema())
      .load("src/test/resources/test-data/with-array-fields.json")
      .as(encoder);
  List<Record> records = dataset.collectAsList();
  Assert.assertEquals(records, RECORDS);
}
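// Plausible shapes for the Record bean and the createSchema() helper used by
// the two tests above, reconstructed from the DDL string in the first variant
// ("id int, intervals array<struct<startTime: bigint, endTime: bigint>>");
// both are assumptions, as is the RECORDS constant of expected values.
import java.util.List;
import java.util.Objects;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public static StructType createSchema() {
  StructType intervalType = DataTypes.createStructType(new StructField[]{
      DataTypes.createStructField("startTime", DataTypes.LongType, true),
      DataTypes.createStructField("endTime", DataTypes.LongType, true)
  });
  return DataTypes.createStructType(new StructField[]{
      DataTypes.createStructField("id", DataTypes.IntegerType, true),
      DataTypes.createStructField("intervals",
          DataTypes.createArrayType(intervalType, true), true)
  });
}

// Encoders.bean(...) needs a public class with a no-arg constructor and
// getters/setters; equals() makes the Assert.assertEquals comparison work.
public static class Record {
  private int id;
  private List<Interval> intervals;

  public int getId() { return id; }
  public void setId(int id) { this.id = id; }
  public List<Interval> getIntervals() { return intervals; }
  public void setIntervals(List<Interval> intervals) { this.intervals = intervals; }

  @Override public boolean equals(Object o) {
    if (!(o instanceof Record)) return false;
    Record other = (Record) o;
    return id == other.id && Objects.equals(intervals, other.intervals);
  }
  @Override public int hashCode() { return Objects.hash(id, intervals); }
}

public static class Interval {
  private long startTime;
  private long endTime;

  public long getStartTime() { return startTime; }
  public void setStartTime(long startTime) { this.startTime = startTime; }
  public long getEndTime() { return endTime; }
  public void setEndTime(long endTime) { this.endTime = endTime; }

  @Override public boolean equals(Object o) {
    if (!(o instanceof Interval)) return false;
    Interval other = (Interval) o;
    return startTime == other.startTime && endTime == other.endTime;
  }
  @Override public int hashCode() { return Objects.hash(startTime, endTime); }
}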