/**
 * Displays the leading rows of the wrapped dataframe.
 *
 * @param numLines how many rows to display; passed unchanged to the dataframe's {@code show(int)}
 */
public void show(final int numLines) {
    dataframe.show(numLines);
}
/**
 * Displays the leading rows of the wrapped dataframe by delegating to its
 * no-argument {@code show()}, so the row count is whatever that call defaults to.
 */
public void show() {
    dataframe.show();
}
/**
 * Displays the first 20 rows of the dataset.
 *
 * <p>Simply forwards to the wrapped dataframe's no-argument {@code show()}.
 */
public void show() {
    dataframe.show();
}
/**
 * Displays the first {@code numLines} rows of the data.
 *
 * @param numLines the number of rows to display
 */
public void show(int numLines) {
    dataframe.show(numLines);
}
/**
 * Displays the leading rows of the wrapped dataset by delegating to its
 * no-argument {@code show()}. Public only so tests can call it.
 */
@VisibleForTesting
public void show() {
    dataset.show();
}
// Build the dataframe for the SQL text held in `query`.
Dataset<Row> sqlResult = spark.sql(query);

// Console preview: prints the first 20 records.
sqlResult.show();

// Compressed Parquet copy of the result -- the recommended format for large projects.
final String parquetDir = output + "/parquetFormat";
sqlResult.write().parquet(parquetDir);

// Plain-text copy of the result, so the output can be inspected with `cat`.
final String textDir = output + "/textFormat";
sqlResult.rdd().saveAsTextFile(textDir);
/**
 * Reads {@code data/array-complex.json} into a dataframe using a Spark session
 * with master {@code local}, prints how long the read call took, then prints
 * the rows and the schema.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("Complex JSON array to Dataset")
            .master("local")
            .getOrCreate();

    String jsonPath = "data/array-complex.json";

    // Only the read call sits inside the timed window; show() and
    // printSchema() run after the measurement has been printed.
    long begin = System.currentTimeMillis();
    Dataset<Row> rows = session.read().json(jsonPath);
    long elapsedMs = System.currentTimeMillis() - begin;
    System.out.println("Processing took " + elapsedMs + " ms");

    rows.show();
    rows.printSchema();
}
}
/**
 * Reads {@code data/map.json} into a dataframe using a Spark session with
 * master {@code local}, prints how long the read call took, then prints the
 * rows and the schema.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("JSON map to Dataset")
            .master("local")
            .getOrCreate();

    String jsonPath = "data/map.json";

    // The timed window covers the read call only.
    long begin = System.currentTimeMillis();
    Dataset<Row> rows = session.read().json(jsonPath);
    long elapsedMs = System.currentTimeMillis() - begin;
    System.out.println("Processing took " + elapsedMs + " ms");

    rows.show();
    rows.printSchema();
}
}
/**
 * Turns a fixed list of four strings into a {@code Dataset<String>} using the
 * string encoder and prints it.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("Array to Dataset<String>")
            .master("local")
            .getOrCreate();

    // Arrays.asList over varargs yields the same array-backed list as
    // wrapping an explicit String[] would.
    List<String> letters = Arrays.asList("a", "b", "c", "d");

    Dataset<String> letterDs = session.createDataset(letters, Encoders.STRING());
    letterDs.show();
}
}
/**
 * Turns a fixed list of four strings into a {@code Dataset<String>}, converts
 * it to a dataframe ({@code Dataset<Row>}) with {@code toDF()}, and prints it.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("Array to Dataframe (Dataset<Row>)")
            .master("local")
            .getOrCreate();

    List<String> letters = Arrays.asList("a", "b", "c", "d");

    // Typed dataset first, then the untyped dataframe view of it.
    Dataset<Row> letterDf = session
            .createDataset(letters, Encoders.STRING())
            .toDF();
    letterDf.show();
}
}
/**
 * Reads {@code data/simple-data-file.txt} with the text reader on a Spark
 * session whose master is {@code local[*]}, then prints the resulting dataframe.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("Dataset from Text File")
            .master("local[*]")
            .getOrCreate();

    String textPath = "data/simple-data-file.txt";
    Dataset<Row> lines = session.read().text(textPath);
    lines.show();
}
}
/**
 * Loads the text file {@code data/simple-data-file.txt} into a dataframe and
 * prints it. The session is obtained with master {@code local[*]}.
 */
private void start() {
    SparkSession.Builder sessionBuilder = SparkSession.builder()
            .appName("Dataset from Text File")
            .master("local[*]");
    SparkSession session = sessionBuilder.getOrCreate();

    Dataset<Row> textDf = session.read().text("data/simple-data-file.txt");
    textDf.show();
}
}
/**
 * Reads {@code data/csv-quoted.txt} as CSV with the {@code inferSchema} and
 * {@code header} options set to {@code "true"}, then prints the rows and the
 * resulting schema.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("CSV to Dataset")
            .master("local")
            .getOrCreate();

    String csvPath = "data/csv-quoted.txt";
    Dataset<Row> records = session.read()
            .option("inferSchema", "true")
            .option("header", "true")
            .csv(csvPath);

    records.show();
    records.printSchema();
}
}
/**
 * Reads {@code data/csv-q.txt} as CSV with the {@code inferSchema} and
 * {@code header} options set to {@code "true"}, then prints the rows and the
 * resulting schema.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("CSV to Dataset")
            .master("local")
            .getOrCreate();

    String csvPath = "data/csv-q.txt";
    Dataset<Row> records = session.read()
            .option("inferSchema", "true")
            .option("header", "true")
            .csv(csvPath);

    records.show();
    records.printSchema();
}
}
/**
 * Loads {@code data/books.csv} through the {@code csv} format (options
 * {@code inferSchema} and {@code header} set to {@code "true"}), prints the
 * dataframe, then hands every row to a new {@code BookPrinter} via {@code foreach}.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("For Each Book")
            .master("local")
            .getOrCreate();

    String booksPath = "data/books.csv";
    Dataset<Row> books = session.read()
            .format("csv")
            .option("inferSchema", "true")
            .option("header", "true")
            .load(booksPath);

    books.show();

    // The printer is created only after show() has run, as in the original ordering.
    BookPrinter printer = new BookPrinter();
    books.foreach(printer);
}
}
/**
 * Loads {@code data/claims.csv} through the {@code csv} format (options
 * {@code inferSchema} and {@code header} set to {@code "true"}), prints the
 * dataframe, then hands every row to a new {@code ClaimPrepAndProcess} via
 * {@code foreach}.
 */
private void start() {
    SparkSession session = SparkSession.builder()
            .appName("For Each Claim")
            .master("local")
            .getOrCreate();

    String claimsPath = "data/claims.csv";
    Dataset<Row> claims = session.read()
            .format("csv")
            .option("inferSchema", "true")
            .option("header", "true")
            .load(claimsPath);

    claims.show();

    // The processor is created only after show() has run, as in the original ordering.
    ClaimPrepAndProcess processor = new ClaimPrepAndProcess();
    claims.foreach(processor);
}
}