@Override
public SparkSession getOrCreate() {
  return SparkSession.builder().config(getSparkConf()).getOrCreate();
}
final String inputPath = args[0];   // input path argument (inferred; the excerpt only showed args[1])
final String outputPath = args[1];
SparkSession spark = SparkSession.builder().appName("minmax").getOrCreate();
JavaRDD<String> numbers = spark.read().textFile(inputPath).javaRDD();
spark.close();
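The min/max computation itself is not part of this excerpt; a minimal sketch of what the continuation could look like before the spark.close() call, assuming the input file holds one integer per line:

// Assumed continuation of the "minmax" job (not shown in the original excerpt).
JavaRDD<Integer> parsed = numbers.map(Integer::parseInt);   // assumes one integer per line
int min = parsed.reduce(Math::min);
int max = parsed.reduce(Math::max);
System.out.println("min=" + min + " max=" + max);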
@Override
public TryResult configure(KernelFunctionality kernel, SparkUIApi sparkUI, Message parentMessage) {
  SparkConf sparkConf = createSparkConf(sparkUI.getAdvancedOptions(), getSparkConfBasedOn(this.sparkSessionBuilder));
  sparkConf = configureSparkConf(sparkConf, sparkUI);
  this.sparkSessionBuilder = SparkSession.builder().config(sparkConf);
  if (sparkUI.getHiveSupport()) {
    this.sparkSessionBuilder.enableHiveSupport();
  }
  TryResult sparkSessionTry = createSparkSession(sparkUI, parentMessage);
  if (sparkSessionTry.isError()) {
    return sparkSessionTry;
  }
  addListener(getOrCreate().sparkContext(), sparkUI);
  SparkVariable.putSparkSession(getOrCreate());
  TryResult tryResultSparkContext = initSparkContextInShell(kernel, parentMessage);
  if (!tryResultSparkContext.isError()) {
    kernel.registerCancelHook(SparkVariable::cancelAllJobs);
  }
  return tryResultSparkContext;
}
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("Complex JSON array to Dataset")
      .master("local")
      .getOrCreate();

  String filename = "data/array-complex.json";
  long start = System.currentTimeMillis();
  Dataset<Row> df = spark.read().json(filename);
  long stop = System.currentTimeMillis();
  System.out.println("Processing took " + (stop - start) + " ms");
  df.show();
  df.printSchema();
}
.builder() .appName("knn") .getOrCreate(); JavaSparkContext context = JavaSparkContext.fromSparkContext(session.sparkContext()); JavaRDD<String> R = session.read().textFile(datasetR).javaRDD(); R.saveAsTextFile(outputPath+"/R"); JavaRDD<String> S = session.read().textFile(datasetS).javaRDD();
SparkSession sparkSession = SparkSession
    .builder()
    .config(sparkConf)
    .getOrCreate();
Tuple2<String, String>[] tuples =
    (Tuple2<String, String>[]) sparkSession.sparkContext().wholeTextFiles(query, 1).collect();
query = tuples[0]._2();
System.out.println("Query: " + query);
engine.executeScript(query, queryOverwrite, sparkSession, false);
sparkSession.stop();
.builder() .appName("SparkSQLRelativeFrequency") .config(sparkConf) .getOrCreate(); JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); int neighborWindow = Integer.parseInt(args[0]); String input = args[1]; Dataset<Row> rfDataset = spark.createDataFrame(rowRDD, rfSchema);
@Before
public void setUp() throws IOException {
  spark = SparkSession.builder()
      .master("local[*]")
      .appName("testing")
      .getOrCreate();
  path = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource").getCanonicalFile();
  if (path.exists()) {
    path.delete();
  }
  List<String> jsonObjects = new ArrayList<>(10);
  for (int i = 0; i < 10; i++) {
    jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}");
  }
  Dataset<String> ds = spark.createDataset(jsonObjects, Encoders.STRING());
  df = spark.read().json(ds);
  df.createOrReplaceTempView("jsonTable");
}
public void run() throws Exception {
  SparkSession spark = SparkSession.builder().appName("Hoodie Spark APP")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .master("local[1]")
      .getOrCreate();
  JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());
  FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
  Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));
SparkSession spark = SparkSession.builder().appName("Hoodie Spark Streaming APP") .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]") .getOrCreate(); JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext()); Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));
private void start() { SparkSession spark = SparkSession.builder().appName("CSV to Dataset") .master("local").getOrCreate(); spark.udf().register("x2Multiplier", new Multiplier2(), DataTypes.IntegerType); String filename = "data/tuple-data-file.csv"; Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true") .option("header", "false").load(filename); df = df.withColumn("label", df.col("_c0")).drop("_c0"); df = df.withColumn("value", df.col("_c1")).drop("_c1"); df = df.withColumn("x2", callUDF("x2Multiplier", df.col("value").cast( DataTypes.IntegerType))); df.show(); } }
private void start() {
  SparkConf conf = new SparkConf().setAppName("Concurrency Lab 001")
      .setMaster(Config.MASTER);
  JavaSparkContext sc = new JavaSparkContext(conf);
  SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
  conf = spark.sparkContext().conf();
  System.out.println(conf.get("hello"));
  Dataset<Row> df = spark.sql("SELECT * from myView");
  df.show();
}
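For the SELECT against myView to succeed, a temporary view with that name must be registered on the same session somewhere else in the application (not shown in this excerpt). A minimal sketch, with a hypothetical input path:

// Assumed setup elsewhere: register a DataFrame as the view queried above.
Dataset<Row> source = spark.read().json("data/some-input.json");   // hypothetical path
source.createOrReplaceTempView("myView");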
private void start() { SparkSession spark = SparkSession.builder() .appName("CSV on S3 to Dataset<Row>") .master("spark://10.0.100.81:7077") .config("spark.executor.memory", "1g") .config("spark.executor.cores", "1") .config("spark.cores.max", "2") .config("spark.driver.host", "10.0.100.182") .config("spark.executor.extraClassPath", "/home/jgp/net.jgp.labs.spark/target/labs-spark-2.2.0-jar-with-dependencies.jar") .getOrCreate(); spark.sparkContext().hadoopConfiguration().set("fs.s3a.access.key", "xxx"); spark.sparkContext().hadoopConfiguration().set("fs.s3a.secret.key", "xxx");
private void start() { SparkSession spark = SparkSession.builder() .appName("Array to Dataset<String>") .master("local") .getOrCreate(); String[] l = new String[] { "a", "b", "c", "d" }; List<String> data = Arrays.asList(l); Dataset<String> df = spark.createDataset(data, Encoders.STRING()); df.show(); } }
private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext sc, String hiveTable) {
  SparkSession sparkSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
  final Dataset<Row> intermediateTable = sparkSession.table(hiveTable);
  return intermediateTable.javaRDD().map(new Function<Row, String[]>() {
    @Override
    public String[] call(Row row) throws Exception {
      String[] result = new String[row.size()];
      for (int i = 0; i < row.size(); i++) {
        final Object o = row.get(i);
        if (o != null) {
          result[i] = o.toString();
        } else {
          result[i] = null;
        }
      }
      return result;
    }
  });
}
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder().appName("Hello Spark")
      .master("local")
      .getOrCreate();
  System.out.println("Hello, Spark v." + spark.version());
}
final Constructor<SparkRequirement> constructor =
    ((Class<SparkRequirement>) requirementClass).getConstructor(SparkSession.class);
SparkSession sc = SparkSession.builder()
    .appName(appName)
    .enableHiveSupport()
    .getOrCreate();
sc.stop();
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException ex) {
@BeforeClass
public static void startSpark() {
  TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate();

  // define UDFs used by partition tests
  Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4);
  spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$);

  Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone());
  spark.udf().register("ts_day",
      (UDF1<Timestamp, Integer>) timestamp -> day.apply(fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone());
  spark.udf().register("ts_hour",
      (UDF1<Timestamp, Integer>) timestamp -> hour.apply(fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  Transform<CharSequence, CharSequence> trunc1 = Transforms.truncate(Types.StringType.get(), 1);
  spark.udf().register("trunc1",
      (UDF1<CharSequence, CharSequence>) str -> trunc1.apply(str.toString()),
      StringType$.MODULE$);
}
/**
 * Constructs the all-important HiveContext, then evaluates the wrapped Statement.
 * Currently, the HiveContext is created as a singleton.
 *
 * @throws Throwable as required by the Statement class
 */
@Override
public void evaluate() throws Throwable {
  if (sparkSqlContextSingleton == null) {
    SparkConf sparkConf = new SparkConf().setAppName("HiveQLUnit").setMaster(serverAddress);
    SparkSession sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate();
    sparkSqlContextSingleton = sparkSession.sqlContext();
  }
  wrappedStatement.evaluate();
}