@Override public SparkSession getOrCreate() { return sparkSessionBuilder.getOrCreate(); }
private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext sc, String hiveTable) {
  SparkSession sparkSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
  final Dataset<Row> intermediateTable = sparkSession.table(hiveTable);
  return intermediateTable.javaRDD().map(new Function<Row, String[]>() {
    @Override
    public String[] call(Row row) throws Exception {
      // Stringify every column, preserving nulls
      String[] result = new String[row.size()];
      for (int i = 0; i < row.size(); i++) {
        final Object o = row.get(i);
        result[i] = (o != null) ? o.toString() : null;
      }
      return result;
    }
  });
}
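A hypothetical call site for the helper above; the app name and table name are illustrative, not from the source, and a Hive-enabled Spark build is assumed:

// Sketch only: "default.my_table" is a hypothetical Hive table.
SparkConf conf = new SparkConf().setAppName("hive-input-example");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String[]> rows = getOtherFormatHiveInput(sc, "default.my_table");
System.out.println("rows: " + rows.count());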
SparkConf conf = new SparkConf();
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlCtx = new SQLContext(sc);
DataFrame input = sqlCtx.jsonFile(inputFile);
input.printSchema();
input.registerTempTable("tweets");
DataFrame topTweets = sqlCtx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");
Row[] result = topTweets.collect();
for (Row row : result) {
  System.out.println(row.get(0));
}
JavaRDD<String> topTweetText = topTweets.toJavaRDD().map(new Function<Row, String>() {
  public String call(Row row) {
    return row.getString(0);
  }
});
System.out.println(topTweetText.collect());
List<HappyPerson> peopleList = new ArrayList<HappyPerson>();
peopleList.add(new HappyPerson("holden", "coffee"));
JavaRDD<HappyPerson> happyPeopleRDD = sc.parallelize(peopleList);
DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class);
happyPeopleSchemaRDD.registerTempTable("happy_people");
// Register the UDF together with its return type
sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() {
  @Override
  public Integer call(String str) throws Exception {
    return str.length();
  }
}, DataTypes.IntegerType);
DataFrame tweetLength = sqlCtx.sql("SELECT stringLengthJava('text') FROM tweets LIMIT 10");
Row[] lengths = tweetLength.collect();
for (Row row : lengths) {
  System.out.println(row.get(0));
}
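The snippet above assumes a HappyPerson JavaBean for applySchema(happyPeopleRDD, HappyPerson.class); a minimal sketch of such a bean, with field names inferred from the constructor call rather than taken from the source:

// Minimal JavaBean sketch; the two fields are assumptions inferred
// from new HappyPerson("holden", "coffee").
public static class HappyPerson implements Serializable {
  private String name;
  private String favouriteBeverage;
  public HappyPerson() {}
  public HappyPerson(String n, String b) { name = n; favouriteBeverage = b; }
  public String getName() { return name; }
  public void setName(String n) { name = n; }
  public String getFavouriteBeverage() { return favouriteBeverage; }
  public void setFavouriteBeverage(String b) { favouriteBeverage = b; }
}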
@Override public SparkSession getOrCreate() { return SparkSession.builder().config(getSparkConf()).getOrCreate(); }
@Override
public TryResult configure(KernelFunctionality kernel, SparkUIApi sparkUI, Message parentMessage) {
  SparkConf sparkConf = createSparkConf(sparkUI.getAdvancedOptions(), getSparkConfBasedOn(this.sparkSessionBuilder));
  sparkConf = configureSparkConf(sparkConf, sparkUI);
  this.sparkSessionBuilder = SparkSession.builder().config(sparkConf);
  if (sparkUI.getHiveSupport()) {
    this.sparkSessionBuilder.enableHiveSupport();
  }
  TryResult sparkSessionTry = createSparkSession(sparkUI, parentMessage);
  if (sparkSessionTry.isError()) {
    return sparkSessionTry;
  }
  addListener(getOrCreate().sparkContext(), sparkUI);
  SparkVariable.putSparkSession(getOrCreate());
  TryResult tryResultSparkContext = initSparkContextInShell(kernel, parentMessage);
  if (!tryResultSparkContext.isError()) {
    kernel.registerCancelHook(SparkVariable::cancelAllJobs);
  }
  return tryResultSparkContext;
}
@Override public String getSparkUiWebUrl() { return getOrCreate().sparkContext().uiWebUrl().get(); }
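getSparkUiWebUrl() calls get() on the scala.Option returned by uiWebUrl(), which throws if the Spark UI is disabled; a defensive variant (a sketch, not part of this project's API) could mirror the isDefined() checks used by the jobLink/stageLink helpers later in this section:

// Hypothetical defensive variant: falls back to an empty string when no UI is bound.
public String getSparkUiWebUrlOrEmpty() {
  scala.Option<String> url = getOrCreate().sparkContext().uiWebUrl();
  return url.isDefined() ? url.get() : "";
}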
private MagicCommandOutcomeItem createSparkUiBasedOnEmptyConfiguration(MagicCommandExecutionParam param, List<SparkOptionCommand> options, SimpleEvaluationObject seo) {
  InternalVariable.setValue(seo);
  SparkSession.Builder config = SparkSession.builder().config(new SparkConf());
  createSparkUI(config, param.getCode().getMessage(), options);
  return new MagicCommandOutput(MagicCommandOutput.Status.OK);
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage LoadHive sparkMaster tbl");
  }
  String master = args[0];
  String tbl = args[1];
  JavaSparkContext sc = new JavaSparkContext(
      master, "loadhive", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // A HiveContext (rather than a plain SQLContext) is needed to see Hive tables in Spark 1.x
  HiveContext sqlCtx = new HiveContext(sc);
  DataFrame rdd = sqlCtx.sql("SELECT key, value FROM " + tbl);
  JavaRDD<Integer> squaredKeys = rdd.toJavaRDD().map(new SquareKey());
  List<Integer> result = squaredKeys.collect();
  for (Integer elem : result) {
    System.out.println(elem);
  }
}
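SquareKey is referenced above but not shown; a minimal sketch consistent with the map call (squaring the integer key column) might look like this:

// Sketch of the mapper assumed above: reads the first column as an int and squares it.
public static class SquareKey implements Function<Row, Integer> {
  public Integer call(Row row) throws Exception {
    return row.getInt(0) * row.getInt(0);
  }
}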
@Test
public void sparkVersion() {
  //given
  SparkEngineImpl sparkEngine = new SparkEngineImpl(SparkSession.builder());
  //when
  String version = sparkEngine.sparkVersion();
  //then
  assertThat(version).isEqualTo("2.2.1");
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage LoadJsonWithSparkSQL sparkMaster jsonFile");
  }
  String master = args[0];
  String jsonFile = args[1];
  JavaSparkContext sc = new JavaSparkContext(master, "loadJsonwithsparksql");
  SQLContext sqlCtx = new SQLContext(sc);
  DataFrame input = sqlCtx.jsonFile(jsonFile);
  input.printSchema();
}
String cqlStatement = "SELECT * FROM local";
for (Row row : session.execute(cqlStatement)) {
  System.out.println(row.toString());
}
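The session above looks like a DataStax Java driver Session; a minimal setup sketch, where the contact point and the system keyspace (whose local table holds node metadata) are assumptions rather than details from the source:

// Hypothetical setup using the DataStax Java driver 3.x API;
// "127.0.0.1" and the "system" keyspace are assumptions.
Cluster cluster = Cluster.builder().addContactPoint("127.0.0.1").build();
Session session = cluster.connect("system");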
public void cancelAllJobs() { getSparkSession().sparkContext().cancelAllJobs(); }
private String jobLink(int jobId) {
  if (getSparkSession().sparkContext().uiWebUrl().isDefined()) {
    return getSparkSession().sparkContext().uiWebUrl().get() + "/jobs/job/?id=" + jobId;
  } else {
    return "";
  }
}
private String stageLink(int stageId) {
  if (getSparkSession().sparkContext().uiWebUrl().isDefined()) {
    return getSparkSession().sparkContext().uiWebUrl().get() + "/stages/stage/?id=" + stageId + "&attempt=0";
  } else {
    return "";
  }
}