public void stop() {
    if (this.sqlCtx != null) {
        this.sqlCtx.sparkContext().stop();
    }
}
private void preConditions(DataFrame df) {
    if (df != null && !StringUtils.startsWith(df.sqlContext().sparkContext().version(),
            Constants.SUPPORTED_SPARK_VERSION_PREFIX)) {
        throw new UnsupportedOperationException("Only Spark version "
                + Constants.SUPPORTED_SPARK_VERSION_PREFIX
                + " is supported by this version of the library");
    }
}
private int currentActiveExecutors() {
    try {
        // Note: getExecutorMemoryStatus() typically includes the driver's block
        // manager as well, so the size can be one more than the executor count.
        scala.collection.Map<String, Tuple2<Object, Object>> executors =
                this.sqlCtx.sparkContext().getExecutorMemoryStatus();
        return executors.size();
    } catch (Throwable e) {
        log.error("Error occurred while checking current Spark active executors.", e);
    }
    return 0;
}
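A minimal sketch of how a helper like this might be used: poll until the expected executor count is reported before submitting work. The waitForExecutors name, timeout, and poll interval are illustrative assumptions, not part of the original class.

// Hypothetical caller, assumed to live in the same class as currentActiveExecutors():
// block until at least `expected` entries are reported or the timeout elapses.
private boolean waitForExecutors(int expected, long timeoutMs) throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
        if (currentActiveExecutors() >= expected) {
            return true;
        }
        Thread.sleep(500); // poll interval is an arbitrary choice
    }
    return false;
}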
public AnalyticsRelation(int tenantId, String recordStore, String tableName,
        SQLContext sqlContext, StructType schema, String incParams,
        boolean globalTenantAccess, String schemaString, String primaryKeys,
        boolean mergeFlag, boolean preserveOrder) {
    this.tenantId = tenantId;
    this.tableName = tableName;
    this.recordStore = recordStore;
    this.sqlContext = sqlContext;
    this.schema = schema;
    setIncParams(incParams);
    this.globalTenantAccess = globalTenantAccess;
    this.schemaString = schemaString;
    this.primaryKeys = primaryKeys;
    this.mergeFlag = mergeFlag;
    this.recordBatchSize = Integer.parseInt(sqlContext.sparkContext().getConf()
            .get(AnalyticsConstants.CARBON_INSERT_BATCH_SIZE));
    this.preserveOrder = preserveOrder;
}
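Note that the constructor reads its batch size from the SparkConf, so that key must be set before the relation is built. A hedged sketch; the application name and batch-size value are illustrative, not taken from the project:

// Illustrative setup only: the batch-size value "1000" is a made-up example.
SparkConf conf = new SparkConf()
        .setAppName("analytics-app") // hypothetical name
        .set(AnalyticsConstants.CARBON_INSERT_BATCH_SIZE, "1000");
SQLContext sqlContext = new SQLContext(new SparkContext(conf));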
@Override
public RDD<Row> buildScan() {
    log.debug("-> buildScan()");
    // collectData() isolates the data gathering so the plumbing here stays simple.
    List<List<Integer>> table = collectData();
    @SuppressWarnings("resource") // cannot be closed here; closed elsewhere
    JavaSparkContext sparkContext = new JavaSparkContext(sqlContext.sparkContext());
    JavaRDD<Row> rowRDD = sparkContext.parallelize(table)
            .map(row -> RowFactory.create(row.toArray()));
    return rowRDD.rdd();
}
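For context, a BaseRelation like this is usually reached through a data source name passed to the reader; the format string below is a hypothetical provider class, not this project's actual one.

// Hypothetical usage: Spark resolves the format name to a relation provider,
// whose relation's buildScan() supplies the rows behind this DataFrame.
DataFrame df = sqlContext.read()
        .format("org.example.spark.MyDataSource") // illustrative class name
        .load();
df.show();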
// Fragment: apparently the tail of a super(...) call in an RDD<Row> subclass,
// forwarding the SparkContext, an empty dependency list, and an explicit ClassTag.
this.sqlContext.sparkContext(),
(Seq<Dependency<?>>) scala.collection.Seq$.MODULE$.empty(),
ClassTag$.MODULE$.<Row>apply(Row.class),
startTime, endTime, this.incEnable, this.incID);
"Check if the cluster has instantiated properly."); if (this.sqlCtx.sparkContext().stopped().get()) { log.error("Spark context has stopped."); restartSparkContext(); restartSparkContext(); this.sqlCtx.sparkContext().setLocalProperty(AnalyticsConstants.SPARK_SCHEDULER_POOL, this.sparkConf.get(AnalyticsConstants.SPARK_SCHEDULER_POOL)); DataFrame result = this.sqlCtx.sql(query);
registerUDFs(this.sqlCtx);
registerUDAFs(this.sqlCtx);
this.sqlCtx.sparkContext().addSparkListener(new SparkListener() {
    @Override
    public void onStageCompleted(SparkListenerStageCompleted sparkListenerStageCompleted) {
        // Body truncated in the original snippet.
    }
});
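A self-contained sketch of what the truncated listener above might do; logging stage metrics is an assumption about intent, not the original body.

import org.apache.spark.scheduler.SparkListener;
import org.apache.spark.scheduler.SparkListenerStageCompleted;

// Minimal standalone listener: reports each completed stage's id and task count.
public class StageLoggingListener extends SparkListener {
    @Override
    public void onStageCompleted(SparkListenerStageCompleted stageCompleted) {
        System.out.println("Stage " + stageCompleted.stageInfo().stageId()
                + " completed with " + stageCompleted.stageInfo().numTasks() + " tasks");
    }
}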
@Before
public void setUp() throws IOException {
    sqlContext = TestHive$.MODULE$;
    sc = new JavaSparkContext(sqlContext.sparkContext());
    path = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource").getCanonicalFile();
    if (path.exists()) {
        path.delete();
    }
    HiveSessionCatalog catalog = (HiveSessionCatalog) sqlContext.sessionState().catalog();
    hiveManagedPath = new Path(catalog.defaultTablePath(new TableIdentifier("javaSavedTable")));
    fs = hiveManagedPath.getFileSystem(sc.hadoopConfiguration());
    fs.delete(hiveManagedPath, true);
    List<String> jsonObjects = new ArrayList<>(10);
    for (int i = 0; i < 10; i++) {
        jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}");
    }
    Dataset<String> ds = sqlContext.createDataset(jsonObjects, Encoders.STRING());
    df = sqlContext.read().json(ds);
    df.createOrReplaceTempView("jsonTable");
}
private void writeDataFrameToDAL(DataFrame data) {
    if (this.preserveOrder) {
        logDebug("Inserting data with order preserved! Each partition will be written using separate jobs.");
        // One Spark job per partition keeps the partitions' write order deterministic.
        for (int i = 0; i < data.rdd().partitions().length; i++) {
            data.sqlContext().sparkContext().runJob(data.rdd(),
                    new AnalyticsWritingFunction(this.tenantId, this.tableName, data.schema(),
                            this.globalTenantAccess, this.schemaString, this.primaryKeys,
                            this.mergeFlag, this.recordStore, this.recordBatchSize),
                    CarbonScalaUtils.getNumberSeq(i, i + 1), false, ClassTag$.MODULE$.Unit());
        }
    } else {
        data.foreachPartition(new AnalyticsWritingFunction(this.tenantId, this.tableName,
                data.schema(), this.globalTenantAccess, this.schemaString, this.primaryKeys,
                this.mergeFlag, this.recordStore, this.recordBatchSize));
    }
}
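The preserveOrder branch trades throughput for determinism: runJob is invoked once per partition, so partition i is fully written before partition i + 1 starts. A simplified illustration of the same one-job-per-partition pattern; the RDD, its data, and the writeRecord sink are all hypothetical.

// Illustrative only: process partitions strictly one at a time.
JavaRDD<String> rdd = sparkContext.parallelize(records, 4); // 'records' is assumed
for (int p = 0; p < rdd.getNumPartitions(); p++) {
    // collectPartitions runs a job over just partition p, mirroring the
    // CarbonScalaUtils.getNumberSeq(i, i + 1) restriction above.
    List<String>[] slice = rdd.collectPartitions(new int[] { p });
    for (String record : slice[0]) {
        writeRecord(record); // hypothetical sink
    }
}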