/**
 * Saves related Spark and Alluxio configuration information.
 *
 * @param conf the current SparkConf
 * @param reportWriter the PrintWriter used to write user-facing messages to the generated report file
 */
private void printConfigInfo(SparkConf conf, PrintWriter reportWriter) {
  // Get Spark configurations
  if (conf.contains("spark.master")) {
    reportWriter.printf("Spark master is: %s.%n%n", conf.get("spark.master"));
  }
  if (conf.contains("spark.submit.deployMode")) {
    reportWriter.printf("spark-submit deploy mode is: %s.%n%n",
        conf.get("spark.submit.deployMode"));
  }
  if (conf.contains("spark.driver.extraClassPath")) {
    reportWriter.printf("spark.driver.extraClassPath includes jar paths: %s.%n%n",
        conf.get("spark.driver.extraClassPath"));
  }
  if (conf.contains("spark.executor.extraClassPath")) {
    reportWriter.printf("spark.executor.extraClassPath includes jar paths: %s.%n%n",
        conf.get("spark.executor.extraClassPath"));
  }
}
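A minimal calling sketch for the guarded-report pattern above. The writeConfigReport name and the report path are hypothetical; only the public SparkConf and java.io.PrintWriter APIs are assumed:

import java.io.PrintWriter;
import org.apache.spark.SparkConf;

static void writeConfigReport(SparkConf conf) throws Exception {
  // try-with-resources flushes and closes the report file (hypothetical path)
  try (PrintWriter reportWriter = new PrintWriter("spark-config-report.txt", "UTF-8")) {
    if (conf.contains("spark.master")) {
      reportWriter.printf("Spark master is: %s.%n%n", conf.get("spark.master"));
    }
    if (conf.contains("spark.submit.deployMode")) {
      reportWriter.printf("spark-submit deploy mode is: %s.%n%n",
          conf.get("spark.submit.deployMode"));
    }
  }
}

Guarding each key with contains is what keeps SparkConf.get from throwing a NoSuchElementException for settings the user never supplied.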
@Override
public void loadDefaults(SparkSession.Builder builder) {
  SparkConf sparkConf = SparkEngineImpl.getSparkConfBasedOn(builder);
  loadProfiles();
  Map<String, Object> map = getProfileByName(currentProfile);
  if (map != null) {
    // Apply only the profile entries that are not already set on the SparkConf.
    map.entrySet().stream()
        .filter(x -> !sparkConf.contains(x.getKey()))
        .forEach(x -> addToBuilder(builder, x.getKey(), x.getValue()));
  }
}
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master");
  String deployMode = sparkConf.contains("spark.submit.deployMode")
      ? sparkConf.get("spark.submit.deployMode")
      : null;
  // Upload is only needed in yarn-cluster mode, and only for sources that are not
  // already on a cluster-visible filesystem. "hdfs".equals(...) avoids an NPE when
  // the URI has no scheme.
  return SparkClientUtilities.isYarnClusterMode(master, deployMode)
      && !("hdfs".equals(source.getScheme()) || "viewfs".equals(source.getScheme()));
}
private static boolean isLocalSpark(SparkConf sparkConf) {
  // Local-mode masters look like "local", "local[4]", or "local[*]".
  return sparkConf.contains(SPARK_MASTER)
      && sparkConf.get(SPARK_MASTER) != null
      && sparkConf.get(SPARK_MASTER).startsWith("local");
}
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master");
  String deployMode = sparkConf.contains("spark.submit.deployMode")
      ? sparkConf.get("spark.submit.deployMode")
      : null;
  // Same check as above, but this variant only treats "hdfs" sources as already remote.
  return SparkClientUtilities.isYarnClusterMode(master, deployMode)
      && !"hdfs".equals(source.getScheme());
}
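A sketch of how needUploadToHDFS might be exercised, assuming Hive's SparkClientUtilities.isYarnClusterMode returns true for a "yarn" master with deployMode "cluster" (its exact behavior lives in Hive's codebase and is not shown here):

import java.net.URI;
import org.apache.spark.SparkConf;

static void demoNeedUpload() {
  SparkConf conf = new SparkConf()
      .set("spark.master", "yarn")
      .set("spark.submit.deployMode", "cluster");
  // A local jar is not cluster-visible, so yarn-cluster mode needs it uploaded first.
  boolean uploadLocal = needUploadToHDFS(URI.create("file:///tmp/udfs.jar"), conf);  // expected: true
  // A jar already on HDFS needs no upload.
  boolean uploadHdfs = needUploadToHDFS(URI.create("hdfs:///apps/udfs.jar"), conf);  // expected: false
}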
private Text createMasterURL() {
  Text masterURL = new Text();
  masterURL.setDescription("Master URL");
  masterURL.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-master-url")));
  if (getSparkConf().contains(SPARK_MASTER)) {
    masterURL.setValue(getSparkConf().get(SPARK_MASTER));
  } else {
    masterURL.setValue(SPARK_MASTER_DEFAULT);
  }
  return masterURL;
}
private Text createExecutorCores() {
  Text cores = new Text();
  cores.setDescription("Executor Cores");
  cores.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-executor-cores")));
  if (getSparkConf().contains(SPARK_EXECUTOR_CORES)) {
    cores.setValue(getSparkConf().get(SPARK_EXECUTOR_CORES));
  } else {
    cores.setValue(SparkUI.SPARK_EXECUTOR_CORES_DEFAULT);
  }
  return cores;
}
private Text createExecutorMemory() {
  Text memory = new Text();
  memory.setDescription("Executor Memory");
  memory.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-executor-memory")));
  if (getSparkConf().contains(SPARK_EXECUTOR_MEMORY)) {
    memory.setValue(getSparkConf().get(SPARK_EXECUTOR_MEMORY));
  } else {
    memory.setValue(SparkUI.SPARK_EXECUTOR_MEMORY_DEFAULT);
  }
  return memory;
}
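The three widget builders above share a contains-then-get-or-default shape. SparkConf also exposes a two-argument get(key, defaultValue) overload that collapses the branch; a one-line sketch of the same logic using it, with the constants from the snippets above:

// Equivalent to the if/else above: SparkConf.get(key, defaultValue)
// returns the default when the key is absent.
cores.setValue(getSparkConf().get(SPARK_EXECUTOR_CORES, SparkUI.SPARK_EXECUTOR_CORES_DEFAULT));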
String masterURL = sparkConf.get("spark.master");
if (masterURL.startsWith("spark") || masterURL.startsWith("local")) {
  totalCores = sparkConf.contains("spark.default.parallelism")
      ? sparkConf.getInt("spark.default.parallelism", 1)
      : hiveSparkClient.getDefaultParallelism();
private SparkConf createSparkConf(List<SparkConfiguration.Configuration> configurations,
                                  SparkConf old) {
  SparkConf sparkConf = new SparkConf();
  sparkConf.set(SPARK_EXTRA_LISTENERS, old.get(SPARK_EXTRA_LISTENERS));
  sparkConf.set(BEAKERX_ID, old.get(BEAKERX_ID));
  if (old.contains(SPARK_APP_NAME)) {
    sparkConf.set(SPARK_APP_NAME, old.get(SPARK_APP_NAME));
  }
  configurations.forEach(x -> {
    if (x.getName() != null) {
      sparkConf.set(x.getName(), (x.getValue() != null) ? x.getValue() : "");
    }
  });
  return sparkConf;
}
String masterURL = sparkConf.get("spark.master");
if (masterURL.startsWith("spark")) {
  totalCores = sparkConf.contains("spark.default.parallelism")
      ? sparkConf.getInt("spark.default.parallelism", 1)
      : hiveSparkClient.getDefaultParallelism();
private SparkConf configureSparkConf(SparkConf sparkConf, SparkUIApi sparkUI) {
  if (!sparkConf.contains(SPARK_APP_NAME)) {
    sparkConf.setAppName("beaker_" + UUID.randomUUID().toString());
  }
  if (sparkUI.getMasterURL().getValue() != null && !sparkUI.getMasterURL().getValue().isEmpty()) {
    sparkConf.set(SPARK_MASTER, sparkUI.getMasterURL().getValue());
  }
  if (!isLocalSpark(sparkConf)) {
    sparkConf.set(SPARK_REPL_CLASS_OUTPUT_DIR, KernelManager.get().getOutDir());
  }
  if (sparkUI.getExecutorMemory().getValue() != null
      && !sparkUI.getExecutorMemory().getValue().isEmpty()) {
    sparkConf.set(SPARK_EXECUTOR_MEMORY, sparkUI.getExecutorMemory().getValue());
  }
  if (sparkUI.getExecutorCores().getValue() != null
      && !sparkUI.getExecutorCores().getValue().isEmpty()) {
    sparkConf.set(SPARK_EXECUTOR_CORES, sparkUI.getExecutorCores().getValue());
  }
  return sparkConf;
}
private static void initializeBatchJob() {
  SparkConf sparkConf = getSparkConfiguration(INSTANCE.config, INSTANCE.mode);
  if (!sparkConf.contains("spark.master")) {
    LOG.warn("Spark master not provided; falling back to local mode");
    sparkConf.setMaster("local[*]");
  }
  if (!sparkConf.contains("spark.app.name")) {
    LOG.warn("Spark application name not provided; using an empty string");
    sparkConf.setAppName("");
  }
  SparkSession.Builder sparkSessionBuilder = SparkSession.builder();
  if (enablesHiveSupport()) {
    sparkSessionBuilder.enableHiveSupport();
  }
  INSTANCE.ss = sparkSessionBuilder.config(sparkConf).getOrCreate();
}
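A self-contained sketch of the same guard-then-build flow, assuming nothing beyond the public SparkConf and SparkSession APIs (the "batch-job" name is hypothetical; the snippet above uses an empty string):

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

static SparkSession buildSession(SparkConf sparkConf) {
  if (!sparkConf.contains("spark.master")) {
    sparkConf.setMaster("local[*]");     // safe default for single-machine runs
  }
  if (!sparkConf.contains("spark.app.name")) {
    sparkConf.setAppName("batch-job");   // hypothetical fallback name
  }
  return SparkSession.builder().config(sparkConf).getOrCreate();
}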
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }
    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }
    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", BeamSparkRunnerRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
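The contains guard before setMaster matters because new SparkConf() already loads any spark.* Java system properties, which is the mechanism spark-submit uses to pass the master, so an unconditional setMaster would clobber a user-supplied value. A minimal sketch of that interaction, with a hypothetical fallback:

import org.apache.spark.SparkConf;

static SparkConf confWithFallbackMaster(String fallbackMaster) {
  SparkConf conf = new SparkConf();  // picks up spark.* system properties, e.g. from spark-submit
  if (!conf.contains("spark.master")) {
    conf.setMaster(fallbackMaster);  // only applied when no master was submitted
  }
  return conf;
}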
/**
 * Initializes the Spark conf with default properties. It also reads the Spark defaults from
 * the given properties file, overriding parameters accordingly, and adds the port offset
 * to all of the port configurations.
 *
 * @param portOffset port offset
 * @param propsFile  location of the properties file
 */
private SparkConf initializeSparkConf(int portOffset, String propsFile) throws AnalyticsException {
  // Create a Spark conf object without loading defaults.
  SparkConf conf = new SparkConf(false);
  // Read the properties from the file. This file is the primary location the defaults
  // are loaded from.
  logDebug("Loading Spark defaults from " + propsFile);
  scala.collection.Map<String, String> properties = Utils.getPropertiesFromFile(propsFile);
  conf.setAll(properties);
  conf.set(AnalyticsConstants.SPARK_EXTRA_LISTENERS,
      "org.wso2.carbon.analytics.spark.core.internal.JavaSparkApplicationListener");
  if (!conf.contains("carbon.ds.legacy.export.mode")) {
    this.exportDataSourcesAsProperties();
  }
  setAdditionalConfigs(conf);
  addSparkPropertiesPortOffset(conf, portOffset);
  return conf;
}
@Test
public void testSparkPassthroughGood() {
  Config config = ConfigUtils.configFromPath(
      this.getClass().getResource(RESOURCES_PATH + "/spark-passthrough-good.conf").getPath());
  Contexts.initialize(config, Contexts.ExecutionMode.UNIT_TEST);
  SparkConf sparkConf = Contexts.getSparkSession().sparkContext().getConf();

  assertTrue(sparkConf.contains("spark.driver.allowMultipleContexts"));
  assertEquals("true", sparkConf.get("spark.driver.allowMultipleContexts"));
  assertTrue(sparkConf.contains("spark.master"));
  assertEquals("local[1]", sparkConf.get("spark.master"));
}
@Test
public void testDefaultBatchConfiguration() {
  Config config = ConfigFactory.empty();
  Contexts.initialize(config, Contexts.ExecutionMode.BATCH);
  SparkConf sparkConf = Contexts.getSparkSession().sparkContext().getConf();

  assertFalse(sparkConf.contains("spark.dynamicAllocation.enabled"));
  assertFalse(sparkConf.contains("spark.sql.shuffle.partitions"));
  assertEquals("hive", sparkConf.get("spark.sql.catalogImplementation"));
}
public SparkExecutor(SparkPlatform platform, Job job) {
  super(job);
  this.platform = platform;
  this.sparkContextReference = this.platform.getSparkContext(job);
  this.sparkContextReference.noteObtainedReference();
  this.sc = this.sparkContextReference.get();
  if (this.sc.getConf().contains("spark.executor.cores")) {
    // Size the default partition count off the configured executor cores.
    this.numDefaultPartitions = 2 * this.sc.getConf().getInt("spark.executor.cores", -1);
  } else {
    // Fall back to the Rheem-configured cluster shape.
    this.numDefaultPartitions = (int) (2
        * this.getConfiguration().getLongProperty("rheem.spark.machines")
        * this.getConfiguration().getLongProperty("rheem.spark.cores-per-machine"));
  }
}
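The 2 x total-cores heuristic above is a common starting point for partition counts: enough tasks to keep every core busy while smoothing over stragglers. A standalone sketch of the same computation, where the machines and coresPerMachine parameters stand in for the rheem.spark.* properties:

import org.apache.spark.SparkConf;

static int defaultPartitions(SparkConf conf, int machines, int coresPerMachine) {
  if (conf.contains("spark.executor.cores")) {
    return 2 * conf.getInt("spark.executor.cores", 1);
  }
  // Hypothetical cluster-shape fallback, mirroring the rheem.spark.* properties above.
  return 2 * machines * coresPerMachine;
}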
@Test
public void testDefaultStreamingConfiguration() {
  Config config = ConfigFactory.empty();
  Contexts.initialize(config, Contexts.ExecutionMode.STREAMING);
  SparkConf sparkConf = Contexts.getSparkSession().sparkContext().getConf();

  assertTrue(sparkConf.contains("spark.dynamicAllocation.enabled"));
  assertTrue(sparkConf.contains("spark.sql.shuffle.partitions"));
  assertEquals("hive", sparkConf.get("spark.sql.catalogImplementation"));
}