sparkConf.get("spark.executor.memory", "512m")); double memoryFraction = 1.0 - sparkConf.getDouble("spark.storage.memoryFraction", 0.6); long totalMemory = (long) (numExecutors * executorMemoryInMB * memoryFraction * 1024 * 1024); int totalCores; String masterURL = sparkConf.get("spark.master"); if (masterURL.startsWith("spark") || masterURL.startsWith("local")) { totalCores = sparkConf.contains("spark.default.parallelism") ?
ShuffleExternalSorter(
    TaskMemoryManager memoryManager,
    BlockManager blockManager,
    TaskContext taskContext,
    int initialSize,
    int numPartitions,
    SparkConf conf,
    ShuffleWriteMetrics writeMetrics) {
  super(memoryManager,
      (int) Math.min(PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES, memoryManager.pageSizeBytes()),
      memoryManager.getTungstenMemoryMode());
  this.taskMemoryManager = memoryManager;
  this.blockManager = blockManager;
  this.taskContext = taskContext;
  this.numPartitions = numPartitions;
  // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
  this.fileBufferSizeBytes =
      (int) (long) conf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024;
  this.numElementsForSpillThreshold =
      (int) conf.get(package$.MODULE$.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD());
  this.writeMetrics = writeMetrics;
  this.inMemSorter = new ShuffleInMemorySorter(
      this, initialSize, conf.getBoolean("spark.shuffle.sort.useRadixSort", true));
  this.peakMemoryUsedBytes = getMemoryUsage();
  this.diskWriteBufferSize =
      (int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE());
}
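
The repeated (int) (long) cast looks odd but is deliberate: seen from Java, the Scala ConfigEntry getter hands back a boxed java.lang.Long, which cannot be narrowed to int in a single cast. A minimal illustration (the value is made up):

Long boxed = Long.valueOf(32L * 1024);                // what conf.get(<ConfigEntry<Long>>) returns from Java
// int bad = (int) boxed;                             // compile error: Long cannot be converted to int
int fileBufferSizeBytes = (int) (long) boxed * 1024;  // unbox to long first, then narrow to int
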
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master");
  String deployMode = sparkConf.contains("spark.submit.deployMode")
      ? sparkConf.get("spark.submit.deployMode") : null;
  return SparkClientUtilities.isYarnClusterMode(master, deployMode)
      && !(source.getScheme().equals("hdfs") || source.getScheme().equals("viewfs"));
}
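
A hedged usage sketch of the check above (the master settings and jar path are illustrative):

SparkConf conf = new SparkConf()
    .set("spark.master", "yarn")
    .set("spark.submit.deployMode", "cluster");
URI localJar = URI.create("file:///tmp/my-udf.jar");
// file:// sources must be uploaded so YARN cluster-mode containers can reach them;
// hdfs:// (and viewfs://) sources are already visible cluster-wide
boolean upload = needUploadToHDFS(localJar, conf);
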
sparkConf.get("spark.executor.memory", "512m")); double memoryFraction = 1.0 - sparkConf.getDouble("spark.storage.memoryFraction", 0.6); long totalMemory = (long) (numExecutors * executorMemoryInMB * memoryFraction * 1024 * 1024); int totalCores; String masterURL = sparkConf.get("spark.master"); if (masterURL.startsWith("spark")) { totalCores = sparkConf.contains("spark.default.parallelism") ?
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master");
  String deployMode = sparkConf.contains("spark.submit.deployMode")
      ? sparkConf.get("spark.submit.deployMode") : null;
  return SparkClientUtilities.isYarnClusterMode(master, deployMode)
      && !source.getScheme().equals("hdfs");
}
/**
 * Saves related Spark and Alluxio configuration information.
 *
 * @param conf the current SparkConf
 * @param reportWriter save user-facing messages to a generated file
 */
private void printConfigInfo(SparkConf conf, PrintWriter reportWriter) {
  // Get Spark configurations
  if (conf.contains("spark.master")) {
    reportWriter.printf("Spark master is: %s.%n%n", conf.get("spark.master"));
  }
  if (conf.contains("spark.submit.deployMode")) {
    reportWriter.printf("spark-submit deploy mode is: %s.%n%n",
        conf.get("spark.submit.deployMode"));
  }
  if (conf.contains("spark.driver.extraClassPath")) {
    reportWriter.printf("spark.driver.extraClassPath includes jar paths: %s.%n%n",
        conf.get("spark.driver.extraClassPath"));
  }
  if (conf.contains("spark.executor.extraClassPath")) {
    reportWriter.printf("spark.executor.extraClassPath includes jar paths: %s.%n%n",
        conf.get("spark.executor.extraClassPath"));
  }
}
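
A usage sketch, assuming a local master and a throwaway report path (both illustrative; IOException handling omitted):

SparkConf conf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("alluxio-integration-checker");
try (PrintWriter reportWriter = new PrintWriter(new FileWriter("/tmp/alluxio-report.txt"))) {
  printConfigInfo(conf, reportWriter);
}
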
private SparkConf createSparkConf(List<SparkConfiguration.Configuration> configurations,
    SparkConf old) {
  SparkConf sparkConf = new SparkConf();
  sparkConf.set(SPARK_EXTRA_LISTENERS, old.get(SPARK_EXTRA_LISTENERS));
  sparkConf.set(BEAKERX_ID, old.get(BEAKERX_ID));
  if (old.contains(SPARK_APP_NAME)) {
    sparkConf.set(SPARK_APP_NAME, old.get(SPARK_APP_NAME));
  }
  configurations.forEach(x -> {
    if (x.getName() != null) {
      sparkConf.set(x.getName(), (x.getValue() != null) ? x.getValue() : "");
    }
  });
  return sparkConf;
}
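
A hedged calling sketch (property names and values are illustrative, and oldConf is assumed to be in scope; note that old must already contain SPARK_EXTRA_LISTENERS and BEAKERX_ID, since they are read unconditionally and SparkConf.get(String) throws when the key is absent):

List<SparkConfiguration.Configuration> configs = Arrays.asList(
    new SparkConfiguration.Configuration("spark.executor.memory", "2g"),
    new SparkConfiguration.Configuration("spark.executor.cores", "2"));
SparkConf merged = createSparkConf(configs, oldConf);
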
/**
 * Implements Spark with Alluxio integration checker.
 *
 * @param sc current JavaSparkContext
 * @param reportWriter save user-facing messages to a generated file
 * @return performIntegrationChecks results
 */
private Status run(JavaSparkContext sc, PrintWriter reportWriter, AlluxioConfiguration conf) {
  // Check whether Spark driver can recognize Alluxio classes and filesystem
  Status driverStatus = CheckerUtils.performIntegrationChecks();
  String driverAddress = sc.getConf().get("spark.driver.host");
  switch (driverStatus) {
    case FAIL_TO_FIND_CLASS:
      reportWriter.printf("Spark driver: %s failed to recognize Alluxio classes.%n%n",
          driverAddress);
      return driverStatus;
    case FAIL_TO_FIND_FS:
      reportWriter.printf("Spark driver: %s failed to recognize Alluxio filesystem.%n%n",
          driverAddress);
      return driverStatus;
    default:
      reportWriter.printf("Spark driver: %s can recognize Alluxio filesystem.%n%n",
          driverAddress);
      break;
  }
  if (!CheckerUtils.supportAlluxioHA(reportWriter, conf)) {
    return Status.FAIL_TO_SUPPORT_HA;
  }
  return runSparkJob(sc, reportWriter);
}
private Text createMasterURL() {
  Text masterURL = new Text();
  masterURL.setDescription("Master URL");
  masterURL.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-master-url")));
  if (getSparkConf().contains(SPARK_MASTER)) {
    masterURL.setValue(getSparkConf().get(SPARK_MASTER));
  } else {
    masterURL.setValue(SPARK_MASTER_DEFAULT);
  }
  return masterURL;
}
private Text createExecutorCores() {
  Text cores = new Text();
  cores.setDescription("Executor Cores");
  cores.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-executor-cores")));
  if (getSparkConf().contains(SPARK_EXECUTOR_CORES)) {
    cores.setValue(getSparkConf().get(SPARK_EXECUTOR_CORES));
  } else {
    cores.setValue(SparkUI.SPARK_EXECUTOR_CORES_DEFAULT);
  }
  return cores;
}
private Text createExecutorMemory() {
  Text memory = new Text();
  memory.setDescription("Executor Memory");
  memory.setDomClasses(new ArrayList<>(asList("bx-spark-config", "bx-spark-executor-memory")));
  if (getSparkConf().contains(SPARK_EXECUTOR_MEMORY)) {
    memory.setValue(getSparkConf().get(SPARK_EXECUTOR_MEMORY));
  } else {
    memory.setValue(SparkUI.SPARK_EXECUTOR_MEMORY_DEFAULT);
  }
  return memory;
}
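
All three widget factories above follow the same contains-then-get-else-default pattern; SparkConf's two-argument get collapses it into a single call (a sketch reusing the same constants):

memory.setValue(getSparkConf().get(SPARK_EXECUTOR_MEMORY, SparkUI.SPARK_EXECUTOR_MEMORY_DEFAULT));
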
@Test
public void testRegisterKryoClasses() {
  SparkConf conf = new SparkConf();
  conf.registerKryoClasses(new Class<?>[]{ Class1.class, Class2.class });
  assertEquals(
      Class1.class.getName() + "," + Class2.class.getName(),
      conf.get("spark.kryo.classesToRegister"));
}
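
registerKryoClasses joins the fully qualified class names with commas under spark.kryo.classesToRegister, which is exactly what the assertion reads back. As far as I can tell, repeated calls accumulate rather than overwrite; a hedged sketch:

SparkConf conf = new SparkConf()
    .registerKryoClasses(new Class<?>[]{ Class1.class })
    .registerKryoClasses(new Class<?>[]{ Class2.class });
// expected to hold: both registrations are merged into one comma-separated list
assertEquals(
    Class1.class.getName() + "," + Class2.class.getName(),
    conf.get("spark.kryo.classesToRegister"));
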
@Test
public void saveAndLoadDefaults() {
  //given
  HashMap<String, Object> profileConfig = new HashMap<>();
  profileConfig.put(SPARK_ADVANCED_OPTIONS, Arrays.asList(
      new SparkConfiguration.Configuration("sparkOption2", "3")));
  profileConfig.put(SPARK_MASTER, "local[4]");
  profileConfig.put(NAME, DEFAULT_PROFILE);
  List config = new ArrayList();
  config.add(profileConfig);
  //when
  sut.saveProfile(profileConfig);
  //then
  SparkSession.Builder builder = SparkSession.builder();
  sut.loadDefaults(builder);
  SparkConf sparkConfBasedOn = SparkEngineImpl.getSparkConfBasedOn(builder);
  assertThat(sparkConfBasedOn.get("sparkOption2")).isEqualTo("3");
  assertThat(sparkConfBasedOn.get(SPARK_MASTER)).isEqualTo("local[4]");
}
private LocalHiveSparkClient(SparkConf sparkConf, HiveConf hiveConf)
    throws FileNotFoundException, MalformedURLException {
  String regJar = null;
  // the registrator jar should already be in CP when not in test mode
  if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_IN_TEST)) {
    String kryoReg = sparkConf.get("spark.kryo.registrator", "");
    if (SparkClientUtilities.HIVE_KRYO_REG_NAME.equals(kryoReg)) {
      regJar = SparkClientUtilities.findKryoRegistratorJar(hiveConf);
      SparkClientUtilities.addJarToContextLoader(new File(regJar));
    }
  }
  sc = new JavaSparkContext(sparkConf);
  if (regJar != null) {
    sc.addJar(regJar);
  }
  jobMetricsListener = new JobMetricsListener();
  sc.sc().addSparkListener(jobMetricsListener);
}
/**
 * Force a Spark config to be generated and check that a config value has the expected value.
 *
 * @param conf the Hive config to use as a base
 * @param paramName the Spark config name to check
 * @param expectedValue the expected value in the Spark config
 */
private void checkSparkConf(HiveConf conf, String paramName, String expectedValue)
    throws HiveException {
  SparkSessionManager sessionManager = SparkSessionManagerImpl.getInstance();
  SparkSessionImpl sparkSessionImpl =
      (SparkSessionImpl) sessionManager.getSession(null, conf, true);
  assertTrue(sparkSessionImpl.isOpen());
  HiveSparkClient hiveSparkClient = sparkSessionImpl.getHiveSparkClient();
  SparkConf sparkConf = hiveSparkClient.getSparkConf();
  String cloneConfig = sparkConf.get(paramName);
  sessionManager.closeSession(sparkSessionImpl);
  assertEquals(expectedValue, cloneConfig);
  sessionManager.shutdown();
}
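
A hedged invocation sketch (the property and value are illustrative; in practice the HiveConf must be configured well enough for a Hive-on-Spark session to actually start):

HiveConf conf = new HiveConf();
conf.set("spark.master", "local");
checkSparkConf(conf, "spark.master", "local");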