/**
 * Returns the Spark master URL from the active session's runtime configuration.
 *
 * @return the value of the {@code SPARK_MASTER} configuration property
 * @throws java.util.NoSuchElementException if the property is not set (same
 *         failure mode as the previous {@code getAll().get(key).get()} lookup)
 */
@Override
public String getSparkMasterUrl() {
    // Query the single key directly instead of materialising the entire
    // configuration map just to look up one entry.
    return getOrCreate().conf().get(SPARK_MASTER);
}
/**
 * Returns the Spark application id from the active session's runtime configuration.
 *
 * @return the value of the {@code SPARK_APP_ID} configuration property
 * @throws java.util.NoSuchElementException if the property is not set (same
 *         failure mode as the previous {@code getAll().get(key).get()} lookup)
 */
@Override
public String getSparkAppId() {
    // Query the single key directly instead of materialising the entire
    // configuration map just to look up one entry.
    return getOrCreate().conf().get(SPARK_APP_ID);
}
/**
 * Creates the fixed-size thread pool used to aggregate and sort data.
 *
 * <p>The pool size is taken from the {@code spark.driver.cores} setting when it
 * is present in the session's configuration, otherwise from the store
 * properties' configured number of available threads.
 *
 * @param spark the current {@link SparkSession}
 * @param storeProperties the store properties supplying the fallback thread count
 * @return a fixed thread pool of the resolved size
 */
public static ExecutorService createThreadPool(final SparkSession spark, final ParquetStoreProperties storeProperties) {
    final Option<String> driverCores = spark.conf().getOption("spark.driver.cores");
    // Prefer the Spark setting; fall back to the store-level configuration.
    final int poolSize = driverCores.isDefined()
            ? Integer.parseInt(driverCores.get())
            : storeProperties.getThreadsAvailable();
    LOGGER.debug("Created thread pool of size {} to aggregate and sort data", poolSize);
    return Executors.newFixedThreadPool(poolSize);
}
/**
 * Configures the Spark session and Hadoop configuration for an add-elements job.
 *
 * <p>If the requested number of output files per group exceeds the current
 * {@code spark.sql.shuffle.partitions} value (or its SQLConf default when the
 * option is unset), the shuffle-partition count is raised to match.
 *
 * @param spark the current {@link SparkSession} to configure
 * @param props the store properties supplying the output-files-per-group setting
 */
public static void configureSparkForAddElements(final SparkSession spark, final ParquetStoreProperties props) {
    final Integer numberOfOutputFiles = props.getAddElementsOutputFilesPerGroup();
    // BUG FIX: Option.get() throws NoSuchElementException when the key is
    // absent and can never return null, so the old "if (null == ...)" fallback
    // was dead code and the method crashed when the option was unset. Branch on
    // isDefined() so the SQLConf default is actually used as the fallback.
    final Option<String> shufflePartitionsOpt = spark.conf().getOption("spark.sql.shuffle.partitions");
    final String shufflePartitions = shufflePartitionsOpt.isDefined()
            ? shufflePartitionsOpt.get()
            : SQLConf.SHUFFLE_PARTITIONS().defaultValueString();
    if (numberOfOutputFiles > Integer.parseInt(shufflePartitions)) {
        LOGGER.debug("Setting the number of Spark shuffle partitions to {}", numberOfOutputFiles);
        spark.conf().set("spark.sql.shuffle.partitions", numberOfOutputFiles);
    }
    final Configuration hadoopConf = spark.sparkContext().hadoopConfiguration();
    configureSparkConfForAddElements(hadoopConf, props);
}
public GenerateIndices(final ParquetStore store, final SparkSession spark) throws OperationException, SerialisationException, StoreException { graphIndex = new GraphIndex(); final int numberOfThreads; final Option<String> sparkDriverCores = spark.conf().getOption("spark.driver.cores"); if (sparkDriverCores.nonEmpty()) { numberOfThreads = Integer.parseInt(sparkDriverCores.get());