/**
 * Note that this method is tightly coupled to the Spark 1.4.1 documentation on
 * dynamic allocation, e.g. for the default values used below.
 * @return the number of executors to pre-warm
 */
private int getExecutorsToWarm() {
  int minExecutors =
      HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
  boolean dynamicAllocation = hiveConf.getBoolean("spark.dynamicAllocation.enabled", false);
  if (dynamicAllocation) {
    int min = sparkConf.getInt("spark.dynamicAllocation.minExecutors", 0);
    int initExecutors = sparkConf.getInt("spark.dynamicAllocation.initialExecutors", min);
    minExecutors = Math.min(minExecutors, initExecutors);
  } else {
    int execInstances = sparkConf.getInt("spark.executor.instances", 2);
    minExecutors = Math.min(minExecutors, execInstances);
  }
  return minExecutors;
}
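A minimal sketch (not from the Hive source) of how the dynamic-allocation branch above caps the pre-warm count; the conf values and the pre-warm count of 10 are assumptions for illustration:

import org.apache.spark.SparkConf;

SparkConf sparkConf = new SparkConf()
    .set("spark.dynamicAllocation.minExecutors", "2")
    .set("spark.dynamicAllocation.initialExecutors", "4");
int prewarm = 10; // hypothetical HIVE_PREWARM_NUM_CONTAINERS value
int min = sparkConf.getInt("spark.dynamicAllocation.minExecutors", 0);
int initExecutors = sparkConf.getInt("spark.dynamicAllocation.initialExecutors", min);
int toWarm = Math.min(prewarm, initExecutors); // 4: never warm more than Spark will start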
if (masterURL.startsWith("spark") || masterURL.startsWith("local")) {
  totalCores = sparkConf.contains("spark.default.parallelism")
      ? sparkConf.getInt("spark.default.parallelism", 1)
      : hiveSparkClient.getDefaultParallelism();
  totalCores = Math.max(totalCores, numExecutors);
} else {
  int coresPerExecutor = sparkConf.getInt("spark.executor.cores", 1);
  totalCores = numExecutors * coresPerExecutor;
  totalCores = totalCores / sparkConf.getInt("spark.task.cpus", 1);
}
if (masterURL.startsWith("spark")) {
  totalCores = sparkConf.contains("spark.default.parallelism")
      ? sparkConf.getInt("spark.default.parallelism", 1)
      : hiveSparkClient.getDefaultParallelism();
  totalCores = Math.max(totalCores, numExecutors);
} else {
  int coresPerExecutor = sparkConf.getInt("spark.executor.cores", 1);
  totalCores = numExecutors * coresPerExecutor;
  totalCores = totalCores / sparkConf.getInt("spark.task.cpus", 1);
}
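A hedged sketch of the non-standalone branch above: the usable task-slot count is executors times cores per executor, divided by CPUs per task. The numbers here are made up:

SparkConf sparkConf = new SparkConf()
    .set("spark.executor.cores", "4")
    .set("spark.task.cpus", "2");
int numExecutors = 5; // assumed, e.g. resolved from spark.executor.instances
int totalCores = numExecutors * sparkConf.getInt("spark.executor.cores", 1);
totalCores = totalCores / sparkConf.getInt("spark.task.cpus", 1); // 10 concurrent tasks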
this.sparkConf = sparkConf;
this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
this.initialSortBufferSize =
    sparkConf.getInt("spark.shuffle.sort.initialBufferSize", DEFAULT_INITIAL_SORT_BUFFER_SIZE);
open();
this.sparkConf = sparkConf;
this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
this.initialSortBufferSize =
    sparkConf.getInt("spark.shuffle.sort.initialBufferSize", DEFAULT_INITIAL_SORT_BUFFER_SIZE);
this.inputBufferSizeInBytes =
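If you want to override the two shuffle-writer settings read above, a sketch of the conf keys involved; the values are arbitrary, and spark.shuffle.sort.initialBufferSize is an internal Spark setting, so treat this as illustrative only:

SparkConf sparkConf = new SparkConf()
    .set("spark.file.transferTo", "false")                // disable transferTo-based file merging
    .set("spark.shuffle.sort.initialBufferSize", "8192"); // larger initial in-memory sort buffer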
public GryoSerializer(final SparkConf sparkConfiguration) {
  final long bufferSizeKb = sparkConfiguration.getSizeAsKb("spark.kryoserializer.buffer", "64k");
  final long maxBufferSizeMb = sparkConfiguration.getSizeAsMb("spark.kryoserializer.buffer.max", "64m");
  this.referenceTracking = sparkConfiguration.getBoolean("spark.kryo.referenceTracking", true);
  this.registrationRequired = sparkConfiguration.getBoolean(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, false);
  if (bufferSizeKb >= ByteUnit.GiB.toKiB(2L)) {
    throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than 2048 mb, got: "
        + bufferSizeKb + " kb.");
  } else {
    this.bufferSize = (int) ByteUnit.KiB.toBytes(bufferSizeKb);
    if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2L)) {
      throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than 2048 mb, got: "
          + maxBufferSizeMb + " mb.");
    } else {
      this.maxBufferSize = (int) ByteUnit.MiB.toBytes(maxBufferSizeMb);
      //this.userRegistrator = sparkConfiguration.getOption("spark.kryo.registrator");
    }
  }
  // create a GryoPool and store it in static HadoopPools
  final List<Object> ioRegistries = new ArrayList<>();
  ioRegistries.addAll(makeApacheConfiguration(sparkConfiguration)
      .getList(IoRegistry.IO_REGISTRY, Collections.emptyList()));
  ioRegistries.add(SparkIoRegistry.class.getCanonicalName()
      .replace("." + SparkIoRegistry.class.getSimpleName(), "$" + SparkIoRegistry.class.getSimpleName()));
  HadoopPools.initialize(GryoPool.build()
      .version(GryoVersion.valueOf(sparkConfiguration.get(
          GryoPool.CONFIG_IO_GRYO_VERSION, GryoPool.CONFIG_IO_GRYO_POOL_VERSION_DEFAULT.name())))
      .poolSize(sparkConfiguration.getInt(
          GryoPool.CONFIG_IO_GRYO_POOL_SIZE, GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT))
      .ioRegistries(ioRegistries)
      .initializeMapper(builder -> builder.referenceTracking(this.referenceTracking)
          .registrationRequired(this.registrationRequired))
      .create());
}
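One plausible way to wire the serializer above into a job, assuming the GryoSerializer class from the constructor shown; the buffer values are the same defaults the constructor reads:

SparkConf conf = new SparkConf()
    .setAppName("gryo-example")
    .set("spark.serializer", GryoSerializer.class.getCanonicalName())
    .set("spark.kryoserializer.buffer", "64k")
    .set("spark.kryoserializer.buffer.max", "64m");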
comparatorSupplier,
prefixComparator,
SparkEnv.get().conf().getInt("spark.shuffle.sort.initialBufferSize",
    UnsafeExternalRowSorter.DEFAULT_INITIAL_SORT_BUFFER_SIZE),
pageSizeBytes,
public int getNumPartitionsHint() throws AnalyticsException {
  /* not all workers will have the same CPU count; this is just an approximation */
  int workerCount = this.getWorkerCount();
  if (workerCount == 0) {
    throw new AnalyticsException("Error while calculating NumPartitionsHint. Worker count is zero.");
  }
  int workerCores = this.sparkConf.getInt(AnalyticsConstants.SPARK_WORKER_CORES, 1);
  int partitionCount = workerCount * workerCores;
  if (log.isDebugEnabled()) {
    log.debug("Partition count: " + partitionCount);
  }
  return partitionCount;
}
/**
 * Starts a Spark master with the given parameters.
 */
private synchronized void startMaster() throws AnalyticsClusterException {
  if (!this.masterActive) {
    String host = this.myHost;
    int port = this.sparkConf.getInt(AnalyticsConstants.SPARK_MASTER_PORT, 7077 + this.portOffset);
    int webUiPort = this.sparkConf.getInt(AnalyticsConstants.SPARK_MASTER_WEBUI_PORT, 8081 + this.portOffset);
    Master.startRpcEnvAndEndpoint(host, port, webUiPort, this.sparkConf);
    log.info("[Spark init - master] Started SPARK MASTER in spark://" + host + ":" + port
        + " with webUI port : " + webUiPort);
    updateMaster(this.sparkConf);
    this.masterActive = true;
  } else {
    logDebug("Master is already active in this node, therefore ignoring Master startup");
  }
}
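A hedged sketch of the port arithmetic above with an offset of 1; the literal key strings here stand in for AnalyticsConstants.SPARK_MASTER_PORT and AnalyticsConstants.SPARK_MASTER_WEBUI_PORT, whose actual values are not shown in the snippet:

SparkConf sparkConf = new SparkConf(); // neither port configured explicitly
int portOffset = 1; // assumed per-node offset
int port = sparkConf.getInt("spark.master.port", 7077 + portOffset);            // 7078
int webUiPort = sparkConf.getInt("spark.master.webui.port", 8081 + portOffset); // 8082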
public static JavaPairRDD<String, String> reduceJSON(JavaSparkContext sc,
    JavaPairRDD<String, String> input, final Properties karmaSettings) {
  return reduceJSON(sc, input, sc.getConf().getInt("spark.default.parallelism", 1), karmaSettings);
}

public static JavaPairRDD<String, String> reduceJSON(JavaSparkContext sc,
public static JavaRDD<String> reduceJSON(JavaSparkContext jsc, JavaRDD<String> input,
    final Properties karmaSettings) {
  return reduceJSON(jsc, input, jsc.getConf().getInt("spark.default.parallelism", 1), karmaSettings);
}
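A hypothetical call site for the JavaRDD overload above (assuming it is invoked from the same class; the input path is made up). The point is that parallelism silently falls back to spark.default.parallelism, or 1, when the caller omits it:

JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("karma-reduce"));
JavaRDD<String> merged = reduceJSON(jsc, jsc.textFile("input.json"), new java.util.Properties());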
recordComparator,
prefixComparator,
SparkEnv.get().conf().getInt("spark.shuffle.sort.initialBufferSize",
    UnsafeExternalRowSorter.DEFAULT_INITIAL_SORT_BUFFER_SIZE),
pageSizeBytes,

new KVComparator(ordering, keySchema.length()),
prefixComparator,
SparkEnv.get().conf().getInt("spark.shuffle.sort.initialBufferSize",
    UnsafeExternalRowSorter.DEFAULT_INITIAL_SORT_BUFFER_SIZE),
pageSizeBytes,
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("ImagenetSampler")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  JavaSparkContext sc = new JavaSparkContext(conf);
  int numExecutors = conf.getInt("spark.executor.instances", -1);
  System.out.println("number of executors = " + numExecutors);
  System.out.println("Data Loading...");
  JavaPairRDD<FloatWritable, ArrayPrimitiveWritable> train_seq =
      sc.sequenceFile("imagenet_sampled.hsf", FloatWritable.class, ArrayPrimitiveWritable.class);
  train_seq.foreach(new VoidFunction<Tuple2<FloatWritable, ArrayPrimitiveWritable>>() {
    @Override
    public void call(Tuple2<FloatWritable, ArrayPrimitiveWritable> arg0) throws Exception {
      System.out.println(arg0._1.get() + " " + ((float[]) arg0._2.get()).length);
    }
  });
  sc.close();
}
public GeoSparkConf(SparkConf sparkConf) {
  this.useIndex = sparkConf.getBoolean("geospark.global.index", true);
  this.indexType = IndexType.getIndexType(sparkConf.get("geospark.global.indextype", "rtree"));
  this.joinApproximateTotalCount = sparkConf.getLong("geospark.join.approxcount", -1);
  String[] boundaryString = sparkConf.get("geospark.join.boundary", "0,0,0,0").split(",");
  this.datasetBoundary = new Envelope(
      Double.parseDouble(boundaryString[0]), Double.parseDouble(boundaryString[1]),
      Double.parseDouble(boundaryString[2]), Double.parseDouble(boundaryString[3]));
  this.joinGridType = GridType.getGridType(sparkConf.get("geospark.join.gridtype", "quadtree"));
  this.joinBuildSide = JoinBuildSide.getBuildSide(sparkConf.get("geospark.join.indexbuildside", "left"));
  this.joinSparitionDominantSide = JoinSparitionDominantSide.getJoinSparitionDominantSide(
      sparkConf.get("geospark.join.spatitionside", "left"));
  this.fallbackPartitionNum = sparkConf.getInt("geospark.join.numpartition", -1);
}
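The constructor above reads everything from plain SparkConf entries, so configuring it is just a matter of setting the string keys it expects; a sketch with illustrative values:

SparkConf sparkConf = new SparkConf()
    .set("geospark.global.index", "true")
    .set("geospark.global.indextype", "rtree")
    .set("geospark.join.boundary", "-180,180,-90,90") // x1,x2,y1,y2 for the Envelope
    .set("geospark.join.gridtype", "quadtree")
    .set("geospark.join.numpartition", "16");
GeoSparkConf geoSparkConf = new GeoSparkConf(sparkConf);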
private AnalyticsQueryResult toResult(DataFrame dataFrame) throws AnalyticsExecutionException {
  int resultsLimit = this.sparkConf.getInt("carbon.spark.results.limit", -1);
  if (resultsLimit != -1) {
    return new AnalyticsQueryResult(dataFrame.schema().fieldNames(),
        convertRowsToObjects(dataFrame.limit(resultsLimit).collect()));
  } else {
    return new AnalyticsQueryResult(dataFrame.schema().fieldNames(),
        convertRowsToObjects(dataFrame.collect()));
  }
}
public SparkExecutor(SparkPlatform platform, Job job) {
  super(job);
  this.platform = platform;
  this.sparkContextReference = this.platform.getSparkContext(job);
  this.sparkContextReference.noteObtainedReference();
  this.sc = this.sparkContextReference.get();
  if (this.sc.getConf().contains("spark.executor.cores")) {
    this.numDefaultPartitions = 2 * this.sc.getConf().getInt("spark.executor.cores", -1);
  } else {
    this.numDefaultPartitions =
        (int) (2 * this.getConfiguration().getLongProperty("rheem.spark.machines")
            * this.getConfiguration().getLongProperty("rheem.spark.cores-per-machine"));
  }
}
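A hedged illustration of the fallback above: when spark.executor.cores is absent, the default partition count comes from Rheem's own machine and core properties instead (the property values here are assumed):

SparkConf conf = new SparkConf(); // spark.executor.cores not set
long machines = 4;        // assumed rheem.spark.machines
long coresPerMachine = 8; // assumed rheem.spark.cores-per-machine
int numDefaultPartitions = conf.contains("spark.executor.cores")
    ? 2 * conf.getInt("spark.executor.cores", -1)
    : (int) (2 * machines * coresPerMachine); // 64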