/**
 * Creates a shuffle-specific external sorter.
 *
 * <p>Page size is capped at {@code PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES} because record
 * pointers pack the in-page offset into a fixed number of bits.
 *
 * @param memoryManager the task's memory manager, used both for page allocation and spilling
 * @param blockManager  block manager used to create spill files
 * @param taskContext   context of the owning task
 * @param initialSize   initial capacity of the in-memory pointer array
 * @param numPartitions number of shuffle output partitions
 * @param conf          Spark configuration (buffer sizes, spill threshold, sort mode)
 * @param writeMetrics  metrics sink for shuffle write statistics
 */
ShuffleExternalSorter(
    TaskMemoryManager memoryManager,
    BlockManager blockManager,
    TaskContext taskContext,
    int initialSize,
    int numPartitions,
    SparkConf conf,
    ShuffleWriteMetrics writeMetrics) {
  super(
      memoryManager,
      (int) Math.min(PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES, memoryManager.pageSizeBytes()),
      memoryManager.getTungstenMemoryMode());
  this.taskMemoryManager = memoryManager;
  this.blockManager = blockManager;
  this.taskContext = taskContext;
  this.numPartitions = numPartitions;
  this.writeMetrics = writeMetrics;
  // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided.
  this.fileBufferSizeBytes =
      (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;
  this.numElementsForSpillThreshold =
      conf.getLong("spark.shuffle.spill.numElementsForceSpillThreshold", 1024 * 1024 * 1024);
  this.inMemSorter =
      new ShuffleInMemorySorter(
          this, initialSize, conf.getBoolean("spark.shuffle.sort.useRadixSort", true));
  this.peakMemoryUsedBytes = getMemoryUsage();
}
/**
 * Sorts the map's records in place, spills them to disk, and returns an
 * {@link UnsafeKVExternalSorter}.
 *
 * <p>Note that the map will be reset for inserting new records, and the returned sorter can NOT
 * be used to insert records.
 *
 * @return an external sorter that owns this map's (spilled) records
 * @throws IOException if spilling to disk fails
 */
public UnsafeKVExternalSorter destructAndCreateExternalSorter() throws IOException {
  final SparkEnv env = SparkEnv.get();
  final long spillThreshold =
      env.conf().getLong(
          "spark.shuffle.spill.numElementsForceSpillThreshold",
          UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD);
  return new UnsafeKVExternalSorter(
      groupingKeySchema,
      aggregationBufferSchema,
      env.blockManager(),
      env.serializerManager(),
      map.getPageSizeBytes(),
      spillThreshold,
      map);
}
}
try { SparkConf sparkConf = new SparkConf(); long updateIntervalMs = sparkConf.getLong("spark.yarn.token.renewal.interval", -1L); if (updateIntervalMs <= 0) { return null;
try { SparkConf sparkConf = new SparkConf(); long updateIntervalMs = sparkConf.getLong("spark.yarn.token.renewal.interval", -1L); if (updateIntervalMs <= 0) { return null;
try { SparkConf sparkConf = new SparkConf(); long updateIntervalMs = sparkConf.getLong("spark.yarn.token.renewal.interval", -1L); if (updateIntervalMs <= 0) { return null;
/**
 * Populates GeoSpark settings from the Spark configuration, falling back to the documented
 * defaults when a key is absent.
 *
 * @param sparkConf the Spark configuration to read {@code geospark.*} keys from
 */
public GeoSparkConf(SparkConf sparkConf) {
  this.useIndex = sparkConf.getBoolean("geospark.global.index", true);
  this.indexType = IndexType.getIndexType(sparkConf.get("geospark.global.indextype", "rtree"));
  this.joinApproximateTotalCount = sparkConf.getLong("geospark.join.approxcount", -1);
  // Boundary is a comma-separated "x1,x2,y1,y2" string matching the Envelope constructor order.
  String[] boundaryString = sparkConf.get("geospark.join.boundary", "0,0,0,0").split(",");
  // BUG FIX: previously boundaryString[0] was used for all four coordinates, collapsing any
  // configured boundary to a single point; use the four parsed components instead.
  this.datasetBoundary = new Envelope(
      Double.parseDouble(boundaryString[0]),
      Double.parseDouble(boundaryString[1]),
      Double.parseDouble(boundaryString[2]),
      Double.parseDouble(boundaryString[3]));
  this.joinGridType = GridType.getGridType(sparkConf.get("geospark.join.gridtype", "quadtree"));
  this.joinBuildSide =
      JoinBuildSide.getBuildSide(sparkConf.get("geospark.join.indexbuildside", "left"));
  this.joinSparitionDominantSide =
      JoinSparitionDominantSide.getJoinSparitionDominantSide(
          sparkConf.get("geospark.join.spatitionside", "left"));
  this.fallbackPartitionNum = sparkConf.getInt("geospark.join.numpartition", -1);
}
/**
 * Populates GeoSpark settings from the Spark configuration, falling back to the documented
 * defaults when a key is absent.
 *
 * @param sparkConf the Spark configuration to read {@code geospark.*} keys from
 */
public GeoSparkConf(SparkConf sparkConf) {
  this.useIndex = sparkConf.getBoolean("geospark.global.index", true);
  this.indexType = IndexType.getIndexType(sparkConf.get("geospark.global.indextype", "rtree"));
  this.joinApproximateTotalCount = sparkConf.getLong("geospark.join.approxcount", -1);
  // Boundary is a comma-separated "x1,x2,y1,y2" string matching the Envelope constructor order.
  String[] boundaryString = sparkConf.get("geospark.join.boundary", "0,0,0,0").split(",");
  // BUG FIX: previously boundaryString[0] was used for all four coordinates, collapsing any
  // configured boundary to a single point; use the four parsed components instead.
  this.datasetBoundary = new Envelope(
      Double.parseDouble(boundaryString[0]),
      Double.parseDouble(boundaryString[1]),
      Double.parseDouble(boundaryString[2]),
      Double.parseDouble(boundaryString[3]));
  this.joinGridType = GridType.getGridType(sparkConf.get("geospark.join.gridtype", "quadtree"));
  this.joinBuildSide =
      JoinBuildSide.getBuildSide(sparkConf.get("geospark.join.indexbuildside", "left"));
  this.joinSparitionDominantSide =
      JoinSparitionDominantSide.getJoinSparitionDominantSide(
          sparkConf.get("geospark.join.spatitionside", "left"));
  this.fallbackPartitionNum = sparkConf.getInt("geospark.join.numpartition", -1);
}
/**
 * Creates a row sorter backed by an {@link UnsafeExternalSorter}.
 *
 * <p>Reads the initial sort buffer size and the force-spill threshold from the active Spark
 * configuration; memory, block, and serializer managers come from the current task/environment.
 *
 * @param recordComparatorSupplier supplies the comparator used to order full records
 * @param prefixComparator         comparator for the cheap-to-compare sort prefixes
 * @param prefixComputer           computes a sort prefix for each inserted row
 * @param pageSizeBytes            memory page size for the underlying sorter
 * @param canUseRadixSort          whether radix sort may be used on the prefixes
 * @throws IOException if the underlying sorter cannot be created
 */
private UnsafeExternalRowSorter(
    StructType schema,
    Supplier<RecordComparator> recordComparatorSupplier,
    PrefixComparator prefixComparator,
    PrefixComputer prefixComputer,
    long pageSizeBytes,
    boolean canUseRadixSort) throws IOException {
  this.schema = schema;
  this.prefixComputer = prefixComputer;
  final SparkEnv sparkEnv = SparkEnv.get();
  final TaskContext taskContext = TaskContext.get();
  sorter = UnsafeExternalSorter.create(
      taskContext.taskMemoryManager(),
      sparkEnv.blockManager(),
      sparkEnv.serializerManager(),
      taskContext,
      recordComparatorSupplier.get(),
      prefixComparator,
      sparkEnv.conf().getInt(
          "spark.shuffle.sort.initialBufferSize", DEFAULT_INITIAL_SORT_BUFFER_SIZE),
      pageSizeBytes,
      // Consistency fix: reuse the cached sparkEnv instead of a second SparkEnv.get() call.
      sparkEnv.conf().getLong(
          "spark.shuffle.spill.numElementsForceSpillThreshold",
          UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD),
      canUseRadixSort);
}