public UnsafeSorterSpillReader(
    SerializerManager serializerManager,
    File file,
    BlockId blockId) throws IOException {
  assert (file.length() > 0);
  long bufferSizeBytes = SparkEnv.get() == null
      ? DEFAULT_BUFFER_SIZE_BYTES
      : SparkEnv.get().conf().getSizeAsBytes(
          "spark.unsafe.sorter.spill.reader.buffer.size", DEFAULT_BUFFER_SIZE_BYTES);
  if (bufferSizeBytes > MAX_BUFFER_SIZE_BYTES || bufferSizeBytes < DEFAULT_BUFFER_SIZE_BYTES) {
    // fall back to a sane default value
    logger.warn("Value of config \"spark.unsafe.sorter.spill.reader.buffer.size\" = {} not in " +
        "allowed range [{}, {}]. Falling back to default value: {} bytes",
        bufferSizeBytes, DEFAULT_BUFFER_SIZE_BYTES, MAX_BUFFER_SIZE_BYTES,
        DEFAULT_BUFFER_SIZE_BYTES);
    bufferSizeBytes = DEFAULT_BUFFER_SIZE_BYTES;
  }

  final InputStream bs = new NioBufferedFileInputStream(file, (int) bufferSizeBytes);
  try {
    this.in = serializerManager.wrapStream(blockId, bs);
    this.din = new DataInputStream(this.in);
    numRecords = numRecordsRemaining = din.readInt();
  } catch (IOException e) {
    // Ensure the underlying file stream is closed if setup fails.
    Closeables.close(bs, /* swallowIOException = */ true);
    throw e;
  }
}
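// A minimal, self-contained sketch of the clamp-and-warn pattern above.
// The class and method names are illustrative stand-ins, not Spark API;
// only the SLF4J logging calls are real library calls.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class BufferSizeValidator {
  private static final Logger logger = LoggerFactory.getLogger(BufferSizeValidator.class);

  // Returns the requested size if it lies within [minBytes, maxBytes];
  // otherwise warns and falls back to minBytes, mirroring the constructor above.
  static long clampToRange(long requested, long minBytes, long maxBytes) {
    if (requested > maxBytes || requested < minBytes) {
      logger.warn("Requested buffer size {} outside allowed range [{}, {}]; using {} bytes",
          requested, minBytes, maxBytes, minBytes);
      return minBytes;
    }
    return requested;
  }
}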
public BytesToBytesMap(
    TaskMemoryManager taskMemoryManager,
    int initialCapacity,
    long pageSizeBytes,
    boolean enablePerfMetrics) {
  this(
      taskMemoryManager,
      SparkEnv.get() != null ? SparkEnv.get().blockManager() : null,
      SparkEnv.get() != null ? SparkEnv.get().serializerManager() : null,
      initialCapacity,
      // In order to re-use the longArray for sorting, the load factor cannot be larger than 0.5.
      0.5,
      pageSizeBytes,
      enablePerfMetrics);
}
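// Why 0.5: the sorter returned by destructAndCreateExternalSorter() (see the
// snippets below, which hand the map to UnsafeKVExternalSorter) reuses this
// map's longArray as its in-memory sort buffer, so the array needs enough
// free slots beyond the live hash entries. The exact slot layout is
// Spark-internal; the constraint itself comes from the inline comment above.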
/**
 * Get the root directory that contains files added through `SparkContext.addFile()`.
 */
public static String getRootDirectory() {
  return SparkEnv.get().sparkFilesDir();
}
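// Hypothetical usage: both calls below are real org.apache.spark.SparkFiles
// methods (getRootDirectory() is the one defined above); the file name
// "lookup.csv" is made up for illustration.
import org.apache.spark.SparkFiles;

String root = SparkFiles.getRootDirectory();  // directory holding all added files
String path = SparkFiles.get("lookup.csv");   // absolute path to one added file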
/**
 * Sorts the map's records in place, spills them to disk, and returns an
 * [[UnsafeKVExternalSorter]].
 *
 * Note that the map will be reset for inserting new records, and the returned sorter can NOT be
 * used to insert records.
 */
public UnsafeKVExternalSorter destructAndCreateExternalSorter() throws IOException {
  return new UnsafeKVExternalSorter(
      groupingKeySchema,
      aggregationBufferSchema,
      SparkEnv.get().blockManager(),
      SparkEnv.get().serializerManager(),
      map.getPageSizeBytes(),
      SparkEnv.get().conf().getLong("spark.shuffle.spill.numElementsForceSpillThreshold",
          UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD),
      map);
}
@SuppressWarnings("unchecked") @Override public Map<Integer, SparkWatermarks> load(@Nonnull String key) throws Exception { final BlockManager blockManager = SparkEnv.get().blockManager(); final Map<Integer, SparkWatermarks> watermarks = fetchSparkWatermarks(blockManager); return watermarks != null ? watermarks : Maps.newHashMap(); } }
public static Serializer getSerializer() {
  // Assumes SparkEnv has already been initialized; SparkEnv.get() would
  // otherwise return null and this call would throw a NullPointerException.
  return SparkEnv.get().serializer();
}
@Override
public Map<String, String> call() throws Exception {
  SparkEnv env = SparkEnv$.MODULE$.get();
  if (env == null) {
    return Collections.emptyMap();
  } else {
    Map<String, String> tagMap = new HashMap<>();
    SparkConf conf = env.conf();
    put(tagMap, conf, "spark.app.name", "appName");
    return tagMap;
  }
}
/**
 * Sorts the map's records in place, spills them to disk, and returns an
 * [[UnsafeKVExternalSorter]].
 *
 * Note that the map will be reset for inserting new records, and the returned sorter can NOT be
 * used to insert records.
 */
public UnsafeKVExternalSorter destructAndCreateExternalSorter() throws IOException {
  return new UnsafeKVExternalSorter(
      groupingKeySchema,
      aggregationBufferSchema,
      SparkEnv.get().blockManager(),
      SparkEnv.get().serializerManager(),
      map.getPageSizeBytes(),
      (int) SparkEnv.get().conf().get(
          package$.MODULE$.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD()),
      map);
}
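// Note: this variant differs from the earlier destructAndCreateExternalSorter()
// only in how the spill threshold is read: a typed ConfigEntry
// (SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD) here versus the raw string
// key "spark.shuffle.spill.numElementsForceSpillThreshold" earlier. The two
// snippets presumably come from different Spark versions.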
@VisibleForTesting
public static synchronized void clear() {
  sourceTimes.clear();
  lastWatermarkedBatchTime = 0;
  writeLocalWatermarkCopy(null);
  final SparkEnv sparkEnv = SparkEnv.get();
  if (sparkEnv != null) {
    final BlockManager blockManager = sparkEnv.blockManager();
    // tellMaster = true: also remove the block's metadata from the driver.
    blockManager.removeBlock(WATERMARKS_BLOCK_ID, true);
  }
}
stateCombiner,
new SparkStateContext(
    settings,
    SparkEnv.get().serializer(),
    listStorageMaxElements),
new LazyAccumulatorProvider(
    context.getAccumulatorFactory(),
    context.getSettings())))
assert (file.length() > 0);
long bufferSizeBytes = SparkEnv.get() == null
    ? DEFAULT_BUFFER_SIZE_BYTES
    : SparkEnv.get().conf().getSizeAsBytes(
        "spark.unsafe.sorter.spill.reader.buffer.size", DEFAULT_BUFFER_SIZE_BYTES);
if (bufferSizeBytes > MAX_BUFFER_SIZE_BYTES || bufferSizeBytes < DEFAULT_BUFFER_SIZE_BYTES) {
  // Body restored from the full constructor above: fall back to the default.
  bufferSizeBytes = DEFAULT_BUFFER_SIZE_BYTES;
}

// Newer variant of the spill reader adds an optional read-ahead mode,
// enabled by default and gated by this flag.
final boolean readAheadEnabled = SparkEnv.get() != null &&
    SparkEnv.get().conf().getBoolean("spark.unsafe.sorter.spill.read.ahead.enabled", true);
/**
 * Advances the watermarks to the next-in-line watermarks. SparkWatermarks are monotonically
 * increasing.
 */
public static void advance(final String batchId) {
  synchronized (GlobalWatermarkHolder.class) {
    final BlockManager blockManager = SparkEnv.get().blockManager();
    final Map<Integer, SparkWatermarks> newWatermarks = computeNewWatermarks(blockManager);
    if (!newWatermarks.isEmpty()) {
      writeRemoteWatermarkBlock(newWatermarks, blockManager);
      writeLocalWatermarkCopy(newWatermarks);
    } else {
      LOG.info("No new watermarks could be computed upon completion of batch: {}", batchId);
    }
  }
}
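// Unlike clear() above, advance() does not null-check SparkEnv.get();
// presumably it is only ever invoked on the driver after the SparkEnv has
// been created, whereas clear() must also work from test teardown.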
private UnsafeExternalRowSorter(
    StructType schema,
    Supplier<RecordComparator> recordComparatorSupplier,
    PrefixComparator prefixComparator,
    PrefixComputer prefixComputer,
    long pageSizeBytes,
    boolean canUseRadixSort) throws IOException {
  this.schema = schema;
  this.prefixComputer = prefixComputer;
  final SparkEnv sparkEnv = SparkEnv.get();
  final TaskContext taskContext = TaskContext.get();
  sorter = UnsafeExternalSorter.create(
      taskContext.taskMemoryManager(),
      sparkEnv.blockManager(),
      sparkEnv.serializerManager(),
      taskContext,
      recordComparatorSupplier.get(),
      prefixComparator,
      sparkEnv.conf().getInt("spark.shuffle.sort.initialBufferSize",
          DEFAULT_INITIAL_SORT_BUFFER_SIZE),
      pageSizeBytes,
      sparkEnv.conf().getLong("spark.shuffle.spill.numElementsForceSpillThreshold",
          UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD),
      canUseRadixSort);
}
private UnsafeExternalRowSorter(
    StructType schema,
    Supplier<RecordComparator> recordComparatorSupplier,
    PrefixComparator prefixComparator,
    PrefixComputer prefixComputer,
    long pageSizeBytes,
    boolean canUseRadixSort) throws IOException {
  this.schema = schema;
  this.prefixComputer = prefixComputer;
  final SparkEnv sparkEnv = SparkEnv.get();
  final TaskContext taskContext = TaskContext.get();
  sorter = UnsafeExternalSorter.create(
      taskContext.taskMemoryManager(),
      sparkEnv.blockManager(),
      sparkEnv.serializerManager(),
      taskContext,
      recordComparatorSupplier,
      prefixComparator,
      sparkEnv.conf().getInt("spark.shuffle.sort.initialBufferSize",
          DEFAULT_INITIAL_SORT_BUFFER_SIZE),
      pageSizeBytes,
      (int) sparkEnv.conf().get(
          package$.MODULE$.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD()),
      canUseRadixSort);
}
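// Note: the two UnsafeExternalRowSorter constructors above differ in two ways.
// The second passes the Supplier<RecordComparator> itself rather than calling
// .get() eagerly, presumably so UnsafeExternalSorter can create the comparator
// lazily, and it reads the spill threshold through a typed ConfigEntry instead
// of the raw string key. They appear to come from different Spark versions.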