final long partitionLengthInSpill = spills[i].partitionLengths[partition]; if (partitionLengthInSpill > 0) { InputStream partitionInputStream = new LimitedInputStream(spillInputStreams[i], partitionLengthInSpill, false); try {
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
/**
 * Time (in milliseconds) that we will wait in order to perform a retry after an IOException.
 * Only relevant if maxIORetries > 0.
 */
public int ioRetryWaitTimeMs() {
    // Multiply in long BEFORE narrowing to int: the original `(int) ... * 1000`
    // bound the cast to timeStringAsSec's result, so the seconds -> milliseconds
    // conversion ran in int arithmetic and could overflow for very large
    // configured waits. This also matches connectionTimeoutMs(), which converts
    // in long and casts last.
    return (int) (JavaUtils.timeStringAsSec(conf.get(SPARK_NETWORK_IO_RETRYWAIT_KEY, "5s")) * 1000);
}
public GryoSerializer(final SparkConf sparkConfiguration) { final long bufferSizeKb = sparkConfiguration.getSizeAsKb("spark.kryoserializer.buffer", "64k"); final long maxBufferSizeMb = sparkConfiguration.getSizeAsMb("spark.kryoserializer.buffer.max", "64m"); this.referenceTracking = sparkConfiguration.getBoolean("spark.kryo.referenceTracking", true); this.registrationRequired = sparkConfiguration.getBoolean(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, false); if (bufferSizeKb >= ByteUnit.GiB.toKiB(2L)) { throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than 2048 mb, got: " + bufferSizeKb + " mb."); } else { this.bufferSize = (int) ByteUnit.KiB.toBytes(bufferSizeKb); if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2L)) { throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than 2048 mb, got: " + maxBufferSizeMb + " mb."); } else { this.maxBufferSize = (int) ByteUnit.MiB.toBytes(maxBufferSizeMb); //this.userRegistrator = sparkConfiguration.getOption("spark.kryo.registrator"); } } // create a GryoPool and store it in static HadoopPools final List<Object> ioRegistries = new ArrayList<>(); ioRegistries.addAll(makeApacheConfiguration(sparkConfiguration).getList(IoRegistry.IO_REGISTRY, Collections.emptyList())); ioRegistries.add(SparkIoRegistry.class.getCanonicalName().replace("." + SparkIoRegistry.class.getSimpleName(), "$" + SparkIoRegistry.class.getSimpleName())); HadoopPools.initialize(GryoPool.build(). version(GryoVersion.valueOf(sparkConfiguration.get(GryoPool.CONFIG_IO_GRYO_VERSION, GryoPool.CONFIG_IO_GRYO_POOL_VERSION_DEFAULT.name()))). poolSize(sparkConfiguration.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE, GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT)). ioRegistries(ioRegistries). initializeMapper(builder -> builder.referenceTracking(this.referenceTracking). registrationRequired(this.registrationRequired)). create()); }
final long partitionLengthInSpill = spills[i].partitionLengths[partition]; if (partitionLengthInSpill > 0) { InputStream partitionInputStream = new LimitedInputStream(spillInputStreams[i], partitionLengthInSpill, false); try {
/**
 * Time (in milliseconds) that we will wait in order to perform a retry after an IOException.
 * Only relevant if maxIORetries > 0.
 */
public int ioRetryWaitTimeMs() {
    final String configuredWait = conf.get(SPARK_NETWORK_IO_RETRYWAIT_KEY, "5s");
    final int waitSeconds = (int) JavaUtils.timeStringAsSec(configuredWait);
    return waitSeconds * 1000;
}
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
final long partitionLengthInSpill = spills[i].partitionLengths[partition]; if (partitionLengthInSpill > 0) { InputStream partitionInputStream = new LimitedInputStream(spillInputStreams[i], partitionLengthInSpill, false); try {
/** Connect timeout in milliseconds. Default 120 secs. */
public int connectionTimeoutMs() {
    // Fall back to the generic network timeout when no connection-specific value is set.
    final long fallbackSeconds = JavaUtils.timeStringAsSec(conf.get("spark.network.timeout", "120s"));
    final String timeoutValue = conf.get(SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY, fallbackSeconds + "s");
    return (int) (JavaUtils.timeStringAsSec(timeoutValue) * 1000);
}
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
/**
 * Reads back every key/value record from the merged shuffle output file,
 * partition by partition, asserting that each record hashes to the partition
 * slot it was found in.
 *
 * <p>Fix: the per-partition {@link FileInputStream} is now closed in a
 * {@code finally} block, so it no longer leaks when decompression or
 * deserialization throws mid-partition.
 *
 * @return all records recovered from the merged file, in partition order
 * @throws IOException if the merged file cannot be opened or read
 */
private List<Tuple2<Object, Object>> readRecordsFromFile() throws IOException {
    final ArrayList<Tuple2<Object, Object>> recordsList = new ArrayList<>();
    long startOffset = 0;
    for (int i = 0; i < NUM_PARTITITONS; i++) {
        final long partitionSize = partitionSizesInMergedFile[i];
        if (partitionSize > 0) {
            // Open the merged file and seek to this partition's byte range.
            FileInputStream fin = new FileInputStream(mergedOutputFile);
            try {
                fin.getChannel().position(startOffset);
                InputStream in = new LimitedInputStream(fin, partitionSize);
                // Unwrap in the reverse of the write order: decrypt, then decompress.
                in = blockManager.serializerManager().wrapForEncryption(in);
                if (conf.getBoolean("spark.shuffle.compress", true)) {
                    in = CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(in);
                }
                DeserializationStream recordsStream = serializer.newInstance().deserializeStream(in);
                Iterator<Tuple2<Object, Object>> records = recordsStream.asKeyValueIterator();
                while (records.hasNext()) {
                    Tuple2<Object, Object> record = records.next();
                    // Every record must belong to the partition it was read from.
                    assertEquals(i, hashPartitioner.getPartition(record._1()));
                    recordsList.add(record);
                }
                recordsStream.close();
            } finally {
                // Closing again after recordsStream.close() is harmless: FileInputStream
                // close is idempotent, and this guarantees cleanup on exceptions.
                fin.close();
            }
            startOffset += partitionSize;
        }
    }
    return recordsList;
}
/**
 * Time (in milliseconds) that we will wait in order to perform a retry after an IOException.
 * Only relevant if maxIORetries > 0.
 */
public int ioRetryWaitTimeMs() {
    // Parses the configured duration (default "5s") into seconds, then scales to ms.
    // NOTE(review): the (int) cast binds to timeStringAsSec's result, so the
    // * 1000 runs in int arithmetic — could overflow for very large settings.
    return (int) JavaUtils.timeStringAsSec(conf.get(SPARK_NETWORK_IO_RETRYWAIT_KEY, "5s")) * 1000;
}
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
/**
 * Reads back every key/value record from the merged shuffle output file,
 * partition by partition, asserting that each record hashes to the partition
 * slot it was found in.
 *
 * @return all records recovered from the merged file, in partition order
 * @throws IOException if the merged file cannot be opened or read
 */
private List<Tuple2<Object, Object>> readRecordsFromFile() throws IOException {
    final ArrayList<Tuple2<Object, Object>> recordsList = new ArrayList<>();
    long startOffset = 0;
    for (int i = 0; i < NUM_PARTITITONS; i++) {
        final long partitionSize = partitionSizesInMergedFile[i];
        if (partitionSize > 0) {
            // Open the merged file and seek to this partition's byte range.
            FileInputStream fin = new FileInputStream(mergedOutputFile);
            fin.getChannel().position(startOffset);
            InputStream in = new LimitedInputStream(fin, partitionSize);
            // Unwrap in what is presumably the reverse of the write order:
            // decrypt first, then decompress if shuffle compression is on.
            in = blockManager.serializerManager().wrapForEncryption(in);
            if (conf.getBoolean("spark.shuffle.compress", true)) {
                in = CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(in);
            }
            DeserializationStream recordsStream = serializer.newInstance().deserializeStream(in);
            Iterator<Tuple2<Object, Object>> records = recordsStream.asKeyValueIterator();
            while (records.hasNext()) {
                Tuple2<Object, Object> record = records.next();
                // Every record must belong to the partition it was read from.
                assertEquals(i, hashPartitioner.getPartition(record._1()));
                recordsList.add(record);
            }
            // NOTE(review): if decompression/deserialization throws, fin is never
            // closed — consider try-with-resources even in test code.
            recordsStream.close();
            startOffset += partitionSize;
        }
    }
    return recordsList;
}
/** Timeout for a single round trip of auth message exchange, in milliseconds. */
public int authRTTimeoutMs() {
    // Prefer the auth-specific timeout; fall back to the SASL timeout (default 30s).
    // Multiply in long BEFORE narrowing to int: the original `(int) ... * 1000`
    // bound the cast to timeStringAsSec's result, so the seconds -> milliseconds
    // conversion ran in int arithmetic and could overflow for large settings.
    return (int) (JavaUtils.timeStringAsSec(conf.get("spark.network.auth.rpcTimeout",
        conf.get(SPARK_NETWORK_SASL_TIMEOUT_KEY, "30s"))) * 1000);
}
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
/**
 * Reads every record back out of the merged shuffle output file and verifies,
 * via assertEquals, that each record's key hashes to the partition whose byte
 * range it was read from.
 *
 * @return the recovered records, ordered by partition index
 * @throws IOException on any failure reading the merged file
 */
private List<Tuple2<Object, Object>> readRecordsFromFile() throws IOException {
    final ArrayList<Tuple2<Object, Object>> recordsList = new ArrayList<>();
    long startOffset = 0;  // running byte offset of the current partition
    for (int i = 0; i < NUM_PARTITITONS; i++) {
        final long partitionSize = partitionSizesInMergedFile[i];
        if (partitionSize > 0) {
            // Bound the stream to exactly this partition's bytes.
            FileInputStream fin = new FileInputStream(mergedOutputFile);
            fin.getChannel().position(startOffset);
            InputStream in = new LimitedInputStream(fin, partitionSize);
            in = blockManager.serializerManager().wrapForEncryption(in);
            if (conf.getBoolean("spark.shuffle.compress", true)) {
                in = CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(in);
            }
            DeserializationStream recordsStream = serializer.newInstance().deserializeStream(in);
            Iterator<Tuple2<Object, Object>> records = recordsStream.asKeyValueIterator();
            while (records.hasNext()) {
                Tuple2<Object, Object> record = records.next();
                assertEquals(i, hashPartitioner.getPartition(record._1()));
                recordsList.add(record);
            }
            // NOTE(review): fin leaks if an exception is thrown before this close.
            recordsStream.close();
            startOffset += partitionSize;
        }
    }
    return recordsList;
}
/** Connect timeout in milliseconds. Default 120 secs. */
public int connectionTimeoutMs() {
    // The generic network timeout (default 120s) serves as the fallback value.
    long defaultNetworkTimeoutS = JavaUtils.timeStringAsSec(
        conf.get("spark.network.timeout", "120s"));
    // The connection-specific key wins when set; the multiply happens in long
    // arithmetic before the final narrowing cast.
    long defaultTimeoutMs = JavaUtils.timeStringAsSec(
        conf.get(SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY, defaultNetworkTimeoutS + "s")) * 1000;
    return (int) defaultTimeoutMs;
}
final byte[] key = JavaUtils.bufferToArray(entry.getKey()); final byte[] value = entry.getValue(); final BytesToBytesMap.Location loc =
/**
 * Time (in milliseconds) that we will wait in order to perform a retry after an IOException.
 * Only relevant if maxIORetries > 0.
 */
public int ioRetryWaitTimeMs() {
    // Default wait is "5s". NOTE(review): the cast applies before * 1000, so the
    // scaling runs in int arithmetic, unlike connectionTimeoutMs() which scales
    // in long and casts last.
    return (int) JavaUtils.timeStringAsSec(conf.get(SPARK_NETWORK_IO_RETRYWAIT_KEY, "5s")) * 1000;
}