// Exercise the MergeManagerImpl lifecycle: build a manager for reduceId1,
// spin up two in-memory mergers, close it, then build a second manager for
// reduceId2 and drive a merge of on-disk outputs through it.
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
    reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
    null, null, new Progress(), new MROutputFiles());
mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs1 =
    new ArrayList<InMemoryMapOutput<Text, Text>>();
mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs2 =
    new ArrayList<InMemoryMapOutput<Text, Text>>();
// Closing must not fail even though nothing was ever shuffled in.
mergeManager.close();
// Second manager: merge the pre-built on-disk segments in `paths`.
mergeManager = new MergeManagerImpl<Text, Text>(
    reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
    null, null, new Progress(), new MROutputFiles());
MergeThread<CompressAwarePath,Text,Text> onDiskMerger =
    mergeManager.createOnDiskMerger();
onDiskMerger.merge(paths);
// `values` is expected to have been populated by the merge above —
// NOTE(review): the capture mechanism is outside this chunk; verify there.
Assert.assertEquals(values, Arrays.asList("awesome", "disgusting",
    "pretty good", "bla", "amazing", "delicious"));
mergeManager.close();
// After close() both in-memory bookkeeping lists must be drained.
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
// Smoke test: registering an on-disk file with a freshly constructed
// manager must not throw, even with a path that does not exist on disk.
FileSystem fs = FileSystem.getLocal(jobConf);
MergeManagerImpl<IntWritable, IntWritable> manager =
    new MergeManagerImpl<IntWritable, IntWritable>(null, jobConf, fs,
        null , null, null, null, null, null, null, null, null, null,
        mapOutputFile);
Path path = new Path("somePath");
// Raw size 1, random compressed size — only bookkeeping is exercised here.
CompressAwarePath cap = new CompressAwarePath(path, 1l, rand.nextInt());
manager.closeOnDiskFile(cap);
/**
 * Verifies that a shuffle memory budget larger than Integer.MAX_VALUE is
 * usable: the overall limits stay as longs while the single-shuffle limit
 * is capped at Integer.MAX_VALUE, steering oversized outputs to disk.
 */
@Test
public void testLargeMemoryLimits() throws Exception {
  final JobConf conf = new JobConf();
  // Xmx in production
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
      8L * 1024 * 1024 * 1024);
  // M1 = Xmx fraction for map outputs
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  // M2 = max M1 fraction for a single map output
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.95f);
  // M3 = M1 fraction at which in memory merge is triggered
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 1.0f);
  // M4 = M1 fraction of map outputs remaining in memory for a reduce
  conf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 1.0f);
  final MergeManagerImpl<Text, Text> mgr = new MergeManagerImpl<Text, Text>(
      null, conf, mock(LocalFileSystem.class), null, null, null, null, null,
      null, null, null, null, null, new MROutputFiles());
  assertTrue("Large shuffle area unusable: " + mgr.memoryLimit,
      mgr.memoryLimit > Integer.MAX_VALUE);
  final long maxInMemReduce = mgr.getMaxInMemReduceLimit();
  assertTrue("Large in-memory reduce area unusable: " + maxInMemReduce,
      maxInMemReduce > Integer.MAX_VALUE);
  assertEquals("maxSingleShuffleLimit to be capped at Integer.MAX_VALUE",
      Integer.MAX_VALUE, mgr.maxSingleShuffleLimit);
  // Small outputs stay in memory; anything past the int cap spills to disk.
  verifyReservedMapOutputType(mgr, 10L, "MEMORY");
  verifyReservedMapOutputType(mgr, 1L + Integer.MAX_VALUE, "DISK");
}
/**
 * Reserves {@code size} bytes from the manager and asserts the reservation
 * landed where expected ("MEMORY" or "DISK"), then releases it again.
 *
 * @param mgr manager under test
 * @param size number of bytes to reserve
 * @param expectedShuffleMode description the returned MapOutput must carry
 * @throws IOException if the reservation fails
 */
private void verifyReservedMapOutputType(MergeManagerImpl<Text, Text> mgr,
    long size, String expectedShuffleMode) throws IOException {
  final TaskAttemptID attemptId =
      TaskAttemptID.forName("attempt_0_1_m_1_1");
  final MapOutput<Text, Text> reserved = mgr.reserve(attemptId, size, 1);
  assertEquals("Shuffled bytes: " + size, expectedShuffleMode,
      reserved.getDescription());
  // Give the bytes back so later reservations see a clean budget.
  mgr.unreserve(size);
}
    // (fragment — continues a LOG statement begun outside this chunk)
    inMemoryMapOutputs.size() + " in-memory map-outputs and " +
    onDiskMapOutputs.size() + " on-disk map-outputs");
final long maxInMemReduce = getMaxInMemReduceLimit();
if (inMemoryMapOutputs.size() > 0) {
  TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
  // First pass: spill enough in-memory segments to respect maxInMemReduce.
  inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs,
      memDiskSegments, maxInMemReduce);
  // Second pass: whatever remains (limit 0) feeds the final merge directly.
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs,
      finalSegments, 0);
  LOG.info("Merging " + finalSegments.size() + " segments, " +
/**
 * Test-stub merge: counts the invocation, returns the inputs' memory to
 * the manager, then parks on two barriers so the test can control exactly
 * when the "merge" starts and finishes.
 */
@Override
public void merge(List<InMemoryMapOutput<Text, Text>> inputs)
    throws IOException {
  synchronized (this) {
    numMerges.incrementAndGet();
    for (InMemoryMapOutput<Text, Text> input : inputs) {
      manager.unreserve(input.getSize());
    }
  }
  try {
    // Block until the test releases us, then until it declares completion.
    mergeStart.await();
    mergeComplete.await();
  } catch (InterruptedException e) {
    // Deliberately swallowed: the test thread controls shutdown.
  } catch (BrokenBarrierException e) {
    // Deliberately swallowed: a broken barrier just ends this fake merge.
  }
}
/**
 * With a single-shuffle memory limit of 0%, every reservation — however
 * small — must be routed to disk.
 */
@Test
public void testZeroShuffleMemoryLimitPercent() throws Exception {
  final JobConf jobConf = new JobConf();
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.0f);
  final MergeManagerImpl<Text, Text> mgr =
      new MergeManagerImpl<>(null, jobConf, mock(LocalFileSystem.class),
          null, null, null, null, null, null, null, null, null, null,
          new MROutputFiles());
  verifyReservedMapOutputType(mgr, 10L, "DISK");
}
}
/**
 * Promotes the temporary spill file to its final on-disk location and
 * registers the committed file with the merge manager.
 *
 * @throws IOException if the rename to the final location fails
 */
@Override
public void commit() throws IOException {
  // FileSystem.rename reports failure via its boolean return rather than
  // by throwing; the original ignored it, which could silently lose this
  // map output. Fail loudly instead.
  if (!fs.rename(tmpOutputPath, outputPath)) {
    throw new IOException("Failed to rename " + tmpOutputPath
        + " to " + outputPath);
  }
  CompressAwarePath compressAwarePath =
      new CompressAwarePath(outputPath, getSize(), this.compressedSize);
  merger.closeOnDiskFile(compressAwarePath);
}
// Create the in-memory merger thread and start it immediately.
this.inMemoryMerger = createInMemoryMerger();
this.inMemoryMerger.start();
/**
 * Hands this fully-shuffled in-memory output over to the merge manager,
 * which takes ownership of its buffer from here on.
 */
@Override
public void commit() throws IOException {
  merger.closeInMemoryFile(this);
}
@Override public RawKeyValueIterator close() throws Throwable { // Wait for on-going merges to complete if (memToMemMerger != null) { memToMemMerger.close(); } inMemoryMerger.close(); onDiskMerger.close(); List<InMemoryMapOutput<K, V>> memory = new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs); inMemoryMergedMapOutputs.clear(); memory.addAll(inMemoryMapOutputs); inMemoryMapOutputs.clear(); List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs); onDiskMapOutputs.clear(); return finalMerge(jobConf, rfs, memory, disk); }
    // (fragment — tail of a log message begun outside this chunk)
    + usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")." +
    "CommitMemory is (" + commitMemory + ")");
// Memory is available: reserve in memory (primary == true).
return unconditionalReserve(mapId, requestedSize, true);
    // (fragment — trailing parameter of a signature begun outside this chunk)
    int fetcher
    ) throws IOException {
  if (!canShuffleToMemory(requestedSize)) {
    // NOTE(review): this message looks garbled — it splices the
    // "greater than maxSingleShuffleLimit" text onto the "lesser than
    // memoryLimit" text, and the stray second '+' (unary plus on
    // usedMemory) suggests a bad merge of two log statements. Verify
    // against the upstream source before relying on the wording.
    LOG.info(mapId + ": Shuffling to disk since " + requestedSize +
        " is greater than maxSingleShuffleLimit (" +
        + usedMemory + ") is lesser than memoryLimit (" + memoryLimit +
        ")." + "CommitMemory is (" + commitMemory + ")");
    return unconditionalReserve(mapId, requestedSize, true);
    // (fragment — continues a LOG statement begun outside this chunk)
    inMemoryMapOutputs.size() + " in-memory map-outputs and " +
    onDiskMapOutputs.size() + " on-disk map-outputs");
final long maxInMemReduce = getMaxInMemReduceLimit();
if (inMemoryMapOutputs.size() > 0) {
  TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
  // Spill segments beyond the in-memory reduce budget to disk...
  inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs,
      memDiskSegments, maxInMemReduce);
  // ...and keep the remainder (limit 0) for the final in-memory merge.
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs,
      finalSegments, 0);
  LOG.info("Merging " + finalSegments.size() + " segments, " +
public void close() { // Release dataIn = null; buffer = null; // Inform the MergeManager if (merger != null) { merger.unreserve(bufferSize); } } }
/**
 * Factory hook: builds the MergeManagerImpl from the shuffle plugin
 * context. Overridable so tests can substitute their own manager.
 */
protected MergeManager<K, V> createMergeManager(
    ShuffleConsumerPlugin.Context context) {
  return new MergeManagerImpl<K, V>(reduceId, jobConf,
      context.getLocalFS(),
      context.getLocalDirAllocator(), reporter, context.getCodec(),
      context.getCombinerClass(), context.getCombineCollector(),
      context.getSpilledRecordsCounter(),
      context.getReduceCombineInputCounter(),
      context.getMergedMapOutputsCounter(), this, context.getMergePhase(),
      context.getMapOutputFile());
}
/**
 * Promotes the temporary spill file to its final on-disk location and
 * registers the committed file with the merge manager.
 *
 * @throws IOException if the rename to the final location fails
 */
@Override
public void commit() throws IOException {
  // FileSystem.rename reports failure via its boolean return rather than
  // by throwing; the original ignored it, which could silently lose this
  // map output. Fail loudly instead.
  if (!fs.rename(tmpOutputPath, outputPath)) {
    throw new IOException("Failed to rename " + tmpOutputPath
        + " to " + outputPath);
  }
  CompressAwarePath compressAwarePath =
      new CompressAwarePath(outputPath, getSize(), this.compressedSize);
  merger.closeOnDiskFile(compressAwarePath);
}
// Create the in-memory merger thread and start it immediately.
this.inMemoryMerger = createInMemoryMerger();
this.inMemoryMerger.start();
/**
 * Hands this fully-shuffled in-memory output over to the merge manager,
 * which takes ownership of its buffer from here on.
 */
@Override
public void commit() throws IOException {
  merger.closeInMemoryFile(this);
}
@Override public RawKeyValueIterator close() throws Throwable { // Wait for on-going merges to complete if (memToMemMerger != null) { memToMemMerger.close(); } inMemoryMerger.close(); onDiskMerger.close(); List<InMemoryMapOutput<K, V>> memory = new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs); inMemoryMergedMapOutputs.clear(); memory.addAll(inMemoryMapOutputs); inMemoryMapOutputs.clear(); List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs); onDiskMapOutputs.clear(); return finalMerge(jobConf, rfs, memory, disk); }