/**
 * Inserts five zero-length records, interleaved with two explicit spills, and checks that the
 * sorted iterator hands back five records whose key prefix and record length are both zero.
 */
@Test
public void testSortingEmptyArrays() throws Exception {
  final UnsafeExternalSorter sorter = newSorter();

  // Number of empty records inserted per batch; a spill is requested between batches.
  final int[] recordsPerBatch = {2, 1, 2};
  for (int batch = 0; batch < recordsPerBatch.length; batch++) {
    for (int k = 0; k < recordsPerBatch[batch]; k++) {
      sorter.insertRecord(null, 0, 0, 0, false);
    }
    // The final batch is left unspilled.
    if (batch < recordsPerBatch.length - 1) {
      sorter.spill();
    }
  }

  final UnsafeSorterIterator sortedRecords = sorter.getSortedIterator();
  int remaining = 5;
  while (remaining-- > 0) {
    sortedRecords.loadNext();
    assertEquals(0, sortedRecords.getKeyPrefix());
    assertEquals(0, sortedRecords.getRecordLength());
  }

  sorter.cleanupResources();
  assertSpillFilesWereCleanedUp();
}
/**
 * Creates an {@link UnsafeExternalSorter} via {@code UnsafeExternalSorter.create}, passing the
 * managers, comparators, page size and spill threshold referenced below together with a fixed
 * initial size of 1024.
 *
 * @return the newly created sorter
 * @throws IOException propagated from {@code UnsafeExternalSorter.create}
 */
private UnsafeExternalSorter newSorter() throws IOException {
  final int initialSize = 1024;
  return UnsafeExternalSorter.create(
    taskMemoryManager,
    blockManager,
    serializerManager,
    taskContext,
    () -> recordComparator,
    prefixComparator,
    initialSize,
    pageSizeBytes,
    spillThreshold,
    shouldUseRadixSort());
}
@Test public void testSortTimeMetric() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long prevSortTime = sorter.getSortTimeNanos(); assertEquals(prevSortTime, 0); sorter.insertRecord(null, 0, 0, 0, false); sorter.spill(); assertThat(sorter.getSortTimeNanos(), greaterThan(prevSortTime)); prevSortTime = sorter.getSortTimeNanos(); sorter.spill(); // no sort needed assertEquals(sorter.getSortTimeNanos(), prevSortTime); sorter.insertRecord(null, 0, 0, 0, false); UnsafeSorterIterator iter = sorter.getSortedIterator(); assertThat(sorter.getSortTimeNanos(), greaterThan(prevSortTime)); }
public static UnsafeExternalSorter createWithExistingInMemorySorter( TaskMemoryManager taskMemoryManager, BlockManager blockManager, SerializerManager serializerManager, TaskContext taskContext, Supplier<RecordComparator> recordComparatorSupplier, PrefixComparator prefixComparator, int initialSize, long pageSizeBytes, int numElementsForSpillThreshold, UnsafeInMemorySorter inMemorySorter) throws IOException { UnsafeExternalSorter sorter = new UnsafeExternalSorter(taskMemoryManager, blockManager, serializerManager, taskContext, recordComparatorSupplier, prefixComparator, initialSize, pageSizeBytes, numElementsForSpillThreshold, inMemorySorter, false /* ignored */); sorter.spill(Long.MAX_VALUE, sorter); // The external sorter will be used to insert records, in-memory sorter is not needed. sorter.inMemSorter = null; return sorter; }
/** * Merges another UnsafeExternalSorters into this one, the other one will be emptied. * * @throws IOException */ public void merge(UnsafeExternalSorter other) throws IOException { other.spill(); spillWriters.addAll(other.spillWriters); // remove them from `spillWriters`, or the files will be deleted in `cleanupResources`. other.spillWriters.clear(); other.cleanupResources(); }
/**
 * Fills the sorter with 800-byte records until at least two pages are allocated, forces a spill
 * through a sorted iterator that has not been read yet, and then checks the records still come
 * back in insertion-id order.
 */
@Test
public void forcedSpillingWithNotReadIterator() throws Exception {
  final UnsafeExternalSorter sorter = newSorter();
  final long[] payload = new long[100];
  final int payloadBytes = payload.length * 8;
  final int numRecords = (int) pageSizeBytes / payloadBytes * 3;

  int id = 0;
  while (id < numRecords) {
    // The first long of each record carries its id.
    payload[0] = id;
    sorter.insertRecord(payload, Platform.LONG_ARRAY_OFFSET, payloadBytes, 0, false);
    id++;
  }
  assertTrue(sorter.getNumberOfAllocatedPages() >= 2);

  final UnsafeExternalSorter.SpillableIterator spillable =
    (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator();
  // The first spill reports a positive value; a second one reports zero.
  assertTrue(spillable.spill() > 0);
  assertEquals(0, spillable.spill());

  for (int expected = 0; expected < numRecords; expected++) {
    spillable.hasNext();
    spillable.loadNext();
    assertEquals(expected, Platform.getLong(spillable.getBaseObject(), spillable.getBaseOffset()));
  }

  sorter.cleanupResources();
  assertSpillFilesWereCleanedUp();
}
/**
 * Inserts the numbers 5, 1, 3, 4, 2 with two spills in between and checks that the sorted
 * iterator returns 1 through 5 in order, each with a matching key prefix, a record length of 4
 * and a matching int payload.
 */
@Test
public void testSortingOnlyByPrefix() throws Exception {
  final UnsafeExternalSorter sorter = newSorter();

  for (int value : new int[] {5, 1, 3}) {
    insertNumber(sorter, value);
  }
  sorter.spill();
  insertNumber(sorter, 4);
  sorter.spill();
  insertNumber(sorter, 2);

  final UnsafeSorterIterator sortedRecords = sorter.getSortedIterator();
  int expected = 1;
  while (expected <= 5) {
    sortedRecords.loadNext();
    assertEquals(expected, sortedRecords.getKeyPrefix());
    assertEquals(4, sortedRecords.getRecordLength());
    assertEquals(
      expected,
      Platform.getInt(sortedRecords.getBaseObject(), sortedRecords.getBaseOffset()));
    expected++;
  }

  sorter.cleanupResources();
  assertSpillFilesWereCleanedUp();
}
final long pageSizeBytes = 256; final long numRecordsPerPage = pageSizeBytes / recordLengthBytes; final UnsafeExternalSorter sorter = UnsafeExternalSorter.create( taskMemoryManager, blockManager, long previousPeakMemory = sorter.getPeakMemoryUsedBytes(); long newPeakMemory; try { for (int i = 0; i < numRecordsPerPage * 10; i++) { insertNumber(sorter, i); newPeakMemory = sorter.getPeakMemoryUsedBytes(); if (i % numRecordsPerPage == 0) { sorter.spill(); newPeakMemory = sorter.getPeakMemoryUsedBytes(); assertEquals(previousPeakMemory, newPeakMemory); for (int i = 0; i < numRecordsPerPage; i++) { insertNumber(sorter, i); newPeakMemory = sorter.getPeakMemoryUsedBytes(); assertEquals(previousPeakMemory, newPeakMemory); } finally { sorter.cleanupResources(); assertSpillFilesWereCleanedUp();
@Test public void forcedSpillingWithoutComparator() throws Exception { final UnsafeExternalSorter sorter = UnsafeExternalSorter.create( taskMemoryManager, blockManager, for (int i = 0; i < n; i++) { record[0] = (long) i; sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); if (i % batch == batch - 1) { sorter.spill(); UnsafeSorterIterator iter = sorter.getIterator(0); for (int i = 0; i < n; i++) { iter.hasNext(); assertEquals(i, Platform.getLong(iter.getBaseObject(), iter.getBaseOffset())); sorter.cleanupResources(); assertSpillFilesWereCleanedUp();
@Test public void spillingOccursInResponseToMemoryPressure() throws Exception { final UnsafeExternalSorter sorter = newSorter(); // This should be enough records to completely fill up a data page: final int numRecords = (int) (pageSizeBytes / (4 + 4)); for (int i = 0; i < numRecords; i++) { insertNumber(sorter, numRecords - i); } assertEquals(1, sorter.getNumberOfAllocatedPages()); memoryManager.markExecutionAsOutOfMemoryOnce(); // The insertion of this record should trigger a spill: insertNumber(sorter, 0); // Ensure that spill files were created assertThat(tempDir.listFiles().length, greaterThanOrEqualTo(1)); // Read back the sorted data: UnsafeSorterIterator iter = sorter.getSortedIterator(); int i = 0; while (iter.hasNext()) { iter.loadNext(); assertEquals(i, iter.getKeyPrefix()); assertEquals(4, iter.getRecordLength()); assertEquals(i, Platform.getInt(iter.getBaseObject(), iter.getBaseOffset())); i++; } assertEquals(numRecords + 1, i); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); }
/**
 * Keeps inserting 16-byte records until the sorter reports a second allocated page, then
 * releases its resources and checks that no spill files remain.
 */
@Test
public void testFillingPage() throws Exception {
  final UnsafeExternalSorter sorter = newSorter();
  final byte[] payload = new byte[16];
  final int targetPageCount = 2;

  while (sorter.getNumberOfAllocatedPages() < targetPageCount) {
    sorter.insertRecord(payload, Platform.BYTE_ARRAY_OFFSET, payload.length, 0, false);
  }

  sorter.cleanupResources();
  assertSpillFilesWereCleanedUp();
}
sorter = UnsafeExternalSorter.create( taskMemoryManager, blockManager, sorter = UnsafeExternalSorter.createWithExistingInMemorySorter( taskMemoryManager, blockManager,
/**
 * Inserts one row into the underlying sorter using the prefix computed by
 * {@code prefixComputer}, and counts it. When {@code testSpillFrequency} is positive, a spill
 * is forced every {@code testSpillFrequency} inserted rows.
 *
 * @param row the row to insert
 * @throws IOException propagated from the underlying sorter
 */
@VisibleForTesting
void insertRow(UnsafeRow row) throws IOException {
  final long prefix = prefixComputer.computePrefix(row);
  final Object rowBase = row.getBaseObject();
  final long rowOffset = row.getBaseOffset();
  final int rowSizeBytes = row.getSizeInBytes();
  sorter.insertRecord(rowBase, rowOffset, rowSizeBytes, prefix);
  numRowsInserted++;

  // Periodic forced spilling is active only for a positive frequency.
  if (testSpillFrequency <= 0) {
    return;
  }
  if ((numRowsInserted % testSpillFrequency) == 0) {
    sorter.spill();
  }
}
/**
 * Reports the peak memory usage, in bytes, as tracked by the underlying sorter.
 *
 * @return the value of {@code sorter.getPeakMemoryUsedBytes()}
 */
public long getPeakMemoryUsedBytes() {
  final long peakBytes = sorter.getPeakMemoryUsedBytes();
  return peakBytes;
}
/** * Returns a sorted iterator. It is the caller's responsibility to call `cleanupResources()` * after consuming this iterator. */ public KVSorterIterator sortedIterator() throws IOException { try { final UnsafeSorterIterator underlying = sorter.getSortedIterator(); if (!underlying.hasNext()) { // Since we won't ever call next() on an empty iterator, we need to clean up resources // here in order to prevent memory leaks. cleanupResources(); } return new KVSorterIterator(underlying); } catch (IOException e) { cleanupResources(); throw e; } }
/** * Write a record to the sorter. */ public void insertRecord( Object recordBase, long recordOffset, int length, long prefix, boolean prefixIsNull) throws IOException { assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { logger.info("Spilling data because number of spilledRecords crossed the threshold " + numElementsForSpillThreshold); spill(); } growPointerArrayIfNecessary(); int uaoSize = UnsafeAlignedOffset.getUaoSize(); // Need 4 or 8 bytes to store the record length. final int required = length + uaoSize; acquireNewPageIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); UnsafeAlignedOffset.putSize(base, pageCursor, length); pageCursor += uaoSize; Platform.copyMemory(recordBase, recordOffset, base, pageCursor, length); pageCursor += length; inMemSorter.insertRecord(recordAddress, prefix, prefixIsNull); }
try { array = allocateArray(used / 8 * 2); } catch (TooLargePageException e) { spill(); return; } catch (SparkOutOfMemoryError e) { freeArray(array); } else { inMemSorter.expandPointerArray(array);
this.inMemSorter = existingInMemorySorter; this.peakMemoryUsedBytes = getMemoryUsage(); this.numElementsForSpillThreshold = numElementsForSpillThreshold; cleanupResources(); });
/**
 * Exercises {@code getIterator(startIndex)} while the data is held only in memory, only in
 * spills, and in a mix of both. Each result is handed to {@code verifyIntIterator} with the
 * start index and the total number of records inserted so far.
 */
@Test
public void testGetIterator() throws Exception {
  final UnsafeExternalSorter sorter = newSorter();

  // Phase 1: nothing has been spilled yet.
  for (int i = 0; i < 100; i++) {
    insertNumber(sorter, i);
  }
  verifyIntIterator(sorter.getIterator(0), 0, 100);
  verifyIntIterator(sorter.getIterator(79), 79, 100);

  // Phase 2: two spills have been requested and nothing was inserted after the last one.
  sorter.spill();
  for (int i = 100; i < 200; i++) {
    insertNumber(sorter, i);
  }
  sorter.spill();
  verifyIntIterator(sorter.getIterator(79), 79, 200);

  // Phase 3: more records are inserted after the spills.
  for (int i = 200; i < 300; i++) {
    insertNumber(sorter, i);
  }
  verifyIntIterator(sorter.getIterator(79), 79, 300);
  verifyIntIterator(sorter.getIterator(139), 139, 300);
  verifyIntIterator(sorter.getIterator(279), 279, 300);

  // Release sorter resources and confirm the spill files are gone, as the other tests do.
  sorter.cleanupResources();
  assertSpillFilesWereCleanedUp();
}
/**
 * Inserts an {@code int[]} as a single record with the given prefix and a non-null prefix flag.
 *
 * @param sorter the sorter receiving the record
 * @param record the ints that make up the record payload
 * @param prefix the sort prefix for the record
 * @throws IOException propagated from the sorter
 */
private static void insertRecord(
    UnsafeExternalSorter sorter,
    int[] record,
    long prefix) throws IOException {
  // Payload size in bytes: four bytes per int element.
  final int recordBytes = record.length * Integer.BYTES;
  sorter.insertRecord(record, Platform.INT_ARRAY_OFFSET, recordBytes, prefix, false);
}