Refine search
/**
 * Compile/smoke check that exercises every Java-visible accessor and listener
 * registration on the current {@link TaskContext}. Return values are
 * intentionally discarded; the point is that the calls compile and run.
 */
public static void test() {
  final TaskContext ctx = TaskContext.get();
  // Status accessors.
  ctx.isCompleted();
  ctx.isInterrupted();
  // Listener registration (completion and failure callbacks).
  ctx.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  ctx.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  // Identifier accessors.
  ctx.attemptNumber();
  ctx.partitionId();
  ctx.stageId();
  ctx.stageAttemptNumber();
  ctx.taskAttemptId();
}
/**
 * Records the number of rows written into the current Spark task's output
 * metrics. A no-op when not running inside a Spark task (no TaskContext on
 * this thread) or when {@code numRows} is not positive.
 *
 * @param numRows number of rows written by this task
 */
public static void updateSparkRecordsWrittenMetrics(long numRows) {
  final TaskContext ctx = TaskContext.get();
  if (ctx == null || numRows <= 0) {
    return; // Outside a task, or nothing was written.
  }
  ctx.taskMetrics().outputMetrics().setRecordsWritten(numRows);
}
/**
 * Exercises the context accessors available inside a completion callback,
 * then re-registers this listener on the same context.
 */
@Override
public void onTaskCompletion(TaskContext context) {
  // Status accessors.
  context.isCompleted();
  context.isInterrupted();
  // Identifier accessors.
  context.stageId();
  context.stageAttemptNumber();
  context.partitionId();
  // NOTE(review): registering this listener from within its own completion
  // callback would recurse if listeners added post-completion are invoked
  // immediately — presumably intentional here as an API exercise; confirm.
  context.addTaskCompletionListener(this);
}
}
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Adds {@code size} bytes to the peak-execution-memory metric of the task
 * currently running on this thread.
 *
 * @param size number of bytes to record
 */
public void incPeakExecutionMemory(long size) {
  final TaskContext taskContext = TaskContext.get();
  taskContext.taskMetrics().incPeakExecutionMemory(size);
}
// Fragment of a constructor/initializer: captures the key and value schemas
// and obtains the current task's memory manager.
// NOTE(review): the boolean result of canSortFullyWithPrefix(...) is
// discarded here, making the call a no-op unless it has side effects —
// confirm whether the result was meant to be stored or checked.
this.keySchema = keySchema; this.valueSchema = valueSchema; final TaskContext taskContext = TaskContext.get(); SortPrefixUtils.canSortFullyWithPrefix(keySchema.apply(0)); TaskMemoryManager taskMemoryManager = taskContext.taskMemoryManager();
/**
 * Creates a handle for writing a brand-new file for the given partition/file id.
 * Resolves the target path (optionally under a temp folder for copy-on-write),
 * persists partition metadata, and opens the underlying storage writer.
 *
 * @param config        write configuration
 * @param commitTime    commit instant this write belongs to
 * @param hoodieTable   table being written to
 * @param partitionPath partition the new file lives in
 * @param fileId        id of the file to create
 * @throws HoodieInsertException if the storage writer cannot be initialized
 */
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
    String partitionPath, String fileId) {
  super(config, commitTime, hoodieTable);
  writeStatus.setFileId(fileId);
  writeStatus.setPartitionPath(partitionPath);
  // Cache the Spark partition id once and reuse it below (the original
  // re-queried TaskContext.getPartitionId() for the trySave call).
  final int sparkPartitionId = TaskContext.getPartitionId();
  this.path = makeNewPath(partitionPath, sparkPartitionId, writeStatus.getFileId());
  if (config.shouldUseTempFolderForCopyOnWriteForCreate()) {
    this.tempPath = makeTempPath(partitionPath, sparkPartitionId, writeStatus.getFileId(),
        TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
  }
  try {
    HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
        new Path(config.getBasePath()),
        FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
    partitionMetadata.trySave(sparkPartitionId);
    this.storageWriter = HoodieStorageWriterFactory
        .getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, schema);
  } catch (IOException e) {
    throw new HoodieInsertException(
        "Failed to initialize HoodieStorageWriter for path " + getStorageWriterPath(), e);
  }
  logger.info("New InsertHandle for partition :" + partitionPath + " with fileId " + fileId);
}
/** * Constructor for deserialization. Shouldn't be called directly. */ public DefaultSparkHttpServicePluginContext() throws IOException { this.runtimeContext = SparkRuntimeContextProvider.get(); this.pluginInstantiator = createPluginsInstantiator(runtimeContext); this.pluginConfigurer = null; this.extraPlugins = new HashMap<>(); // Each deserizliaed instance of this class should be used for the current task excution only, // hence we can do the cleanup on task completion. TaskContext.get().addTaskCompletionListener(new TaskCompletionListener() { @Override public void onTaskCompletion(TaskContext context) { Closeables.closeQuietly(pluginInstantiator); } }); }
/**
 * Registers {@code function} to be invoked when the given task completes.
 * (The method name's spelling is kept as-is for caller compatibility.)
 *
 * @param taskContext context to attach the listener to
 * @param function    callback to run on task completion; its result is ignored
 */
static void addOnCompletition(TaskContext taskContext, final Function0<?> function) {
  final TaskCompletionListener listener = new TaskCompletionListener() {
    @Override
    public void onTaskCompletion(TaskContext context) {
      function.apply();
    }
  };
  taskContext.addTaskCompletionListener(listener);
}
BypassMergeSortShuffleWriter( BlockManager blockManager, IndexShuffleBlockResolver shuffleBlockResolver, BypassMergeSortShuffleHandle<K, V> handle, int mapId, TaskContext taskContext, SparkConf conf) { // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true); this.blockManager = blockManager; final ShuffleDependency<K, V, V> dep = handle.dependency(); this.mapId = mapId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.serializer = dep.serializer(); this.shuffleBlockResolver = shuffleBlockResolver; }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Spark task attempt id is increased by Spark context instead of task, which may introduce // unstable qtest output, since non Hive features depends on this, we always set it to 0 here. taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_0"); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Drains {@code iterator} through the writer under the current task's context.
 *
 * @param iterator records to write
 * @return always {@code null}
 */
@Override
public Void apply(Iterator<T> iterator) {
  final TaskContext context = TaskContext.get();
  writer.write(context, iterator);
  return null;
}
}
// Fragment of a constructor/initializer: builds the backing BytesToBytesMap
// with the task's memory manager and registers a completion listener so the
// map's memory is released when the task finishes.
this.groupingKeySchema = groupingKeySchema; this.map = new BytesToBytesMap( taskContext.taskMemoryManager(), initialCapacity, pageSizeBytes, true); taskContext.addTaskCompletionListener(context -> { free(); });
/**
 * Unwraps the value tuple from a (key, tuple) pair, lazily propagating the
 * Spark partition id into the job configuration (once per instance) and
 * bumping the configured counter when counting is enabled.
 *
 * @param v1 (key, tuple) pair; only the tuple is returned
 * @return the second element of {@code v1}
 */
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  // Idiomatic boolean test (was: disableCounter == false).
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
/**
 * Decides whether this attempt's stderr should be persisted to HDFS:
 * only when stderr persistence is enabled and the current attempt number
 * is below the configured log-file limit.
 *
 * @param limit  unused here; part of the overridden signature
 * @param taskId unused here; part of the overridden signature
 * @return {@code true} if stderr should be persisted for this attempt
 */
@Override
protected boolean writeErrorToHDFS(int limit, String taskId) {
  if (command.getPersistStderr()) {
    final int attemptNumber = TaskContext.get().attemptNumber();
    return attemptNumber < command.getLogFilesLimit();
  }
  return false;
}
}
// Fragment of a test setup method: resets spill-file tracking and stubs a
// TaskContext mock with fresh metrics, plus the block/disk manager chain used
// to create temp blocks during spills.
spillFilesCreated.clear(); taskContext = mock(TaskContext.class); when(taskContext.taskMetrics()).thenReturn(new TaskMetrics()); when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); when(diskBlockManager.createTempLocalBlock()).thenAnswer(invocationOnMock -> {
// Fragment of a spill routine's tail: publishes the memory/disk spill sizes
// to the task metrics, accumulates the running total, and reports the freed
// memory back to the caller.
taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); totalSpillBytes += spillSize; return spillSize;
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
// NOTE(review): the writeSortedFile -> freeMemory -> reset ordering is
// load-bearing (see the comment above); kept byte-identical. Minor cosmetic
// issue: " times"/" time" carry a leading space and the format string already
// separates arguments, so the log message contains a double space.
@Override public Option<MapStatus> stop(boolean success) { try { taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes()); if (stopping) { return Option.apply(null); } else { stopping = true; if (success) { if (mapStatus == null) { throw new IllegalStateException("Cannot call stop(true) without having called write()"); } return Option.apply(mapStatus); } else { return Option.apply(null); } } } finally { if (sorter != null) { // If sorter is non-null, then this implies that we called stop() in response to an error, // so we need to clean up memory and spill files created by the sorter sorter.cleanupResources(); } } } }
@Test public void testDiskSpilledBytes() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long[] record = new long[100]; int recordSize = record.length * 8; int n = (int) pageSizeBytes / recordSize * 3; for (int i = 0; i < n; i++) { record[0] = (long) i; sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); } // We will have at-least 2 memory pages allocated because of rounding happening due to // integer division of pageSizeBytes and recordSize. assertTrue(sorter.getNumberOfAllocatedPages() >= 2); assertTrue(taskContext.taskMetrics().diskBytesSpilled() == 0); UnsafeExternalSorter.SpillableIterator iter = (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); assertTrue(iter.spill() > 0); assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); assertEquals(0, iter.spill()); // Even if we did not spill second time, the disk spilled bytes should still be non-zero assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); }