Refine search
/**
 * Compile/smoke check that exercises every Java-visible accessor and listener
 * registration on the current {@link TaskContext}. Return values are
 * intentionally discarded; the point is that the calls compile and run.
 */
public static void test() {
  final TaskContext ctx = TaskContext.get();
  // Status accessors.
  ctx.isCompleted();
  ctx.isInterrupted();
  // Listener registration (completion and failure callbacks).
  ctx.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  ctx.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  // Identifier accessors.
  ctx.attemptNumber();
  ctx.partitionId();
  ctx.stageId();
  ctx.stageAttemptNumber();
  ctx.taskAttemptId();
}
/**
 * Records the number of rows written into the current Spark task's output
 * metrics. A no-op when not running inside a Spark task (no TaskContext on
 * this thread) or when {@code numRows} is not positive.
 *
 * @param numRows number of rows written by this task
 */
public static void updateSparkRecordsWrittenMetrics(long numRows) {
  final TaskContext ctx = TaskContext.get();
  if (ctx == null || numRows <= 0) {
    return; // Outside a task, or nothing was written.
  }
  ctx.taskMetrics().outputMetrics().setRecordsWritten(numRows);
}
/**
 * Exercises the context accessors available inside a completion callback,
 * then re-registers this listener on the same context.
 */
@Override
public void onTaskCompletion(TaskContext context) {
  // Status accessors.
  context.isCompleted();
  context.isInterrupted();
  // Identifier accessors.
  context.stageId();
  context.stageAttemptNumber();
  context.partitionId();
  // NOTE(review): registering this listener from within its own completion
  // callback would recurse if listeners added post-completion are invoked
  // immediately — presumably intentional here as an API exercise; confirm.
  context.addTaskCompletionListener(this);
}
}
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Adds {@code size} bytes to the peak-execution-memory metric of the task
 * currently running on this thread.
 *
 * @param size number of bytes to record
 */
public void incPeakExecutionMemory(long size) {
  final TaskContext taskContext = TaskContext.get();
  taskContext.taskMetrics().incPeakExecutionMemory(size);
}
// Fragment of a constructor/initializer: captures the key and value schemas
// and obtains the current task's memory manager.
// NOTE(review): the boolean result of canSortFullyWithPrefix(...) is
// discarded here, making the call a no-op unless it has side effects —
// confirm whether the result was meant to be stored or checked.
this.keySchema = keySchema; this.valueSchema = valueSchema; final TaskContext taskContext = TaskContext.get(); SortPrefixUtils.canSortFullyWithPrefix(keySchema.apply(0)); TaskMemoryManager taskMemoryManager = taskContext.taskMemoryManager();
/**
 * Creates a handle for writing a brand-new file for the given partition/file id.
 * Resolves the target path (optionally under a temp folder for copy-on-write),
 * persists partition metadata, and opens the underlying storage writer.
 *
 * @param config        write configuration
 * @param commitTime    commit instant this write belongs to
 * @param hoodieTable   table being written to
 * @param partitionPath partition the new file lives in
 * @param fileId        id of the file to create
 * @throws HoodieInsertException if the storage writer cannot be initialized
 */
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
    String partitionPath, String fileId) {
  super(config, commitTime, hoodieTable);
  writeStatus.setFileId(fileId);
  writeStatus.setPartitionPath(partitionPath);
  // Cache the Spark partition id once and reuse it below (the original
  // re-queried TaskContext.getPartitionId() for the trySave call).
  final int sparkPartitionId = TaskContext.getPartitionId();
  this.path = makeNewPath(partitionPath, sparkPartitionId, writeStatus.getFileId());
  if (config.shouldUseTempFolderForCopyOnWriteForCreate()) {
    this.tempPath = makeTempPath(partitionPath, sparkPartitionId, writeStatus.getFileId(),
        TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
  }
  try {
    HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
        new Path(config.getBasePath()),
        FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
    partitionMetadata.trySave(sparkPartitionId);
    this.storageWriter = HoodieStorageWriterFactory
        .getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, schema);
  } catch (IOException e) {
    throw new HoodieInsertException(
        "Failed to initialize HoodieStorageWriter for path " + getStorageWriterPath(), e);
  }
  logger.info("New InsertHandle for partition :" + partitionPath + " with fileId " + fileId);
}
/** * Constructor for deserialization. Shouldn't be called directly. */ public DefaultSparkHttpServicePluginContext() throws IOException { this.runtimeContext = SparkRuntimeContextProvider.get(); this.pluginInstantiator = createPluginsInstantiator(runtimeContext); this.pluginConfigurer = null; this.extraPlugins = new HashMap<>(); // Each deserizliaed instance of this class should be used for the current task excution only, // hence we can do the cleanup on task completion. TaskContext.get().addTaskCompletionListener(new TaskCompletionListener() { @Override public void onTaskCompletion(TaskContext context) { Closeables.closeQuietly(pluginInstantiator); } }); }
/**
 * Registers {@code function} to be invoked when the given task completes.
 * (The method name's spelling is kept as-is for caller compatibility.)
 *
 * @param taskContext context to attach the listener to
 * @param function    callback to run on task completion; its result is ignored
 */
static void addOnCompletition(TaskContext taskContext, final Function0<?> function) {
  final TaskCompletionListener listener = new TaskCompletionListener() {
    @Override
    public void onTaskCompletion(TaskContext context) {
      function.apply();
    }
  };
  taskContext.addTaskCompletionListener(listener);
}
BypassMergeSortShuffleWriter( BlockManager blockManager, IndexShuffleBlockResolver shuffleBlockResolver, BypassMergeSortShuffleHandle<K, V> handle, int mapId, TaskContext taskContext, SparkConf conf) { // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true); this.blockManager = blockManager; final ShuffleDependency<K, V, V> dep = handle.dependency(); this.mapId = mapId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.serializer = dep.serializer(); this.shuffleBlockResolver = shuffleBlockResolver; }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Spark task attempt id is increased by Spark context instead of task, which may introduce // unstable qtest output, since non Hive features depends on this, we always set it to 0 here. taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_0"); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Drains {@code iterator} through the writer under the current task's context.
 *
 * @param iterator records to write
 * @return always {@code null}
 */
@Override
public Void apply(Iterator<T> iterator) {
  final TaskContext context = TaskContext.get();
  writer.write(context, iterator);
  return null;
}
}
// Fragment of a constructor/initializer: builds the backing BytesToBytesMap
// with the task's memory manager and registers a completion listener so the
// map's memory is released when the task finishes.
this.groupingKeySchema = groupingKeySchema; this.map = new BytesToBytesMap( taskContext.taskMemoryManager(), initialCapacity, pageSizeBytes, true); taskContext.addTaskCompletionListener(context -> { free(); });
/**
 * Unwraps the value tuple from a (key, tuple) pair, lazily propagating the
 * Spark partition id into the job configuration (once per instance) and
 * bumping the configured counter when counting is enabled.
 *
 * @param v1 (key, tuple) pair; only the tuple is returned
 * @return the second element of {@code v1}
 */
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  // Idiomatic boolean test (was: disableCounter == false).
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
/**
 * Decides whether this attempt's stderr should be persisted to HDFS:
 * only when stderr persistence is enabled and the current attempt number
 * is below the configured log-file limit.
 *
 * @param limit  unused here; part of the overridden signature
 * @param taskId unused here; part of the overridden signature
 * @return {@code true} if stderr should be persisted for this attempt
 */
@Override
protected boolean writeErrorToHDFS(int limit, String taskId) {
  if (command.getPersistStderr()) {
    final int attemptNumber = TaskContext.get().attemptNumber();
    return attemptNumber < command.getLogFilesLimit();
  }
  return false;
}
}
// Fragment of a test setup method: resets spill-file tracking and stubs a
// TaskContext mock with fresh metrics, plus the block/disk manager chain used
// to create temp blocks during spills.
spillFilesCreated.clear(); taskContext = mock(TaskContext.class); when(taskContext.taskMetrics()).thenReturn(new TaskMetrics()); when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); when(diskBlockManager.createTempLocalBlock()).thenAnswer(invocationOnMock -> {
// Fragment of a spill routine's tail: publishes the memory/disk spill sizes
// to the task metrics, accumulates the running total, and reports the freed
// memory back to the caller.
taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); totalSpillBytes += spillSize; return spillSize;
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
// NOTE(review): the writeSortedFile -> freeMemory -> reset ordering is
// load-bearing (see the comment above); kept byte-identical. Minor cosmetic
// issue: " times"/" time" carry a leading space and the format string already
// separates arguments, so the log message contains a double space.
@Override public Option<MapStatus> stop(boolean success) { try { taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes()); if (stopping) { return Option.apply(null); } else { stopping = true; if (success) { if (mapStatus == null) { throw new IllegalStateException("Cannot call stop(true) without having called write()"); } return Option.apply(mapStatus); } else { return Option.apply(null); } } } finally { if (sorter != null) { // If sorter is non-null, then this implies that we called stop() in response to an error, // so we need to clean up memory and spill files created by the sorter sorter.cleanupResources(); } } } }
@Test public void testDiskSpilledBytes() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long[] record = new long[100]; int recordSize = record.length * 8; int n = (int) pageSizeBytes / recordSize * 3; for (int i = 0; i < n; i++) { record[0] = (long) i; sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); } // We will have at-least 2 memory pages allocated because of rounding happening due to // integer division of pageSizeBytes and recordSize. assertTrue(sorter.getNumberOfAllocatedPages() >= 2); assertTrue(taskContext.taskMetrics().diskBytesSpilled() == 0); UnsafeExternalSorter.SpillableIterator iter = (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); assertTrue(iter.spill() > 0); assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); assertEquals(0, iter.spill()); // Even if we did not spill second time, the disk spilled bytes should still be non-zero assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); }