/**
 * Runs the TaskManager via
 * {@code TaskManager.selectNetworkInterfaceAndRunTaskManager} using this object's
 * {@code configuration}, {@code resourceId} and {@code taskManager}.
 *
 * <p>Any {@link Throwable} raised by that call is logged and then the JVM is exited
 * with the code returned by {@code TaskManager.STARTUP_FAILURE_RETURN_CODE()}.
 *
 * @return always {@code null}
 */
@Override
public Object call() throws Exception {
    try {
        TaskManager.selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, taskManager);
    } catch (Throwable startupFailure) {
        LOG.error("Error while starting the TaskManager", startupFailure);

        final int exitCode = TaskManager.STARTUP_FAILURE_RETURN_CODE();
        System.exit(exitCode);
    }

    return null;
}
private Throwable runTestTaskFailingOnCheckpointError(AbstractStateBackend backend) throws Exception { Task task = createTask(new FilterOperator(), backend, mock(CheckpointResponder.class), true); // start the task and wait until it is in "restore" task.startTaskThread(); task.getExecutingThread().join(); assertEquals(ExecutionState.FAILED, task.getExecutionState()); return task.getFailureCause(); }
/** * This test checks that cancel calls that are issued before the operator is * instantiated still lead to proper canceling. */ @Test public void testEarlyCanceling() throws Exception { final StreamConfig cfg = new StreamConfig(new Configuration()); cfg.setOperatorID(new OperatorID(4711L, 42L)); cfg.setStreamOperator(new SlowlyDeserializingOperator()); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); final TaskManagerActions taskManagerActions = spy(new NoOpTaskManagerActions()); final Task task = createTask(SourceStreamTask.class, cfg, new Configuration(), taskManagerActions); final TaskExecutionState state = new TaskExecutionState( task.getJobID(), task.getExecutionId(), ExecutionState.RUNNING); task.startTaskThread(); verify(taskManagerActions, timeout(2000L)).updateTaskExecutionState(eq(state)); // send a cancel. because the operator takes a long time to deserialize, this should // hit the task before the operator is deserialized task.cancelExecution(); task.getExecutingThread().join(); assertFalse("Task did not cancel", task.getExecutingThread().isAlive()); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); }
private void tryShutdownTimerService() { if (timerService != null && !timerService.isTerminated()) { try { final long timeoutMs = getEnvironment().getTaskManagerInfo().getConfiguration(). getLong(TaskManagerOptions.TASK_CANCELLATION_TIMEOUT_TIMERS); if (!timerService.shutdownServiceUninterruptible(timeoutMs)) { LOG.warn("Timer service shutdown exceeded time limit of {} ms while waiting for pending " + "timers. Will continue with shutdown procedure.", timeoutMs); } } catch (Throwable t) { // catch and log the exception to not replace the original exception LOG.error("Could not shut down timer service", t); } } }
@Test public void testLifeCycleFull() throws Exception { ACTUAL_ORDER_TRACKING.clear(); Configuration taskManagerConfig = new Configuration(); StreamConfig cfg = new StreamConfig(new Configuration()); MockSourceFunction srcFun = new MockSourceFunction(); cfg.setStreamOperator(new LifecycleTrackingStreamSource<>(srcFun, true)); cfg.setOperatorID(new OperatorID()); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); Task task = StreamTaskTest.createTask(SourceStreamTask.class, cfg, taskManagerConfig); task.startTaskThread(); LifecycleTrackingStreamSource.runStarted.await(); // wait for clean termination task.getExecutingThread().join(); assertEquals(ExecutionState.FINISHED, task.getExecutionState()); assertEquals(EXPECTED_CALL_ORDER_FULL, ACTUAL_ORDER_TRACKING); }
// Pick the first entry of the TaskManager's temp directories and pass it to
// ensureRocksDBIsLoaded. NOTE(review): indexing with [0] assumes at least one temp
// directory is configured - confirm against the TaskManager setup.
String tempDir = env.getTaskManagerInfo().getTmpDirectories()[0]; ensureRocksDBIsLoaded(tempDir);
/**
 * Builds an input gate in the receiver environment that points at the sender
 * (local address, data port of the sender's connection manager), wraps it in a
 * {@code SerializingLongReceiver}, starts that receiver and returns it.
 *
 * @return the receiver, already started
 * @throws Exception if creating the input gate or the receiver fails
 */
public SerializingLongReceiver createReceiver() throws Exception {
    final TaskManagerLocation locationOfSender = new TaskManagerLocation(
        ResourceID.generate(),
        LOCAL_ADDRESS,
        senderEnv.getConnectionManager().getDataPort());

    final InputGate gate = createInputGate(
        jobId,
        dataSetID,
        executionAttemptID,
        locationOfSender,
        receiverEnv,
        channels);

    // second constructor argument: number of channels times number of partition ids
    final SerializingLongReceiver startedReceiver =
        new SerializingLongReceiver(gate, channels * partitionIds.length);
    startedReceiver.start();

    return startedReceiver;
}
// Checkpoint responder instance for this test; despite the "Mock" suffix in the
// variable name it is a concrete TestCheckpointResponder object.
TestCheckpointResponder checkpointResponderMock = new TestCheckpointResponder();
private void runTestDeclineOnCheckpointError(AbstractStateBackend backend) throws Exception{ TestDeclinedCheckpointResponder checkpointResponder = new TestDeclinedCheckpointResponder(); Task task = createTask(new FilterOperator(), backend, checkpointResponder, false); // start the task and wait until it is in "restore" task.startTaskThread(); checkpointResponder.declinedLatch.await(); Assert.assertEquals(ExecutionState.RUNNING, task.getExecutionState()); task.cancelExecution(); task.getExecutingThread().join(); }
/**
 * Resolves the state backend through
 * {@code StateBackendLoader.fromApplicationOrConfigOrDefault}, handing it the backend
 * read from this task's stream configuration, the TaskManager configuration, the user
 * code class loader and the logger.
 *
 * @return the backend produced by the loader
 * @throws Exception if reading the configured backend or the loader call fails
 */
private StateBackend createStateBackend() throws Exception {
    final StateBackend applicationBackend =
        configuration.getStateBackend(getUserCodeClassLoader());

    return StateBackendLoader.fromApplicationOrConfigOrDefault(
        applicationBackend,
        getEnvironment().getTaskManagerInfo().getConfiguration(),
        getUserCodeClassLoader(),
        LOG);
}
/**
 * Consumes all records from input gate 0 and discards them.
 *
 * <p>When the task configuration flag {@code IS_SLOW_RECEIVER_CONFIG_KEY} is set, the
 * loop sleeps for {@code IS_SLOW_SLEEP_MS} whenever the running record count (before
 * being incremented) is a multiple of {@code IS_SLOW_EVERY_NUM_RECORDS}. The reader's
 * buffers are cleared in all cases.
 */
@Override
public void invoke() throws Exception {
    final RecordReader<SpeedTestRecord> input = new RecordReader<>(
        getEnvironment().getInputGate(0),
        SpeedTestRecord.class,
        getEnvironment().getTaskManagerInfo().getTmpDirectories());

    try {
        final boolean slowReceiver =
            getTaskConfiguration().getBoolean(IS_SLOW_RECEIVER_CONFIG_KEY, false);

        int seenRecords = 0;

        while (input.next() != null) {
            // records are only counted while in slow mode
            if (!slowReceiver) {
                continue;
            }

            final int countBeforeThisRecord = seenRecords++;
            if ((countBeforeThisRecord % IS_SLOW_EVERY_NUM_RECORDS) == 0) {
                Thread.sleep(IS_SLOW_SLEEP_MS);
            }
        }
    } finally {
        input.clearBuffers();
    }
}
}
@Test public void testBlockingNonInterruptibleCheckpoint() throws Exception { StateBackend lockingStateBackend = new BackendForTestStream(LockingOutputStream::new); Task task = createTask(new TestOperator(), lockingStateBackend, mock(CheckpointResponder.class), true); // start the task and wait until it is in "restore" task.startTaskThread(); IN_CHECKPOINT_LATCH.await(); // cancel the task and wait. unless cancellation properly closes // the streams, this will never terminate task.cancelExecution(); task.getExecutingThread().join(); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
getCheckpointLock(), getEnvironment().getIOManager(), getEnvironment().getTaskManagerInfo().getConfiguration(), getStreamStatusMaintainer(), this.headOperator,
/**
 * Forwards every record read from input gate 0 to writer 0.
 *
 * <p>Once the input is exhausted, or an exception occurs, the reader's buffers are
 * cleared, then the writer's buffers are cleared, and then the writer is flushed.
 */
@Override
public void invoke() throws Exception {
    final RecordReader<SpeedTestRecord> input = new RecordReader<>(
        getEnvironment().getInputGate(0),
        SpeedTestRecord.class,
        getEnvironment().getTaskManagerInfo().getTmpDirectories());

    final RecordWriter<SpeedTestRecord> output =
        new RecordWriter<>(getEnvironment().getWriter(0));

    try {
        // a null record ends the loop
        for (SpeedTestRecord next = input.next(); next != null; next = input.next()) {
            output.emit(next);
        }
    } finally {
        input.clearBuffers();
        output.clearBuffers();
        output.flushAll();
    }
}
}
@Test public void testCancellationFailsWithBlockingLock() throws Exception { syncLatch = new OneShotLatch(); StreamConfig cfg = new StreamConfig(new Configuration()); Task task = createTask(CancelFailingTask.class, cfg, new Configuration()); // start the task and wait until it runs // execution state RUNNING is not enough, we need to wait until the stream task's run() method // is entered task.startTaskThread(); syncLatch.await(); // cancel the execution - this should lead to smooth shutdown task.cancelExecution(); task.getExecutingThread().join(); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); }
@Override public void init() throws Exception { StreamConfig configuration = getConfiguration(); TypeSerializer<IN> inSerializer = configuration.getTypeSerializerIn1(getUserCodeClassLoader()); int numberOfInputs = configuration.getNumberOfInputs(); if (numberOfInputs > 0) { InputGate[] inputGates = getEnvironment().getAllInputGates(); inputProcessor = new StreamInputProcessor<>( inputGates, inSerializer, this, configuration.getCheckpointMode(), getCheckpointLock(), getEnvironment().getIOManager(), getEnvironment().getTaskManagerInfo().getConfiguration(), getStreamStatusMaintainer(), this.headOperator, getEnvironment().getMetricGroup().getIOMetricGroup(), inputWatermarkGauge); } headOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, this.inputWatermarkGauge); // wrap watermark gauge since registered metrics must be unique getEnvironment().getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, this.inputWatermarkGauge::getValue); }
@Test public void testCancellationNotBlockedOnLock() throws Exception { syncLatch = new OneShotLatch(); StreamConfig cfg = new StreamConfig(new Configuration()); Task task = createTask(CancelLockingTask.class, cfg, new Configuration()); // start the task and wait until it runs // execution state RUNNING is not enough, we need to wait until the stream task's run() method // is entered task.startTaskThread(); syncLatch.await(); // cancel the execution - this should lead to smooth shutdown task.cancelExecution(); task.getExecutingThread().join(); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); }
final Configuration configuration = this.getContainingTask().getEnvironment().getTaskManagerInfo().getConfiguration(); final long latencyTrackingInterval = getExecutionConfig().isLatencyTrackingConfigured() ? getExecutionConfig().getLatencyTrackingInterval()
private void testRestoreWithInterrupt(int mode) throws Exception { IN_RESTORE_LATCH.reset(); Configuration taskConfig = new Configuration(); StreamConfig cfg = new StreamConfig(taskConfig); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); switch (mode) { case OPERATOR_MANAGED: case OPERATOR_RAW: case KEYED_MANAGED: case KEYED_RAW: cfg.setStateKeySerializer(IntSerializer.INSTANCE); cfg.setStreamOperator(new StreamSource<>(new TestSource(mode))); break; default: throw new IllegalArgumentException(); } StreamStateHandle lockingHandle = new InterruptLockingStateHandle(); Task task = createTask(cfg, taskConfig, lockingHandle, mode); // start the task and wait until it is in "restore" task.startTaskThread(); IN_RESTORE_LATCH.await(); // trigger cancellation and signal to continue task.cancelExecution(); task.getExecutingThread().join(30000); if (task.getExecutionState() == ExecutionState.CANCELING) { fail("Task is stuck and not canceling"); } assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
@Test public void testLifeCycleCancel() throws Exception { ACTUAL_ORDER_TRACKING.clear(); Configuration taskManagerConfig = new Configuration(); StreamConfig cfg = new StreamConfig(new Configuration()); MockSourceFunction srcFun = new MockSourceFunction(); cfg.setStreamOperator(new LifecycleTrackingStreamSource<>(srcFun, false)); cfg.setOperatorID(new OperatorID()); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); Task task = StreamTaskTest.createTask(SourceStreamTask.class, cfg, taskManagerConfig); task.startTaskThread(); LifecycleTrackingStreamSource.runStarted.await(); // this should cancel the task even though it is blocked on runFinished task.cancelExecution(); // wait for clean termination task.getExecutingThread().join(); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertEquals(EXPECTED_CALL_ORDER_CANCEL_RUNNING, ACTUAL_ORDER_TRACKING); }