org.apache.flink.runtime.executiongraph.failover.FailoverRegion java code examples

/**
 * Puts every execution vertex of the given job vertices into one shared failover region.
 *
 * <p>Logs a warning, gathers all task vertices into one list, creates a single
 * {@code FailoverRegion} from that list, and registers this region for each vertex
 * in {@code vertexToRegion}.
 *
 * @param jobVertices the job vertices whose task vertices make up the single region
 */
private void makeAllOneRegion(List<ExecutionJobVertex> jobVertices) {
  LOG.warn("Cannot decompose ExecutionGraph into individual failover regions due to use of " +
      "Co-Location constraints (iterations). Job will fail over as one holistic unit.");

  final ArrayList<ExecutionVertex> everyVertex = new ArrayList<>();
  for (ExecutionJobVertex jobVertex : jobVertices) {
    // reserve room for this job vertex up front to save some incremental growing
    final int requiredCapacity = everyVertex.size() + jobVertex.getParallelism();
    everyVertex.ensureCapacity(requiredCapacity);

    for (ExecutionVertex taskVertex : jobVertex.getTaskVertices()) {
      everyVertex.add(taskVertex);
    }
  }

  final FailoverRegion sharedRegion =
      new FailoverRegion(executionGraph, executor, everyVertex, regionFailLimit);
  for (ExecutionVertex member : everyVertex) {
    vertexToRegion.put(member, sharedRegion);
  }
}

/**
 * Tries to move the region from CANCELED to CREATED and, on success, restarts it.
 *
 * <p>Before restarting, the constraints of every distinct co-location group found among the
 * connected execution vertices are reset exactly once. If the state transition does not
 * succeed, {@code failover} is called with an exception describing the failed transition.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code restart} or {@code failover}
 */
private void reset(long globalModVersionOfFailover) {
  if (!transitionState(JobStatus.CANCELED, JobStatus.CREATED)) {
    failover(globalModVersionOfFailover,
        new FlinkException("FailoverRegion " + id + " switch from CANCELLED to CREATED fail."));
    return;
  }

  // remember which groups were already handled so that each one is reset only once
  final Collection<CoLocationGroup> handledGroups = new HashSet<>();
  for (ExecutionVertex vertex : connectedExecutionVertices) {
    final CoLocationGroup group = vertex.getJobVertex().getCoLocationGroup();
    if (group == null || handledGroups.contains(group)) {
      continue;
    }
    group.resetConstraints();
    handledGroups.add(group);
  }

  restart(globalModVersionOfFailover);
}

/**
 * Finishes a cancellation: moves the region from CANCELLING to CANCELED and then resets it.
 *
 * <p>The transition is attempted in a loop. The loop ends once the transition succeeded, or
 * once the region is observed in a state other than CANCELLING, in which case the state is
 * only logged.
 *
 * @param globalModVersionOfFailover the global modification version handed on to {@code reset}
 */
private void allVerticesInTerminalState(long globalModVersionOfFailover) {
  for (;;) {
    final JobStatus observed = this.state;

    if (!observed.equals(JobStatus.CANCELLING)) {
      LOG.info("FailoverRegion {} is {} when allVerticesInTerminalState.", id, state);
      return;
    }

    if (transitionState(observed, JobStatus.CANCELED)) {
      reset(globalModVersionOfFailover);
      return;
    }
    // transitionState returned false: read the state again and retry
  }
}

/**
 * Restarts the region by notifying the scheduler.
 *
 * <p>Tries to move the region from CREATED to RUNNING. On success the execution graph is asked
 * to reset the connected execution vertices and notify about it. If the transition does not
 * succeed, or the reset throws any exception other than {@code GlobalModVersionMismatch},
 * {@code failover} is called with an exception describing the problem.
 *
 * @param globalModVersionOfFailover the global modification version this restart belongs to
 */
private void restart(long globalModVersionOfFailover) {
  try {
    if (transitionState(JobStatus.CREATED, JobStatus.RUNNING)) {
      // let the scheduler reschedule the connected ExecutionVertices
      executionGraph.resetExecutionVerticesAndNotify(globalModVersionOfFailover, connectedExecutionVertices);
    }
    else {
      // message fixed: previously read "witch from CREATED to RUNNING fail."
      failover(globalModVersionOfFailover,
          new FlinkException("FailoverRegion " + id + " switch from CREATED to RUNNING fail."));
    }
  } catch (GlobalModVersionMismatch ignored) {
    // happens when a global recovery happens concurrently to the regional recovery
    // intentionally ignored: nothing to do in that case
  } catch (Exception e) {
    failover(globalModVersionOfFailover,
        new FlinkException("FailoverRegion " + id + " restart failed.", e));
  }
}

/**
 * Notifies the region to fail over.
 *
 * <p>If the restart strategy of the execution graph cannot restart, the execution graph is
 * failed globally. Otherwise a RUNNING region is cancelled, a CANCELED region is reset, and
 * any other state is only logged.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code cancel} or {@code reset}
 */
private void failover(long globalModVersionOfFailover) {
  if (!executionGraph.getRestartStrategy().canRestart()) {
    executionGraph.failGlobal(new FlinkException("RestartStrategy validate fail"));
    return;
  }

  final JobStatus observed = this.state;
  if (observed.equals(JobStatus.RUNNING)) {
    cancel(globalModVersionOfFailover);
    return;
  }
  if (observed.equals(JobStatus.CANCELED)) {
    reset(globalModVersionOfFailover);
    return;
  }

  LOG.info("FailoverRegion {} is {} when notified to failover.", id, state);
}

JobStatus curStatus = this.state;
if (curStatus.equals(JobStatus.RUNNING)) {
  if (transitionState(curStatus, JobStatus.CANCELLING)) {
      (Void ignored, Throwable throwable) -> {
        if (throwable != null) {
          failover(globalModVersionOfFailover,
            new FlinkException("Could not cancel all execution job vertices properly.", throwable));
        } else {
          allVerticesInTerminalState(globalModVersionOfFailover);

/**
 * Cancels all connected execution vertices of a RUNNING region.
 *
 * <p>The region is first moved from RUNNING to CANCELLING; the attempt is repeated until it
 * succeeds or the region is observed in another state (which is then only logged). After a
 * successful transition every connected vertex is cancelled, and once the combined future of
 * all cancellations completes, {@code allVerticesInTerminalState} runs on {@code executor}.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code allVerticesInTerminalState}
 */
private void cancel(final long globalModVersionOfFailover) {
  for (;;) {
    final JobStatus observed = this.state;

    if (!observed.equals(JobStatus.RUNNING)) {
      LOG.info("FailoverRegion {} is {} when cancel.", id, state);
      return;
    }
    if (!transitionState(observed, JobStatus.CANCELLING)) {
      // transitionState returned false: read the state again and retry
      continue;
    }

    // cancel all tasks (that still need cancelling) and keep each cancellation future
    final ArrayList<CompletableFuture<?>> pending = new ArrayList<>(connectedExecutionVertexes.size());
    for (ExecutionVertex member : connectedExecutionVertexes) {
      pending.add(member.cancel());
    }

    // continue once the combined future over all cancellations completes
    FutureUtils.waitForAll(pending).thenAcceptAsync(
      (Void ignored) -> allVerticesInTerminalState(globalModVersionOfFailover),
      executor);
    return;
  }
}

/**
 * Routes a task failure to the failover region registered for the failed vertex.
 *
 * <p>If no region is registered for the vertex in {@code vertexToRegion}, the execution
 * graph is failed globally instead.
 *
 * @param taskExecution the execution that failed
 * @param cause the failure cause
 */
@Override
public void onTaskFailure(Execution taskExecution, Throwable cause) {
  final ExecutionVertex failedVertex = taskExecution.getVertex();
  final FailoverRegion region = vertexToRegion.get(failedVertex);

  if (region != null) {
    LOG.info("Recovering task failure for {} #{} ({}) via restart of failover region",
        taskExecution.getVertex().getTaskNameWithSubtaskIndex(),
        taskExecution.getAttemptNumber(),
        taskExecution.getAttemptId());
    region.onExecutionFail(taskExecution, cause);
    return;
  }

  // no region known for this vertex: fall back to a global failure
  executionGraph.failGlobal(new FlinkException(
      "Can not find a failover region for the execution " + failedVertex.getTaskNameWithSubtaskIndex(), cause));
}

/**
 * Reacts to a failed execution inside this region.
 *
 * <p>If the restart strategy can restart, the region is cancelled using the global
 * modification version of the failed execution; otherwise the failure is handed to the
 * execution graph as a global failure.
 *
 * @param taskExecution the execution that failed
 * @param cause the failure cause
 */
public void onExecutionFail(Execution taskExecution, Throwable cause) {
  // TODO: check if need to failover the preceding region
  final boolean restartAllowed = executionGraph.getRestartStrategy().canRestart();
  if (restartAllowed) {
    cancel(taskExecution.getGlobalModVersion());
    return;
  }

  // delegate the failure to a global fail that will check the restart strategy and not restart
  executionGraph.failGlobal(cause);
}

/**
 * Reacts to a failed execution by starting a failover of this region.
 *
 * @param taskExecution the execution that failed; its global modification version is passed
 *     on to {@code failover}
 * @param cause the failure cause, passed on to {@code failover}
 */
public void onExecutionFail(Execution taskExecution, Throwable cause) {
  // TODO: check if need to failover the preceding region
  final long failedModVersion = taskExecution.getGlobalModVersion();
  failover(failedModVersion, cause);
}

/**
 * Cancels all connected execution vertices of a RUNNING region.
 *
 * <p>The region is first moved from RUNNING to CANCELLING; the attempt is repeated until it
 * succeeds or the region is observed in another state (which is then only logged). After a
 * successful transition every connected vertex is cancelled, and once the combined future of
 * all cancellations completes, {@code allVerticesInTerminalState} runs on {@code executor}.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code allVerticesInTerminalState}
 */
private void cancel(final long globalModVersionOfFailover) {
  for (;;) {
    final JobStatus observed = this.state;

    if (!observed.equals(JobStatus.RUNNING)) {
      LOG.info("FailoverRegion {} is {} when cancel.", id, state);
      return;
    }
    if (!transitionState(observed, JobStatus.CANCELLING)) {
      // transitionState returned false: read the state again and retry
      continue;
    }

    // cancel all tasks (that still need cancelling) and keep each cancellation future
    final ArrayList<Future<?>> pending = new ArrayList<>(connectedExecutionVertexes.size());
    for (ExecutionVertex member : connectedExecutionVertexes) {
      pending.add(member.cancel());
    }

    final FutureUtils.ConjunctFuture<Void> allTerminal = FutureUtils.waitForAll(pending);
    // callback that continues the failover once the combined future completes
    final AcceptFunction<Void> whenAllTerminal = new AcceptFunction<Void>() {
      @Override
      public void accept(Void value) {
        allVerticesInTerminalState(globalModVersionOfFailover);
      }
    };
    allTerminal.thenAcceptAsync(whenAllTerminal, executor);
    return;
  }
}

/**
 * Notifies the region to fail over.
 *
 * <p>When the restart strategy of the execution graph cannot restart, the execution graph is
 * failed globally. Otherwise the action depends on the current state: RUNNING leads to
 * {@code cancel}, CANCELED leads to {@code reset}, anything else is only logged.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code cancel} or {@code reset}
 */
private void failover(long globalModVersionOfFailover) {
  final boolean restartAllowed = executionGraph.getRestartStrategy().canRestart();
  if (restartAllowed) {
    final JobStatus current = this.state;
    if (current.equals(JobStatus.RUNNING)) {
      cancel(globalModVersionOfFailover);
    } else if (current.equals(JobStatus.CANCELED)) {
      reset(globalModVersionOfFailover);
    } else {
      LOG.info("FailoverRegion {} is {} when notified to failover.", id, state);
    }
  } else {
    executionGraph.failGlobal(new FlinkException("RestartStrategy validate fail"));
  }
}

private void restart(long globalModVersionOfFailover) {
  try {
    if (transitionState(JobStatus.CREATED, JobStatus.RUNNING)) {
          failover(globalModVersionOfFailover);
      failover(globalModVersionOfFailover);
    failover(globalModVersionOfFailover);

/**
 * Cancels all connected execution vertices of a RUNNING region.
 *
 * <p>Loops until either the RUNNING to CANCELLING transition succeeds or the region is seen
 * in a different state, which is then only logged. After a successful transition each
 * connected vertex is cancelled; when the combined future over all cancellations completes,
 * {@code allVerticesInTerminalState} is invoked on {@code executor}.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code allVerticesInTerminalState}
 */
private void cancel(final long globalModVersionOfFailover) {
  boolean finished = false;
  while (!finished) {
    final JobStatus snapshot = this.state;

    if (!snapshot.equals(JobStatus.RUNNING)) {
      LOG.info("FailoverRegion {} is {} when cancel.", id, state);
      finished = true;
    } else if (transitionState(snapshot, JobStatus.CANCELLING)) {
      // cancel all tasks (that still need cancelling), collecting one future per vertex
      final ArrayList<CompletableFuture<?>> cancellations =
          new ArrayList<>(connectedExecutionVertexes.size());
      for (ExecutionVertex vertex : connectedExecutionVertexes) {
        cancellations.add(vertex.cancel());
      }

      // continue once the combined future over all cancellations completes
      final FutureUtils.ConjunctFuture<Void> allDone = FutureUtils.waitForAll(cancellations);
      allDone.thenAcceptAsync(
        (Void unused) -> allVerticesInTerminalState(globalModVersionOfFailover),
        executor);
      finished = true;
    }
    // otherwise transitionState returned false: loop and re-read the state
  }
}

/**
 * Handles a task failure by delegating to the failover region of the failed vertex.
 *
 * <p>The region is looked up in {@code vertexToRegion}. When none is found, the execution
 * graph is failed globally with an exception wrapping the original cause.
 *
 * @param taskExecution the execution that failed
 * @param cause the failure cause
 */
@Override
public void onTaskFailure(Execution taskExecution, Throwable cause) {
  final ExecutionVertex vertex = taskExecution.getVertex();
  final FailoverRegion owningRegion = vertexToRegion.get(vertex);

  if (owningRegion == null) {
    // no region known for this vertex: fall back to a global failure
    final String message =
        "Can not find a failover region for the execution " + vertex.getTaskNameWithSubtaskIndex();
    executionGraph.failGlobal(new FlinkException(message, cause));
    return;
  }

  LOG.info("Recovering task failure for {} #{} ({}) via restart of failover region",
      taskExecution.getVertex().getTaskNameWithSubtaskIndex(),
      taskExecution.getAttemptNumber(),
      taskExecution.getAttemptId());
  owningRegion.onExecutionFail(taskExecution, cause);
}

/**
 * Reacts to a failed execution inside this region.
 *
 * <p>When the restart strategy cannot restart, the cause is handed to the execution graph as
 * a global failure. Otherwise the region is cancelled with the global modification version
 * of the failed execution.
 *
 * @param taskExecution the execution that failed
 * @param cause the failure cause
 */
public void onExecutionFail(Execution taskExecution, Throwable cause) {
  // TODO: check if need to failover the preceding region
  if (!executionGraph.getRestartStrategy().canRestart()) {
    // delegate the failure to a global fail that will check the restart strategy and not restart
    executionGraph.failGlobal(cause);
    return;
  }

  cancel(taskExecution.getGlobalModVersion());
}

if (transitionState(JobStatus.CANCELED, JobStatus.CREATED)) {
  restart(globalModVersionOfFailover);
  failover(globalModVersionOfFailover);
failover(globalModVersionOfFailover);

/**
 * Called once all vertices are terminal: moves the region from CANCELLING to CANCELED and
 * resets it.
 *
 * <p>Keeps retrying the transition until it succeeds. If the region is observed in a state
 * other than CANCELLING, that state is logged and nothing else happens.
 *
 * @param globalModVersionOfFailover the global modification version handed on to {@code reset}
 */
private void allVerticesInTerminalState(long globalModVersionOfFailover) {
  boolean finished = false;
  while (!finished) {
    final JobStatus snapshot = this.state;

    if (!snapshot.equals(JobStatus.CANCELLING)) {
      LOG.info("FailoverRegion {} is {} when allVerticesInTerminalState.", id, state);
      finished = true;
    } else if (transitionState(snapshot, JobStatus.CANCELED)) {
      reset(globalModVersionOfFailover);
      finished = true;
    }
    // otherwise transitionState returned false: loop and re-read the state
  }
}

/**
 * Notifies the region to fail over.
 *
 * <p>A restart strategy that cannot restart results in a global failure of the execution
 * graph. Otherwise a region in state RUNNING is cancelled, a region in state CANCELED is
 * reset, and for every other state only a log entry is written.
 *
 * @param globalModVersionOfFailover the global modification version handed on to
 *     {@code cancel} or {@code reset}
 */
private void failover(long globalModVersionOfFailover) {
  if (!executionGraph.getRestartStrategy().canRestart()) {
    executionGraph.failGlobal(new FlinkException("RestartStrategy validate fail"));
    return;
  }

  final JobStatus snapshot = this.state;
  final boolean running = snapshot.equals(JobStatus.RUNNING);
  if (running) {
    cancel(globalModVersionOfFailover);
  } else if (snapshot.equals(JobStatus.CANCELED)) {
    reset(globalModVersionOfFailover);
  } else {
    LOG.info("FailoverRegion {} is {} when notified to failover.", id, state);
  }
}

private void restart(long globalModVersionOfFailover) {
  try {
    if (transitionState(JobStatus.CREATED, JobStatus.RUNNING)) {
          failover(globalModVersionOfFailover);
      failover(globalModVersionOfFailover);
    failover(globalModVersionOfFailover);

Javadoc

FailoverRegion manages the failover of a minimal pipeline-connected subgraph. During a failover the region changes from RUNNING to CANCELLING, then to CANCELED, then to CREATED, and finally back to RUNNING.

Most used methods

<init>
cancel
failover
Notify the region to failover.
onExecutionFail
reset
restart
Restart the region by notifying the schedule plugin.
transitionState
allVerticesInTerminalState

Popular in Java

Finding current android device location
onRequestPermissionsResult (Fragment)
setContentView (Activity)
putExtra (Intent)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
BorderLayout (java.awt)
A border layout lays out a container, arranging and resizing its components to fit in five regions:
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
JTable (javax.swing)
CodeWhisperer alternatives

How to use FailoverRegion in org.apache.flink.runtime.executiongraph.failover

Best Java code snippets using org.apache.flink.runtime.executiongraph.failover.FailoverRegion (Showing top 20 results out of 315)

How to use
FailoverRegion
in
org.apache.flink.runtime.executiongraph.failover