/**
 * Returns <code>true</code> iff the local node is the master node of the cluster.
 *
 * @return the result of {@code state.nodes().isLocalNodeElectedMaster()} for the state held by this object
 */
public boolean localNodeMaster() { return state.nodes().isLocalNodeElectedMaster(); }
/**
 * Tells whether the local node is the elected master, judged against the node set of the
 * cluster state returned by {@code clusterState()} at the time of the call.
 *
 * @return the result of {@code clusterState().nodes().isLocalNodeElectedMaster()}
 */
private boolean localNodeMaster() { return clusterState().nodes().isLocalNodeElectedMaster(); }
@Override public void onPingReceived(final NodesFaultDetection.PingRequest pingRequest) { // if we are master, we don't expect any fault detection from another node. If we get it // means we potentially have two masters in the cluster. if (!localNodeMaster()) { pingsWhileMaster.set(0); return; } if (pingsWhileMaster.incrementAndGet() < maxPingsFromAnotherMaster) { logger.trace("got a ping from another master {}. current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get()); return; } logger.debug("got a ping from another master {}. resolving who should rejoin. current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get()); synchronized (stateMutex) { ClusterState currentState = committedState.get(); if (currentState.nodes().isLocalNodeElectedMaster()) { pingsWhileMaster.set(0); handleAnotherMaster(currentState, pingRequest.masterNode(), pingRequest.clusterStateVersion(), "node fd ping"); } } } }
/**
 * Completes the election once the cluster state update has been processed: notifies the
 * enclosing {@code ElectionContext} when the new state has the local node as elected master,
 * otherwise reports the election as stopped through {@code onFailure}.
 *
 * @param source   description of the update that was processed
 * @param oldState the state before the update (not inspected here)
 * @param newState the state after the update
 */
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
    if (newState.nodes().isLocalNodeElectedMaster() == false) {
        onFailure(source, new NotMasterException("election stopped [" + source + "]"));
        return;
    }
    ElectionContext.this.onElectedAsMaster(newState);
}
private void onElectedAsMaster(ClusterState state) { assert MasterService.assertMasterUpdateThread(); assert state.nodes().isLocalNodeElectedMaster() : "onElectedAsMaster called but local node is not master"; ElectionCallback callback = getCallback(); // get under lock if (callback != null) { callback.onElectedAsMaster(state); } }
clusterTasksResult = taskInputs.executor.execute(previousClusterState, inputs); if (previousClusterState != clusterTasksResult.resultingState && previousClusterState.nodes().isLocalNodeElectedMaster() && (clusterTasksResult.resultingState.nodes().isLocalNodeElectedMaster() == false)) { throw new AssertionError("update task submitted to MasterService cannot remove master");
/**
 * On every cluster state change, triggers {@code scheduleIfNeeded} when the new state has
 * the local node as elected master; does nothing otherwise.
 *
 * @param event the cluster change notification
 */
@Override
public void clusterChanged(ClusterChangedEvent event) {
    // the clock is read first, whether or not this node turns out to be master
    final long nowNanos = currentNanoTime();
    if (event.state().nodes().isLocalNodeElectedMaster() == false) {
        return;
    }
    scheduleIfNeeded(nowNanos, event.state());
}
if (state.nodes().isLocalNodeElectedMaster() == false) { return;
private void handleAnotherMaster(ClusterState localClusterState, final DiscoveryNode otherMaster, long otherClusterStateVersion, String reason) { assert localClusterState.nodes().isLocalNodeElectedMaster() : "handleAnotherMaster called but current node is not a master"; assert Thread.holdsLock(stateMutex); if (otherClusterStateVersion > localClusterState.version()) { rejoin("zen-disco-discovered another master with a new cluster_state [" + otherMaster + "][" + reason + "]"); } else { // TODO: do this outside mutex logger.warn("discovered [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster ([{}])", otherMaster, otherMaster, reason); try { // make sure we're connected to this node (connect to node does nothing if we're already connected) // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node // in the past (after a master failure, for example) transportService.connectToNode(otherMaster); transportService.sendRequest(otherMaster, DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(localClusterState.nodes().getLocalNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleException(TransportException exp) { logger.warn(() -> new ParameterizedMessage("failed to send rejoin request to [{}]", otherMaster), exp); } }); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("failed to send rejoin request to [{}]", otherMaster), e); } } }
/**
 * Completes a snapshot deletion left behind by a previous master.
 * <p>
 * This only acts when the local node is master in the new state, was not master in the
 * previous state, and the cluster state still carries a snapshot deletion in progress;
 * that combination means the old master failed before it could clean up. The snapshot
 * files are then deleted and the deletion is removed from the cluster state.
 * <p>
 * The old master may merely have been stalled (for example by a long GC) and may later
 * resume and try to delete the same snapshot. That is acceptable: its deletion attempt
 * responds with an error, but the snapshot has in fact been deleted and a GET snapshots
 * call would show that it no longer exists.
 *
 * @param event the cluster change notification to inspect
 */
private void finalizeSnapshotDeletionFromPreviousMaster(ClusterChangedEvent event) {
    final boolean justBecameMaster =
        event.localNodeMaster() && event.previousState().nodes().isLocalNodeElectedMaster() == false;
    if (justBecameMaster == false) {
        return;
    }

    SnapshotDeletionsInProgress pendingDeletions = event.state().custom(SnapshotDeletionsInProgress.TYPE);
    if (pendingDeletions == null || pendingDeletions.hasDeletionsInProgress() == false) {
        return;
    }

    assert pendingDeletions.getEntries().size() == 1 : "only one in-progress deletion allowed per cluster";
    SnapshotDeletionsInProgress.Entry lingering = pendingDeletions.getEntries().get(0);
    deleteSnapshotFromRepository(lingering.getSnapshot(), null, lingering.getRepositoryStateId());
}
@Override public void applyClusterState(ClusterChangedEvent event) { try { if (event.localNodeMaster()) { // We don't remove old master when master flips anymore. So, we need to check for change in master if (event.nodesRemoved() || event.previousState().nodes().isLocalNodeElectedMaster() == false) { processSnapshotsOnRemovedNodes(event); } if (event.routingTableChanged()) { processStartedShards(event); } removeFinishedSnapshotFromClusterState(event); finalizeSnapshotDeletionFromPreviousMaster(event); } } catch (Exception e) { logger.warn("Failed to update snapshot state ", e); } }
/**
 * Ends snapshots that are already completed but still listed as in progress.
 * <p>
 * Such entries can exist when the previous master processed the cluster state update
 * marking a snapshot as finished but died before removing it from the in-progress list.
 * The new master is then responsible for ending the snapshot, so this only acts when the
 * local node is master in the new state and was not master in the previous one.
 *
 * @param event the cluster change notification to inspect
 */
private void removeFinishedSnapshotFromClusterState(ClusterChangedEvent event) {
    if (event.localNodeMaster() == false || event.previousState().nodes().isLocalNodeElectedMaster()) {
        return;
    }

    SnapshotsInProgress running = event.state().custom(SnapshotsInProgress.TYPE);
    if (running == null || running.entries().isEmpty()) {
        return;
    }

    for (SnapshotsInProgress.Entry candidate : running.entries()) {
        if (candidate.state().completed()) {
            endSnapshot(candidate);
        }
    }
}
@Override public void publish(ClusterChangedEvent clusterChangedEvent, AckListener ackListener) { ClusterState newState = clusterChangedEvent.state(); assert newState.getNodes().isLocalNodeElectedMaster() : "Shouldn't publish state when not master " + clusterChangedEvent.source();
if (!clusterService.state().nodes().isLocalNodeElectedMaster()) { logger.debug("Skipping reroute after cluster update settings, because node is no longer master"); listener.onResponse(new ClusterUpdateSettingsResponse(updateSettingsAcked, updater.getTransientUpdates(),
if (clusterService.state().nodes().isLocalNodeElectedMaster()) { clusterStatus = new ClusterStateHealth(clusterService.state()).getStatus();
private boolean removedNodesCleanupNeeded(ClusterChangedEvent event) { SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE); if (snapshotsInProgress == null) { return false; } // Check if we just became the master boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster(); for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { if (newMaster && (snapshot.state() == State.SUCCESS || snapshot.state() == State.INIT)) { // We just replaced old master and snapshots in intermediate states needs to be cleaned return true; } for (DiscoveryNode node : event.nodesDelta().removedNodes()) { for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshot.shards().values()) { if (!shardStatus.value.state().completed() && node.getId().equals(shardStatus.value.nodeId())) { // At least one shard was running on the removed node - we need to fail it return true; } } } } return false; }
if (nodes.getMasterNode() == null) { } else if (!nodes.isLocalNodeElectedMaster()) { try { membership.sendLeaveRequestBlocking(nodes.getMasterNode(), nodes.getLocalNode(), TimeValue.timeValueSeconds(1));
/**
 * Tells whether the given cluster state change requires some persistent task to be reassigned.
 * <p>
 * Tasks are only examined when the change is one of: the persistent tasks changed, nodes
 * changed (a node left or was added), the routing table changed, the metadata changed, or
 * the local node was not master in the previous state. In that case the answer is
 * {@code true} as soon as one task that needs reassignment would receive an assignment
 * different from its current one.
 *
 * @param event the cluster change notification to inspect
 * @return {@code true} if at least one persistent task should be reassigned; {@code false}
 *         otherwise, including when the state holds no persistent tasks metadata
 */
boolean shouldReassignPersistentTasks(final ClusterChangedEvent event) {
    final PersistentTasksCustomMetaData persistentTasks = event.state().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
    if (persistentTasks == null) {
        return false;
    }

    final boolean masterChanged = event.previousState().nodes().isLocalNodeElectedMaster() == false;
    final boolean relevantChange = persistentTasksChanged(event)
        || event.nodesChanged()
        || event.routingTableChanged()
        || event.metaDataChanged()
        || masterChanged;
    if (relevantChange == false) {
        return false;
    }

    for (PersistentTask<?> task : persistentTasks.tasks()) {
        if (needsReassignment(task.getAssignment(), event.state().nodes()) == false) {
            continue;
        }
        final Assignment candidate = createAssignment(task.getTaskName(), task.getParams(), event.state());
        if (Objects.equals(candidate, task.getAssignment()) == false) {
            return true;
        }
    }
    return false;
}
if (state.nodes().isLocalNodeElectedMaster() == false) {
} else if (currentNodes.isLocalNodeElectedMaster() == false) { logger.trace("processing node joins, but we are not the master. current master: {}", currentNodes.getMasterNode()); throw new NotMasterException("Node [" + currentNodes.getLocalNode() + "] not master for join request");