@Override protected synchronized void doStart() { Objects.requireNonNull(clusterStatePublisher, "please set a cluster state publisher before starting"); Objects.requireNonNull(nodeConnectionsService, "please set the node connection service before starting"); Objects.requireNonNull(discoverySettings, "please set discovery settings before starting"); addListener(localNodeMasterListeners); DiscoveryNode localNode = localNodeSupplier.get(); assert localNode != null; updateState(state -> { assert state.nodes().getLocalNodeId() == null : "local node is already set"; DiscoveryNodes nodes = DiscoveryNodes.builder(state.nodes()).add(localNode).localNodeId(localNode.getId()).build(); return ClusterState.builder(state).nodes(nodes).blocks(initialBlocks).build(); }); this.threadPoolExecutor = EsExecutors.newSinglePrioritizing(UPDATE_THREAD_NAME, daemonThreadFactory(settings, UPDATE_THREAD_NAME), threadPool.getThreadContext(), threadPool.scheduler()); this.taskBatcher = new ClusterServiceTaskBatcher(logger, threadPoolExecutor); }
protected ClusterState rejoin(ClusterState clusterState, String reason) { // *** called from within an cluster state update task *** // assert Thread.currentThread().getName().contains(InternalClusterService.UPDATE_THREAD_NAME); logger.warn(reason + ", current nodes: {}", clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()) .addGlobalBlock(discoverySettings.getNoMasterBlock()) .build(); // clean the nodes, we are now not connected to anybody, since we try and reform the cluster DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(clusterState.nodes()).masterNodeId(null).build(); // TODO: do we want to force a new thread if we actively removed the master? this is to give a full pinging cycle // before a decision is made. joinThreadControl.startNewThreadIfNotRunning(); return ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(discoveryNodes) .build(); }
} else { nodeAdded = true; nodesBuilder.put(node); for (DiscoveryNode existingNode : currentState.nodes()) { if (node.address().equals(existingNode.address())) { nodesBuilder.remove(existingNode.id()); logger.warn("received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode);
assert nodesBuilder.isLocalNodeElectedMaster(); nodesBuilder.add(node); nodesChanged = true; minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion());
assert nodesBuilder.isLocalNodeElectedMaster(); nodesBuilder.add(node); nodesChanged = true; minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion());
assert nodesBuilder.isLocalNodeElectedMaster(); } else { try { nodesBuilder.add(node); nodesChanged = true; } catch (IllegalArgumentException e) {
.build(); DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(previousClusterState.nodes()).masterNodeId(null).build(); newClusterState = ClusterState.builder(previousClusterState) .blocks(clusterBlocks)
clusterStateChanged = true; logger.info("[{}] removing node [{}]", tribeName, discoNode); nodes.remove(discoNode.getId()); clusterStateChanged = true; logger.info("[{}] adding node [{}]", tribeName, discoNode); nodes.remove(tribe.getId()); // remove any existing node with the same id but different ephemeral id nodes.add(discoNode);
clusterStateChanged = true; logger.info("[{}] removing node [{}]", tribeName, discoNode); nodes.remove(discoNode.getId()); clusterStateChanged = true; logger.info("[{}] adding node [{}]", tribeName, discoNode); nodes.remove(tribe.getId()); // remove any existing node with the same id but different ephemeral id nodes.add(discoNode);
@Override public ClusterTasksResult<Task> execute(final ClusterState currentState, final List<Task> tasks) throws Exception { final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); boolean removed = false; for (final Task task : tasks) { if (currentState.nodes().nodeExists(task.node())) { remainingNodesBuilder.remove(task.node()); removed = true; } else { logger.debug("node [{}] does not exist in cluster state, ignoring", task); } } if (!removed) { // no nodes to remove, keep the current cluster state return ClusterTasksResult.<Task>builder().successes(tasks).build(currentState); } final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); final ClusterTasksResult.Builder<Task> resultBuilder = ClusterTasksResult.<Task>builder().successes(tasks); if (electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes()) == false) { final int masterNodes = electMasterService.countMasterNodes(remainingNodesClusterState.nodes()); rejoin.accept(LoggerMessageFormat.format("not enough master nodes (has [{}], but needed [{}])", masterNodes, electMasterService.minimumMasterNodes())); return resultBuilder.build(currentState); } else { return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks))); } }
@Override public ClusterTasksResult<Task> execute(final ClusterState currentState, final List<Task> tasks) throws Exception { final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); boolean removed = false; for (final Task task : tasks) { if (currentState.nodes().nodeExists(task.node())) { remainingNodesBuilder.remove(task.node()); removed = true; } else { logger.debug("node [{}] does not exist in cluster state, ignoring", task); } } if (!removed) { // no nodes to remove, keep the current cluster state return ClusterTasksResult.<Task>builder().successes(tasks).build(currentState); } final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); final ClusterTasksResult.Builder<Task> resultBuilder = ClusterTasksResult.<Task>builder().successes(tasks); if (electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes()) == false) { final int masterNodes = electMasterService.countMasterNodes(remainingNodesClusterState.nodes()); rejoin.accept(LoggerMessageFormat.format("not enough master nodes (has [{}], but needed [{}])", masterNodes, electMasterService.minimumMasterNodes())); return resultBuilder.build(currentState); } else { return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks))); } }
@Override public ClusterTasksResult<Task> execute(final ClusterState currentState, final List<Task> tasks) throws Exception { final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); boolean removed = false; for (final Task task : tasks) { if (currentState.nodes().nodeExists(task.node())) { remainingNodesBuilder.remove(task.node()); removed = true; } else { logger.debug("node [{}] does not exist in cluster state, ignoring", task); } } if (!removed) { // no nodes to remove, keep the current cluster state return ClusterTasksResult.<Task>builder().successes(tasks).build(currentState); } final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); final ClusterTasksResult.Builder<Task> resultBuilder = ClusterTasksResult.<Task>builder().successes(tasks); if (electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes()) == false) { final int masterNodes = electMasterService.countMasterNodes(remainingNodesClusterState.nodes()); rejoin.accept(LoggerMessageFormat.format("not enough master nodes (has [{}], but needed [{}])", masterNodes, electMasterService.minimumMasterNodes())); return resultBuilder.build(currentState); } else { return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks))); } }
@Override public BatchResult<Task> execute(final ClusterState currentState, final List<Task> tasks) throws Exception { final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); boolean removed = false; for (final Task task : tasks) { if (currentState.nodes().nodeExists(task.node().getId())) { remainingNodesBuilder.remove(task.node().getId()); removed = true; } else { logger.debug("node [{}] does not exist in cluster state, ignoring", task); } } if (!removed) { // no nodes to remove, keep the current cluster state return BatchResult.<Task>builder().successes(tasks).build(currentState); } final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); final BatchResult.Builder<Task> resultBuilder = BatchResult.<Task>builder().successes(tasks); if (!electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes())) { return resultBuilder.build(rejoin.apply(remainingNodesClusterState, "not enough master nodes")); } else { final RoutingAllocation.Result routingResult = allocationService.reroute(remainingNodesClusterState, Joiner.on(",").join(tasks)); return resultBuilder.build(ClusterState.builder(remainingNodesClusterState).routingResult(routingResult).build()); } }
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) { assert currentState.nodes().getMasterNodeId() == null : currentState; DiscoveryNodes currentNodes = currentState.nodes(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes); nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId()); for (final DiscoveryNode joiningNode : joiningNodes) { final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId()); if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode); nodesBuilder.remove(nodeWithSameId.getId()); } final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress()); if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode); nodesBuilder.remove(nodeWithSameAddress.getId()); } } // now trim any left over dead nodes - either left there when the previous master stepped down // or removed by us above ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)).build(); return ClusterState.builder(allocationService.deassociateDeadNodes(tmpState, false, "removed dead nodes on election")); }
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) { assert currentState.nodes().getMasterNodeId() == null : currentState; DiscoveryNodes currentNodes = currentState.nodes(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes); nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId()); for (final DiscoveryNode joiningNode : joiningNodes) { if (joiningNode == FINISH_ELECTION_TASK || joiningNode == BECOME_MASTER_TASK) { continue; } final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId()); if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode); nodesBuilder.remove(nodeWithSameId.getId()); } final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress()); if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode); nodesBuilder.remove(nodeWithSameAddress.getId()); } } // now trim any left over dead nodes - either left there when the previous master stepped down // or removed by us above ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).build(); return ClusterState.builder(allocationService.deassociateDeadNodes(tmpState, false, "removed dead nodes on election")); }
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) { assert currentState.nodes().getMasterNodeId() == null : currentState; DiscoveryNodes currentNodes = currentState.nodes(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes); nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId()); for (final DiscoveryNode joiningNode : joiningNodes) { final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId()); if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode); nodesBuilder.remove(nodeWithSameId.getId()); } final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress()); if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode); nodesBuilder.remove(nodeWithSameAddress.getId()); } } // now trim any left over dead nodes - either left there when the previous master stepped down // or removed by us above ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)).build(); return ClusterState.builder(allocationService.deassociateDeadNodes(tmpState, false, "removed dead nodes on election")); }
public static ClusterService createClusterService(ThreadPool threadPool, DiscoveryNode localNode, ClusterSettings clusterSettings) { ClusterService clusterService = new ClusterService(Settings.builder().put("cluster.name", "ClusterServiceTests").build(), clusterSettings, threadPool, Collections.emptyMap()); clusterService.setNodeConnectionsService(new NodeConnectionsService(Settings.EMPTY, null, null) { @Override public void connectToNodes(DiscoveryNodes discoveryNodes) { // skip } @Override public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) { // skip } }); ClusterState initialClusterState = ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName())) .nodes(DiscoveryNodes.builder() .add(localNode) .localNodeId(localNode.getId()) .masterNodeId(localNode.getId())) .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK).build(); clusterService.getClusterApplierService().setInitialState(initialClusterState); clusterService.getMasterService().setClusterStatePublisher( createClusterStatePublisher(clusterService.getClusterApplierService())); clusterService.getMasterService().setClusterStateSupplier(clusterService.getClusterApplierService()::state); clusterService.start(); return clusterService; }
@Override public ClusterState execute(ClusterState currentState) { // Rebuild state ClusterState.Builder stateBuilder = ClusterState.builder(currentState); // Rebuild nodes DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder() .localNodeId(localNode.id()) .masterNodeId(singleton.master().getNode().id()) .put(singleton.master().getNode()); for (ESNode node : singleton.slaves()) { nodesBuilder.put(node.getNode()); } latestDiscoNodes = nodesBuilder.build(); stateBuilder.nodes(latestDiscoNodes); for (DiscoveryNode node : latestDiscoNodes) { if (!currentState.nodes().nodeExists(node.id())) { transportService.connectToNode(node); } } // update the fact that we are the master... if (!localNode().id().equals(currentState.nodes().masterNodeId())) { ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(NO_MASTER_BLOCK).build(); stateBuilder.blocks(clusterBlocks); } return stateBuilder.build(); }
protected void rejoin(String reason) { assert Thread.holdsLock(stateMutex); ClusterState clusterState = committedState.get(); logger.warn("{}, current nodes: {}", reason, clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); // TODO: do we want to force a new thread if we actively removed the master? this is to give a full pinging cycle // before a decision is made. joinThreadControl.startNewThreadIfNotRunning(); if (clusterState.nodes().getMasterNodeId() != null) { // remove block if it already exists before adding new one assert clusterState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock().id()) == false : "NO_MASTER_BLOCK should only be added by ZenDiscovery"; ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()) .addGlobalBlock(discoverySettings.getNoMasterBlock()) .build(); DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(clusterState.nodes()).masterNodeId(null).build(); clusterState = ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(discoveryNodes) .build(); committedState.set(clusterState); clusterApplier.onNewClusterState(reason, this::clusterState, (source, e) -> {}); // don't wait for state to be applied } }