/** * Returns one active replica shard for the given shard id or <code>null</code> if * no active replica is found. * * Since replicas could possibly be on nodes with a older version of ES than * the primary is, this will return replicas on the highest version of ES. * */ public ShardRouting activeReplicaWithHighestVersion(ShardId shardId) { // It's possible for replicaNodeVersion to be null, when deassociating dead nodes // that have been removed, the shards are failed, and part of the shard failing // calls this method with an out-of-date RoutingNodes, where the version might not // be accessible. Therefore, we need to protect against the version being null // (meaning the node will be going away). return assignedShards(shardId).stream() .filter(shr -> !shr.primary() && shr.active()) .filter(shr -> node(shr.currentNodeId()) != null) .max(Comparator.comparing(shr -> node(shr.currentNodeId()).node(), Comparator.nullsFirst(Comparator.comparing(DiscoveryNode::getVersion)))) .orElse(null); }
private void updateAssigned(ShardRouting oldShard, ShardRouting newShard) { assert oldShard.shardId().equals(newShard.shardId()) : "can only update " + oldShard + " by shard with same shard id but was " + newShard; assert oldShard.unassigned() == false && newShard.unassigned() == false : "only assigned shards can be updated in list of assigned shards (prev: " + oldShard + ", new: " + newShard + ")"; assert oldShard.currentNodeId().equals(newShard.currentNodeId()) : "shard to update " + oldShard + " can only update " + oldShard + " by shard assigned to same node but was " + newShard; node(oldShard.currentNodeId()).update(oldShard, newShard); List<ShardRouting> shardsWithMatchingShardId = assignedShards.computeIfAbsent(oldShard.shardId(), k -> new ArrayList<>()); int previousShardIndex = shardsWithMatchingShardId.indexOf(oldShard); assert previousShardIndex >= 0 : "shard to update " + oldShard + " does not exist in list of assigned shards"; shardsWithMatchingShardId.set(previousShardIndex, newShard); }
/** * Removes shard entries from the failed shards cache that are no longer allocated to this node by the master. * Sends shard failures for shards that are marked as actively allocated to this node but don't actually exist on the node. * Resends shard failures for shards that are still marked as allocated to this node but previously failed. * * @param state new cluster state */ private void updateFailedShardsCache(final ClusterState state) { RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); if (localRoutingNode == null) { failedShardsCache.clear(); return; } DiscoveryNode masterNode = state.nodes().getMasterNode(); // remove items from cache which are not in our routing table anymore and resend failures that have not executed on master yet for (Iterator<Map.Entry<ShardId, ShardRouting>> iterator = failedShardsCache.entrySet().iterator(); iterator.hasNext(); ) { ShardRouting failedShardRouting = iterator.next().getValue(); ShardRouting matchedRouting = localRoutingNode.getByShardId(failedShardRouting.shardId()); if (matchedRouting == null || matchedRouting.isSameAllocation(failedShardRouting) == false) { iterator.remove(); } else { if (masterNode != null) { // TODO: can we remove this? Is resending shard failures the responsibility of shardStateAction? String message = "master " + masterNode + " has not removed previously failed shard. resending shard failure"; logger.trace("[{}] re-sending failed shard [{}], reason [{}]", matchedRouting.shardId(), matchedRouting, message); shardStateAction.localShardFailed(matchedRouting, message, null, SHARD_STATE_ACTION_LISTENER, state); } } } }
/** * Split the list of node shard states into groups yes/no/throttle based on allocation deciders */ private NodesToAllocate buildNodesToAllocate(RoutingAllocation allocation, List<NodeGatewayStartedShards> nodeShardStates, ShardRouting shardRouting, boolean forceAllocate) { List<DecidedNode> yesNodeShards = new ArrayList<>(); List<DecidedNode> throttledNodeShards = new ArrayList<>(); List<DecidedNode> noNodeShards = new ArrayList<>(); for (NodeGatewayStartedShards nodeShardState : nodeShardStates) { RoutingNode node = allocation.routingNodes().node(nodeShardState.getNode().getId()); if (node == null) { continue; } Decision decision = forceAllocate ? allocation.deciders().canForceAllocatePrimary(shardRouting, node, allocation) : allocation.deciders().canAllocate(shardRouting, node, allocation); DecidedNode decidedNode = new DecidedNode(nodeShardState, decision); if (decision.type() == Type.THROTTLE) { throttledNodeShards.add(decidedNode); } else if (decision.type() == Type.NO) { noNodeShards.add(decidedNode); } else { yesNodeShards.add(decidedNode); } } return new NodesToAllocate(Collections.unmodifiableList(yesNodeShards), Collections.unmodifiableList(throttledNodeShards), Collections.unmodifiableList(noNodeShards)); }
private IndicesShardStoresResponse.StoreStatus.AllocationStatus getAllocationStatus(String index, int shardID, DiscoveryNode node) { for (ShardRouting shardRouting : routingNodes.node(node.getId())) { ShardId shardId = shardRouting.shardId(); if (shardId.id() == shardID && shardId.getIndexName().equals(index)) { if (shardRouting.primary()) { return IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY; } else if (shardRouting.assignedToNode()) { return IndicesShardStoresResponse.StoreStatus.AllocationStatus.REPLICA; } else { return IndicesShardStoresResponse.StoreStatus.AllocationStatus.UNUSED; } } } return IndicesShardStoresResponse.StoreStatus.AllocationStatus.UNUSED; }
/** * Notifies master about shards that don't exist but are supposed to be active on this node. * * @param state new cluster state */ private void failMissingShards(final ClusterState state) { RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); if (localRoutingNode == null) { return; } for (final ShardRouting shardRouting : localRoutingNode) { ShardId shardId = shardRouting.shardId(); if (shardRouting.initializing() == false && failedShardsCache.containsKey(shardId) == false && indicesService.getShardOrNull(shardId) == null) { // the master thinks we are active, but we don't have this shard at all, mark it as failed sendFailShard(shardRouting, "master marked shard as active, but shard has not been created, mark shard as failed", null, state); } } }
RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId); if (localRoutingNode != null) { // null e.g. if we are not a data node for (ShardRouting shardRouting : localRoutingNode) {
public static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state, ClusterState previousState, Set<Index> previouslyWrittenIndices) { RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); if (newRoutingNode == null) { throw new IllegalStateException("cluster state does not contain this node - cannot write index meta state"); } Set<Index> indices = new HashSet<>(); for (ShardRouting routing : newRoutingNode) { indices.add(routing.index()); } // we have to check the meta data also: closed indices will not appear in the routing table, but we must still write the state if // we have it written on disk previously for (IndexMetaData indexMetaData : state.metaData()) { boolean isOrWasClosed = indexMetaData.getState().equals(IndexMetaData.State.CLOSE); // if the index is open we might still have to write the state if it just transitioned from closed to open // so we have to check for that as well. IndexMetaData previousMetaData = previousState.metaData().index(indexMetaData.getIndex()); if (previousMetaData != null) { isOrWasClosed = isOrWasClosed || previousMetaData.getState().equals(IndexMetaData.State.CLOSE); } if (previouslyWrittenIndices.contains(indexMetaData.getIndex()) && isOrWasClosed) { indices.add(indexMetaData.getIndex()); } } return indices; }
/** * Moves a shard from unassigned to initialize state * * @param existingAllocationId allocation id to use. If null, a fresh allocation id is generated. * @return the initialized shard */ public ShardRouting initializeShard(ShardRouting unassignedShard, String nodeId, @Nullable String existingAllocationId, long expectedSize, RoutingChangesObserver routingChangesObserver) { ensureMutable(); assert unassignedShard.unassigned() : "expected an unassigned shard " + unassignedShard; ShardRouting initializedShard = unassignedShard.initialize(nodeId, existingAllocationId, expectedSize); node(nodeId).add(initializedShard); inactiveShardCount++; if (initializedShard.primary()) { inactivePrimaryCount++; } addRecovery(initializedShard); assignedShardsAdd(initializedShard); routingChangesObserver.shardInitialized(unassignedShard, initializedShard); return initializedShard; }
/** * Relocate a shard to another node, adding the target initializing * shard as well as assigning it. * * @return pair of source relocating and target initializing shards. */ public Tuple<ShardRouting,ShardRouting> relocateShard(ShardRouting startedShard, String nodeId, long expectedShardSize, RoutingChangesObserver changes) { ensureMutable(); relocatingShards++; ShardRouting source = startedShard.relocate(nodeId, expectedShardSize); ShardRouting target = source.getTargetRelocatingShard(); updateAssigned(startedShard, source); node(target.currentNodeId()).add(target); assignedShardsAdd(target); addRecovery(target); changes.relocationStarted(startedShard, target); return Tuple.tuple(source, target); }
RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId); for (AllocatedIndex<? extends Shard> indexService : indicesService) { for (Shard shard : indexService) {
private void createOrUpdateShards(final ClusterState state) { RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); if (localRoutingNode == null) { return; } DiscoveryNodes nodes = state.nodes(); RoutingTable routingTable = state.routingTable(); for (final ShardRouting shardRouting : localRoutingNode) { ShardId shardId = shardRouting.shardId(); if (failedShardsCache.containsKey(shardId) == false) { AllocatedIndex<? extends Shard> indexService = indicesService.indexService(shardId.getIndex()); assert indexService != null : "index " + shardId.getIndex() + " should have been created by createIndices"; Shard shard = indexService.getShardOrNull(shardId.id()); if (shard == null) { assert shardRouting.initializing() : shardRouting + " should have been removed by failMissingShards"; createShard(nodes, routingTable, shardRouting, state); } else { updateShard(nodes, shardRouting, shard, routingTable, state); } } } }
Map<String, NodeAllocationResult> nodeDecisions = explain ? new HashMap<>() : null; for (ObjectCursor<DiscoveryNode> cursor : allocation.nodes().getDataNodes().values()) { RoutingNode node = allocation.routingNodes().node(cursor.value.getId()); if (node == null) { continue;
/** * Cancels the give shard from the Routing nodes internal statistics and cancels * the relocation if the shard is relocating. */ private void remove(ShardRouting shard) { assert shard.unassigned() == false : "only assigned shards can be removed here (" + shard + ")"; node(shard.currentNodeId()).remove(shard); if (shard.initializing() && shard.relocatingNodeId() == null) { inactiveShardCount--; assert inactiveShardCount >= 0; if (shard.primary()) { inactivePrimaryCount--; } } else if (shard.relocating()) { shard = cancelRelocation(shard); } assignedShardsRemove(shard); if (shard.initializing()) { removeRecovery(shard); } }
ShardRouting shardRouting = null; RoutingNodes routingNodes = allocation.routingNodes(); RoutingNode routingNode = routingNodes.node(discoNode.getId()); IndexMetaData indexMetaData = null; if (routingNode != null) {
private Decision isVersionCompatibleRelocatePrimary(final RoutingNodes routingNodes, final String sourceNodeId, final RoutingNode target, final RoutingAllocation allocation) { final RoutingNode source = routingNodes.node(sourceNodeId); if (target.node().getVersion().onOrAfter(source.node().getVersion())) { return allocation.decision(Decision.YES, NAME, "can relocate primary shard from a node with version [%s] to a node with equal-or-newer version [%s]", source.node().getVersion(), target.node().getVersion()); } else { return allocation.decision(Decision.NO, NAME, "cannot relocate primary shard from a node with version [%s] to a node with older version [%s]", source.node().getVersion(), target.node().getVersion()); } }
private Decision isVersionCompatibleAllocatingReplica(final RoutingNodes routingNodes, final String sourceNodeId, final RoutingNode target, final RoutingAllocation allocation) { final RoutingNode source = routingNodes.node(sourceNodeId); if (target.node().getVersion().onOrAfter(source.node().getVersion())) { /* we can allocate if we can recover from a node that is younger or on the same version * if the primary is already running on a newer version that won't work due to possible * differences in the lucene index format etc.*/ return allocation.decision(Decision.YES, NAME, "can allocate replica shard to a node with version [%s] since this is equal-or-newer than the primary version [%s]", target.node().getVersion(), source.node().getVersion()); } else { return allocation.decision(Decision.NO, NAME, "cannot allocate replica shard to a node with version [%s] since this is older than the primary version [%s]", target.node().getVersion(), source.node().getVersion()); } }
RoutingNode routingNode = routingNodes.node(discoNode.getId()); if (routingNode == null) { return explainOrThrowMissingRoutingNode(allocation, explain, discoNode);
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); if (localRoutingNode != null) { for (final ShardRouting shardRouting : localRoutingNode) {
RoutingNode routingNode = routingNodes.node(discoNode.getId()); if (routingNode == null) { return explainOrThrowMissingRoutingNode(allocation, explain, discoNode);