@Override public Decision canAllocate(RoutingNode node, RoutingAllocation allocation) { // Only checks the node-level limit, not the index-level // Capture the limit here in case it changes during this method's // execution final int clusterShardLimit = this.clusterShardLimit; if (clusterShardLimit <= 0) { return allocation.decision(Decision.YES, NAME, "total shard limits are disabled: [cluster: %d] <= 0", clusterShardLimit); } int nodeShardCount = 0; for (ShardRouting nodeShard : node) { // don't count relocating shards... if (nodeShard.relocating()) { continue; } nodeShardCount++; } if (clusterShardLimit >= 0 && nodeShardCount >= clusterShardLimit) { return allocation.decision(Decision.NO, NAME, "too many shards [%d] allocated to this node, cluster setting [%s=%d]", nodeShardCount, CLUSTER_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), clusterShardLimit); } return allocation.decision(Decision.YES, NAME, "the shard count [%d] for this node is under the cluster level node limit [%d]", nodeShardCount, clusterShardLimit); } }
/**
 * Allows a primary unconditionally; allows a replica only when the routing nodes report
 * an active primary for the same shard id.
 *
 * @param shardRouting the shard being considered
 * @param allocation   the allocation used to look up the primary and build the decision
 * @return {@code YES} for primaries and for replicas with an active primary, {@code NO} otherwise
 */
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) {
    if (shardRouting.primary() == false) {
        final ShardRouting activePrimary = allocation.routingNodes().activePrimary(shardRouting.shardId());
        return activePrimary != null
            ? allocation.decision(Decision.YES, NAME, "primary shard for this replica is already active")
            : allocation.decision(Decision.NO, NAME, "primary shard for this replica is not yet active");
    }
    return allocation.decision(Decision.YES, NAME, "shard is primary and can be allocated");
}
}
/**
 * Throttles rebalancing once the number of relocating shards reaches the configured
 * concurrency limit. A limit of {@code -1} means no limit is applied.
 *
 * @param allocation the allocation used to read the relocating shard count and build the decision
 * @return {@code YES} when unlimited or below the limit, {@code THROTTLE} when the limit is reached
 */
@Override
public Decision canRebalance(RoutingAllocation allocation) {
    final boolean unlimited = clusterConcurrentRebalance == -1;
    if (unlimited) {
        return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed");
    }

    final int inFlight = allocation.routingNodes().getRelocatingShardCount();
    if (inFlight < clusterConcurrentRebalance) {
        return allocation.decision(Decision.YES, NAME,
            "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]",
            clusterConcurrentRebalance, inFlight);
    }
    return allocation.decision(Decision.THROTTLE, NAME,
        "reached the limit of concurrently rebalancing shards [%d], cluster setting [%s=%d]",
        inFlight,
        CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(),
        clusterConcurrentRebalance);
}
}
/**
 * Rejects the current allocation command because of the supplied exception.
 *
 * @param explain    when {@code true} the rejection is returned as an explanation instead of thrown
 * @param allocation the allocation used to build the {@code NO} decision
 * @param rte        the exception describing why the command is rejected
 * @return an explanation wrapping a {@code NO} decision whose reason is the exception message
 * @throws RuntimeException {@code rte} itself, when {@code explain} is {@code false}
 */
protected RerouteExplanation explainOrThrowRejectedCommand(boolean explain, RoutingAllocation allocation, RuntimeException rte) {
    if (explain == false) {
        throw rte;
    }
    final String label = name() + " (allocation command)";
    final Decision rejection = allocation.decision(Decision.NO, label, rte.getMessage());
    return new RerouteExplanation(this, rejection);
}
/**
 * Applies the cluster-level filters and then the index-level filters to {@code node}.
 * The first filter stage that yields a decision wins; if neither does, the node passes.
 *
 * @param indexMd    metadata of the index whose filters are applied
 * @param node       the candidate node
 * @param allocation the allocation used to build decisions
 * @return the first non-null filter decision, or {@code YES} when no filter objects
 */
private Decision shouldFilter(IndexMetaData indexMd, RoutingNode node, RoutingAllocation allocation) {
    final Decision clusterVerdict = shouldClusterFilter(node, allocation);
    if (clusterVerdict != null) {
        return clusterVerdict;
    }

    final Decision indexVerdict = shouldIndexFilter(indexMd, node, allocation);
    if (indexVerdict != null) {
        return indexVerdict;
    }

    return allocation.decision(Decision.YES, NAME, "node passes include/exclude/require filters");
}
/**
 * Allows rebalancing of a shard only when the routing nodes report that all replicas of
 * that shard id are active.
 *
 * @param shardRouting the shard being considered for rebalancing
 * @param allocation   the allocation supplying routing nodes and metadata
 * @return {@code YES} when all replicas are active, {@code NO} otherwise
 */
@Override
public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) {
    final boolean replicasActive =
        allocation.routingNodes().allReplicasActive(shardRouting.shardId(), allocation.metaData());
    if (replicasActive) {
        return allocation.decision(Decision.YES, NAME, "rebalancing is allowed as all replicas are active in the cluster");
    }
    return allocation.decision(Decision.NO, NAME, "rebalancing is not allowed until all replicas in the cluster are active");
}
}
/**
 * Rejects allocation when any of {@code assignedShards} already lives on {@code node}.
 * The explanation distinguishes the shard itself from another copy of it.
 *
 * @param shardRouting   the shard being allocated
 * @param node           the candidate node
 * @param allocation     the allocation used to build decisions
 * @param assignedShards the already assigned shards to compare against
 * @return {@code NO} on the first assigned shard found on {@code node}, {@code YES} if none is
 */
private Decision decideSameNode(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation,
                                Iterable<ShardRouting> assignedShards) {
    for (ShardRouting assignedShard : assignedShards) {
        final boolean onCandidateNode = node.nodeId().equals(assignedShard.currentNodeId());
        if (onCandidateNode == false) {
            continue;
        }
        if (assignedShard.isSameAllocation(shardRouting)) {
            return allocation.decision(Decision.NO, NAME,
                "the shard cannot be allocated to the node on which it already exists [%s]",
                shardRouting.toString());
        }
        return allocation.decision(Decision.NO, NAME,
            "the shard cannot be allocated to the same node on which a copy of the shard already exists [%s]",
            assignedShard.toString());
    }
    return allocation.decision(Decision.YES, NAME, "the shard does not exist on the same node");
}
}
/**
 * Rejects the current allocation command for the supplied reason.
 *
 * @param explain    when {@code true} the rejection is returned as an explanation instead of thrown
 * @param allocation the allocation used to build the {@code NO} decision
 * @param reason     the text describing why the command is rejected
 * @return an explanation wrapping a {@code NO} decision carrying {@code reason}
 * @throws IllegalArgumentException prefixed with this command's name, when {@code explain} is {@code false}
 */
protected RerouteExplanation explainOrThrowRejectedCommand(boolean explain, RoutingAllocation allocation, String reason) {
    if (explain == false) {
        throw new IllegalArgumentException("[" + name() + "] " + reason);
    }
    final String label = name() + " (allocation command)";
    final Decision rejection = allocation.decision(Decision.NO, label, reason);
    return new RerouteExplanation(this, rejection);
}
/**
 * Checks {@code node} against the cluster-level require, include and exclude filters, in that order.
 * A filter that is {@code null} is skipped.
 *
 * @param node       the candidate node
 * @param allocation the allocation used to build decisions
 * @return a {@code NO} decision from the first filter that rejects the node, or {@code null}
 *         when no cluster-level filter rejects it
 */
private Decision shouldClusterFilter(RoutingNode node, RoutingAllocation allocation) {
    // require: the node must match
    if (clusterRequireFilters != null && clusterRequireFilters.match(node.node()) == false) {
        return allocation.decision(Decision.NO, NAME, "node does not match cluster setting [%s] filters [%s]",
            CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX, clusterRequireFilters);
    }
    // include: the node must match (message previously lacked the word "match")
    if (clusterIncludeFilters != null && clusterIncludeFilters.match(node.node()) == false) {
        return allocation.decision(Decision.NO, NAME, "node does not match cluster setting [%s] filters [%s]",
            CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX, clusterIncludeFilters);
    }
    // exclude: the node must NOT match
    if (clusterExcludeFilters != null && clusterExcludeFilters.match(node.node())) {
        return allocation.decision(Decision.NO, NAME, "node matches cluster setting [%s] filters [%s]",
            CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX, clusterExcludeFilters);
    }
    return null;
}
/**
 * Refuses allocation of a shard whose number of failed allocation attempts has reached the
 * index's {@code SETTING_ALLOCATION_MAX_RETRY} value.
 *
 * @param shardRouting the shard being considered
 * @param allocation   the allocation supplying index metadata and used to build the decision
 * @return {@code YES} when the shard has no recorded failures or still has retries left,
 *         {@code NO} once the failure count is at or above the maximum
 */
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) {
    final UnassignedInfo info = shardRouting.unassignedInfo();
    if (info == null || info.getNumFailedAllocations() <= 0) {
        return allocation.decision(Decision.YES, NAME, "shard has no previous failures");
    }

    final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index());
    final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings());
    if (info.getNumFailedAllocations() < maxRetry) {
        return allocation.decision(Decision.YES, NAME,
            "shard has failed allocating [%d] times but [%d] retries are allowed",
            info.getNumFailedAllocations(), maxRetry);
    }
    return allocation.decision(Decision.NO, NAME,
        "shard has exceeded the maximum number of retries [%d] on "
            + "failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [%s]",
        maxRetry, info.toString());
}
/**
 * Checks that the target node's version is the same as or newer than the version recorded
 * on the snapshot recovery source.
 *
 * @param recoverySource the snapshot recovery source carrying the snapshot version
 * @param target         the candidate node
 * @param allocation     the allocation used to build decisions
 * @return {@code YES} when the node version is on or after the snapshot version, {@code NO} otherwise
 */
private Decision isVersionCompatible(SnapshotRecoverySource recoverySource, final RoutingNode target,
                                     final RoutingAllocation allocation) {
    final boolean nodeCanReadSnapshot = target.node().getVersion().onOrAfter(recoverySource.version());
    if (nodeCanReadSnapshot == false) {
        return allocation.decision(Decision.NO, NAME, "node version [%s] is older than the snapshot version [%s]",
            target.node().getVersion(), recoverySource.version());
    }
    // Restoring is fine when the snapshot is older than, or the same version as, the node.
    return allocation.decision(Decision.YES, NAME, "node version [%s] is the same or newer than snapshot version [%s]",
        target.node().getVersion(), recoverySource.version());
}
}
/**
 * Applies the cluster-level filters and then the filters of the shard's index to {@code node}.
 * The index metadata is only looked up once the cluster-level filters have let the node through.
 *
 * @param shardRouting the shard whose index filters are applied
 * @param node         the candidate node
 * @param allocation   the allocation supplying metadata and used to build decisions
 * @return the first non-null filter decision, or {@code YES} when no filter objects
 */
private Decision shouldFilter(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    final Decision clusterVerdict = shouldClusterFilter(node, allocation);
    if (clusterVerdict != null) {
        return clusterVerdict;
    }

    final Decision indexVerdict =
        shouldIndexFilter(allocation.metaData().getIndexSafe(shardRouting.index()), node, allocation);
    if (indexVerdict != null) {
        return indexVerdict;
    }

    return allocation.decision(Decision.YES, NAME, "node passes include/exclude/require filters");
}
private Decision canMove(ShardRouting shardRouting, RoutingAllocation allocation) { if (shardRouting.primary()) { // Only primary shards are snapshotted SnapshotsInProgress snapshotsInProgress = allocation.custom(SnapshotsInProgress.TYPE); if (snapshotsInProgress == null || snapshotsInProgress.entries().isEmpty()) { // Snapshots are not running return allocation.decision(Decision.YES, NAME, "no snapshots are currently running"); } for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { SnapshotsInProgress.ShardSnapshotStatus shardSnapshotStatus = snapshot.shards().get(shardRouting.shardId()); if (shardSnapshotStatus != null && !shardSnapshotStatus.state().completed() && shardSnapshotStatus.nodeId() != null && shardSnapshotStatus.nodeId().equals(shardRouting.currentNodeId())) { if (logger.isTraceEnabled()) { logger.trace("Preventing snapshotted shard [{}] from being moved away from node [{}]", shardRouting.shardId(), shardSnapshotStatus.nodeId()); } return allocation.decision(Decision.THROTTLE, NAME, "waiting for snapshotting of shard [%s] to complete on this node [%s]", shardRouting.shardId(), shardSnapshotStatus.nodeId()); } } } return allocation.decision(Decision.YES, NAME, "the shard is not being snapshotted"); }
/**
 * Checks {@code node} against the index-level require, include and exclude filters, in that order.
 * A filter that is {@code null} is skipped.
 *
 * @param indexMd    metadata of the index supplying the filters
 * @param node       the candidate node
 * @param allocation the allocation used to build decisions
 * @return a {@code NO} decision from the first filter that rejects the node, or {@code null}
 *         when no index-level filter rejects it
 */
private Decision shouldIndexFilter(IndexMetaData indexMd, RoutingNode node, RoutingAllocation allocation) {
    // require: the node must match
    if (indexMd.requireFilters() != null && indexMd.requireFilters().match(node.node()) == false) {
        return allocation.decision(Decision.NO, NAME, "node does not match index setting [%s] filters [%s]",
            IndexMetaData.INDEX_ROUTING_REQUIRE_GROUP_PREFIX, indexMd.requireFilters());
    }
    // include: the node must match
    if (indexMd.includeFilters() != null && indexMd.includeFilters().match(node.node()) == false) {
        return allocation.decision(Decision.NO, NAME, "node does not match index setting [%s] filters [%s]",
            IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_PREFIX, indexMd.includeFilters());
    }
    // exclude: the node must NOT match
    if (indexMd.excludeFilters() != null && indexMd.excludeFilters().match(node.node())) {
        return allocation.decision(Decision.NO, NAME, "node matches index setting [%s] filters [%s]",
            IndexMetaData.INDEX_ROUTING_EXCLUDE_GROUP_SETTING.getKey(), indexMd.excludeFilters());
    }
    return null;
}
/**
 * Checks that a primary is only relocated to a node whose version is the same as or newer
 * than the version of the node it currently lives on.
 *
 * @param routingNodes the routing nodes used to resolve the source node
 * @param sourceNodeId id of the node currently holding the primary
 * @param target       the candidate node
 * @param allocation   the allocation used to build decisions
 * @return {@code YES} when the target version is on or after the source version, {@code NO} otherwise
 */
private Decision isVersionCompatibleRelocatePrimary(final RoutingNodes routingNodes, final String sourceNodeId,
                                                    final RoutingNode target, final RoutingAllocation allocation) {
    final RoutingNode source = routingNodes.node(sourceNodeId);
    final boolean targetIsOlder = target.node().getVersion().onOrAfter(source.node().getVersion()) == false;
    if (targetIsOlder) {
        return allocation.decision(Decision.NO, NAME,
            "cannot relocate primary shard from a node with version [%s] to a node with older version [%s]",
            source.node().getVersion(), target.node().getVersion());
    }
    return allocation.decision(Decision.YES, NAME,
        "can relocate primary shard from a node with version [%s] to a node with equal-or-newer version [%s]",
        source.node().getVersion(), target.node().getVersion());
}
/**
 * Allows a shard that is recovering from a snapshot to be allocated only while its restore
 * is still in progress. Shards with any other recovery source are ignored by this check.
 *
 * @param shardRouting the shard being considered
 * @param allocation   the allocation supplying the in-progress restores and used to build decisions
 * @return {@code YES} when the shard is not snapshot-recovered or its restore has not completed,
 *         {@code NO} when no uncompleted restore status is found for it
 */
@Override
public Decision canAllocate(final ShardRouting shardRouting, final RoutingAllocation allocation) {
    final RecoverySource recoverySource = shardRouting.recoverySource();
    final boolean fromSnapshot = recoverySource != null && recoverySource.getType() == RecoverySource.Type.SNAPSHOT;
    if (fromSnapshot == false) {
        return allocation.decision(Decision.YES, NAME, "ignored as shard is not being recovered from a snapshot");
    }

    final RecoverySource.SnapshotRecoverySource source = (RecoverySource.SnapshotRecoverySource) recoverySource;

    // Walk restores -> entry for this restore UUID -> status of this shard, tolerating a gap at any step.
    final RestoreInProgress restores = allocation.custom(RestoreInProgress.TYPE);
    final RestoreInProgress.Entry entry = restores == null ? null : restores.get(source.restoreUUID());
    final RestoreInProgress.ShardRestoreStatus status =
        entry == null ? null : entry.shards().get(shardRouting.shardId());

    if (status != null && status.state().completed() == false) {
        assert status.state() != RestoreInProgress.State.SUCCESS : "expected shard [" + shardRouting
            + "] to be in initializing state but got [" + status.state() + "]";
        return allocation.decision(Decision.YES, NAME, "shard is currently being restored");
    }

    return allocation.decision(Decision.NO, NAME,
        "shard has failed to be restored from the snapshot [%s] because of [%s] - "
            + "manually close or delete the index [%s] in order to retry to restore the snapshot again or use the reroute API to force the "
            + "allocation of an empty primary shard",
        source.snapshot(), shardRouting.unassignedInfo().getDetails(), shardRouting.getIndexName());
}
/**
 * Checks that a replica is only allocated to a node whose version is the same as or newer
 * than the version of the node holding the primary.
 *
 * @param routingNodes the routing nodes used to resolve the primary's node
 * @param sourceNodeId id of the node holding the primary
 * @param target       the candidate node for the replica
 * @param allocation   the allocation used to build decisions
 * @return {@code YES} when the target version is on or after the primary's node version, {@code NO} otherwise
 */
private Decision isVersionCompatibleAllocatingReplica(final RoutingNodes routingNodes, final String sourceNodeId,
                                                      final RoutingNode target, final RoutingAllocation allocation) {
    final RoutingNode source = routingNodes.node(sourceNodeId);
    final boolean targetIsOlder = target.node().getVersion().onOrAfter(source.node().getVersion()) == false;
    if (targetIsOlder) {
        // A primary already running on a newer version cannot be recovered from, due to
        // possible differences in the lucene index format etc.
        return allocation.decision(Decision.NO, NAME,
            "cannot allocate replica shard to a node with version [%s] since this is older than the primary version [%s]",
            target.node().getVersion(), source.node().getVersion());
    }
    // Recovering from a primary on the same or an older version is fine.
    return allocation.decision(Decision.YES, NAME,
        "can allocate replica shard to a node with version [%s] since this is equal-or-newer than the primary version [%s]",
        target.node().getVersion(), source.node().getVersion());
}
/** * Returns a {@link Decision} whether the given primary shard can be * forcibly allocated on the given node. This method should only be called * for unassigned primary shards where the node has a shard copy on disk. * * Note: all implementations that override this behavior should take into account * the results of {@link #canAllocate(ShardRouting, RoutingNode, RoutingAllocation)} * before making a decision on force allocation, because force allocation should only * be considered if all deciders return {@link Decision#NO}. */ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { assert shardRouting.primary() : "must not call canForceAllocatePrimary on a non-primary shard " + shardRouting; assert shardRouting.unassigned() : "must not call canForceAllocatePrimary on an assigned shard " + shardRouting; Decision decision = canAllocate(shardRouting, node, allocation); if (decision.type() == Type.NO) { // On a NO decision, by default, we allow force allocating the primary. return allocation.decision(Decision.YES, decision.label(), "primary shard [%s] allowed to force allocate on node [%s]", shardRouting.shardId(), node.nodeId()); } else { // On a THROTTLE/YES decision, we use the same decision instead of forcing allocation return decision; } } }
@Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (shardRouting.unassigned()) { // only for unassigned - we filter allocation right after the index creation ie. for shard shrinking etc. to ensure // that once it has been allocated post API the replicas can be allocated elsewhere without user interaction // this is a setting that can only be set within the system! IndexMetaData indexMd = allocation.metaData().getIndexSafe(shardRouting.index()); DiscoveryNodeFilters initialRecoveryFilters = indexMd.getInitialRecoveryFilters(); if (initialRecoveryFilters != null && INITIAL_RECOVERY_TYPES.contains(shardRouting.recoverySource().getType()) && initialRecoveryFilters.match(node.node()) == false) { String explanation = (shardRouting.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) ? "initial allocation of the shrunken index is only allowed on nodes [%s] that hold a copy of every shard in the index" : "initial allocation of the index is only allowed on nodes [%s]"; return allocation.decision(Decision.NO, NAME, explanation, initialRecoveryFilters); } } return shouldFilter(shardRouting, node, allocation); }
@Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (shardRouting.primary()) { if (shardRouting.currentNodeId() == null) { if (shardRouting.recoverySource() != null && shardRouting.recoverySource().getType() == RecoverySource.Type.SNAPSHOT) { // restoring from a snapshot - check that the node can handle the version return isVersionCompatible((SnapshotRecoverySource)shardRouting.recoverySource(), node, allocation); } else { // existing or fresh primary on the node return allocation.decision(Decision.YES, NAME, "the primary shard is new or already existed on the node"); } } else { // relocating primary, only migrate to newer host return isVersionCompatibleRelocatePrimary(allocation.routingNodes(), shardRouting.currentNodeId(), node, allocation); } } else { final ShardRouting primary = allocation.routingNodes().activePrimary(shardRouting.shardId()); // check that active primary has a newer version so that peer recovery works if (primary != null) { return isVersionCompatibleAllocatingReplica(allocation.routingNodes(), primary.currentNodeId(), node, allocation); } else { // ReplicaAfterPrimaryActiveAllocationDecider should prevent this case from occurring return allocation.decision(Decision.YES, NAME, "no active primary shard yet"); } } }