hosts .stream() .map(hn -> (Callable<TransportAddress[]>) () -> transportService.addressesFromString(hn, limitPortCounts)) .collect(Collectors.toList()); final List<Future<TransportAddress[]>> futures; localAddresses.add(transportService.boundAddress().publishAddress()); localAddresses.addAll(Arrays.asList(transportService.boundAddress().boundAddresses())); try { final TransportAddress[] addresses = future.get(); logger.trace("resolved host [{}] to {}", hostname, addresses); for (int addressId = 0; addressId < addresses.length; addressId++) { final TransportAddress address = addresses[addressId]; assert e.getCause() != null; final String message = "failed to resolve host [" + hostname + "]"; logger.warn(message, e.getCause()); } catch (InterruptedException e) { Thread.currentThread().interrupt(); logger.warn("timed out after [{}] resolving host [{}]", resolveTimeout, hostname);
try { transportService.connectToNode(masterNode); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("failed to connect to master [{}], retrying...", masterNode), e); return false; while (true) { try { logger.trace("joining master {}", masterNode); membership.sendJoinRequestBlocking(masterNode, transportService.getLocalNode(), joinTimeout); return true; } catch (Exception e) { if (unwrap instanceof NotMasterException) { if (++joinAttempt == this.joinRetryAttempts) { logger.info("failed to send join request to master [{}], reason [{}], tried [{}] times", masterNode, ExceptionsHelper.detailedMessage(e), joinAttempt); return false; if (logger.isTraceEnabled()) { logger.trace(() -> new ParameterizedMessage("failed to send join request to master [{}]", masterNode), e); } else { logger.info("failed to send join request to master [{}], reason [{}]", masterNode,
ArrayList<DiscoveryNode> newFilteredNodes = new ArrayList<>(); for (DiscoveryNode listedNode : listedNodes) { try (Transport.Connection connection = transportService.openConnection(listedNode, LISTED_NODES_PROFILE)){ final PlainTransportFuture<LivenessResponse> handler = new PlainTransportFuture<>( new FutureTransportResponseHandler<LivenessResponse>() { transportService.sendRequest(connection, TransportLivenessAction.NAME, new LivenessRequest(), TransportRequestOptions.builder().withType(TransportRequestOptions.Type.STATE).withTimeout(pingTimeout).build(), handler); final LivenessResponse livenessResponse = handler.txGet(); if (!ignoreClusterName && !clusterName.equals(livenessResponse.getClusterName())) { logger.warn("node {} not part of the cluster {}, ignoring...", listedNode, clusterName); newFilteredNodes.add(listedNode); } else { newNodes.add(new DiscoveryNode(nodeWithInfo.getName(), nodeWithInfo.getId(), nodeWithInfo.getEphemeralId(), nodeWithInfo.getHostName(), nodeWithInfo.getHostAddress(), listedNode.getAddress(), nodeWithInfo.getAttributes(), nodeWithInfo.getRoles(), nodeWithInfo.getVersion())); logger.debug(() -> new ParameterizedMessage("failed to connect to node [{}], ignoring...", listedNode), e); hostFailureListener.onNodeDisconnected(listedNode, e); } catch (Exception e) { logger.info(() -> new ParameterizedMessage("failed to get node info for {}, disconnecting...", listedNode), e);
protected HandledTransportAction(Settings settings, String actionName, boolean canTripCircuitBreaker, ThreadPool threadPool, TransportService transportService, ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver, Supplier<Request> request) { super(settings, actionName, threadPool, actionFilters, indexNameExpressionResolver, transportService.getTaskManager()); transportService.registerRequestHandler(actionName, request, ThreadPool.Names.SAME, false, canTripCircuitBreaker, new TransportHandler()); }
public <T extends TransportResponse> void sendRequest(final DiscoveryNode node, final String action, final TransportRequest request, final TransportResponseHandler<T> handler) { try { Transport.Connection connection = getConnection(node); sendRequest(connection, action, request, TransportRequestOptions.EMPTY, handler); } catch (NodeNotConnectedException ex) { // the caller might not handle this so we invoke the handler handler.handleException(ex); } }
/** * Establishes the node connections. If validateInHandshake is set to true, the connection will fail if * node returned in the handshake response is different than the discovery node. */ List<DiscoveryNode> establishNodeConnections(Set<DiscoveryNode> nodes) { for (Iterator<DiscoveryNode> it = nodes.iterator(); it.hasNext(); ) { DiscoveryNode node = it.next(); if (!transportService.nodeConnected(node)) { try { logger.trace("connecting to node [{}]", node); transportService.connectToNode(node); } catch (Exception e) { it.remove(); logger.debug(() -> new ParameterizedMessage("failed to connect to discovered node [{}]", node), e); } } } return Collections.unmodifiableList(new ArrayList<>(nodes)); } }
if (cachedDiscoNodes != null && (refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) { logger.trace("using cache to retrieve node list"); return cachedDiscoNodes; logger.debug("start building nodes list using Kubernetes API"); Endpoints endpoints = kubernetesAPIService.endpoints(); if (endpoints == null || endpoints.getSubsets() == null || endpoints.getSubsets().isEmpty()) { logger.warn("no endpoints found for service [{}], namespace [{}].", this.serviceName, this.namespace); return cachedDiscoNodes; endpointSubset.getPorts().stream().forEach((port) -> { try { TransportAddress[] addresses = transportService.addressesFromString(formattedEndpointAddress + ":" + port.getPort(), 1); logger.info("adding endpoint {}, transport_address {}", endpointAddress, transportAddress); cachedDiscoNodes.add(new DiscoveryNode("#cloud-" + endpointAddress + "-" + 0, transportAddress, Version.CURRENT.minimumCompatibilityVersion()));
transportService.getTaskManager().setTaskResultsService(injector.getInstance(TaskResultsService.class)); transportService.start(); assert localNodeFactory.getNode() != null; assert transportService.getLocalNode().equals(localNodeFactory.getNode()) : "transportService has a different local node than the factory provided"; final MetaData onDiskMetadata; throw new UncheckedIOException(e); validateNodeBeforeAcceptingRequests(new BootstrapContext(environment, onDiskMetadata), transportService.boundAddress(), pluginsService .filterPlugins(Plugin .class) .flatMap(p -> p.getBootstrapChecks().stream()).collect(Collectors.toList())); clusterService.addStateApplier(transportService.getTaskManager()); transportService.acceptIncomingRequests(); discovery.startInitialJoin(); latch.countDown(); }, state -> state.nodes().getMasterNodeId() != null, initialStateTimeout); writePortsFile("transport", transport.boundAddress()); logger.info("started");
try { final ShardRouting primaryShard = shardRoutingTable.primaryShard(); final DiscoveryNode primaryNode = state.nodes().get(primaryShard.currentNodeId()); if (primaryNode == null) { logger.trace("{} failed to resolve node for primary shard {}, skipping sync", shardId, primaryShard); listener.onResponse(new InFlightOpsResponse(-1)); return; logger.trace("{} retrieving in flight operation count", shardId); transportService.sendRequest(primaryNode, IN_FLIGHT_OPS_ACTION_NAME, new InFlightOpsRequest(shardId), new TransportResponseHandler<InFlightOpsResponse>() { @Override
ClusterStateObserver observer = new ClusterStateObserver(currentState, clusterService, null, logger, threadPool.getThreadContext()); DiscoveryNode masterNode = currentState.nodes().getMasterNode(); Predicate<ClusterState> changePredicate = MasterNodeChangePredicate.build(currentState); if (masterNode == null) { logger.warn("no master known for action [{}] for shard entry [{}]", actionName, request); waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate); } else { logger.debug("sending [{}] to [{}] for shard entry [{}]", actionName, masterNode.getId(), request); transportService.sendRequest(masterNode, actionName, request, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override
@Override public void handleException(TransportException exp) { if (!running()) { return; } if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { handleTransportDisconnect(node); return; } retryCount++; logger.trace( () -> new ParameterizedMessage( "[node ] failed to ping [{}], retry [{}] out of [{}]", node, retryCount, pingRetryCount), exp); if (retryCount >= pingRetryCount) { logger.debug("[node ] failed to ping [{}], tried [{}] times, each with maximum [{}] timeout", node, pingRetryCount, pingRetryTimeout); // not good, failure if (nodesFD.remove(node, NodeFD.this)) { notifyNodeFailure(node, "failed to ping, tried [" + pingRetryCount + "] times, each with maximum [" + pingRetryTimeout + "] timeout"); } } else { // resend the request, not reschedule, rely on send timeout transportService.sendRequest(node, PING_ACTION_NAME, newPingRequest(), options, this); } }
logger.info("closing ..."); List<Closeable> toClose = new ArrayList<>(); StopWatch stopWatch = new StopWatch("node_close"); toClose.add(() -> stopWatch.stop().start("discovery")); toClose.add(injector.getInstance(Discovery.class)); toClose.add(() -> stopWatch.stop().start("monitor")); toClose.add(nodeService.getMonitorService()); toClose.add(() -> stopWatch.stop().start("gateway")); toClose.add(injector.getInstance(BigArrays.class)); if (logger.isTraceEnabled()) { logger.trace("Close times for each service:\n{}", stopWatch.prettyPrint());
private void sendCommitToNode(final DiscoveryNode node, final ClusterState clusterState, final SendingController sendingController) { try { logger.trace("sending commit for cluster state (uuid: [{}], version [{}]) to [{}]", clusterState.stateUUID(), clusterState.version(), node); transportService.sendRequest(node, COMMIT_ACTION_NAME, new CommitClusterStateRequest(clusterState.stateUUID()), stateRequestOptions, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleResponse(TransportResponse.Empty response) { if (sendingController.getPublishingTimedOut()) { logger.debug("node {} responded to cluster state commit [{}]", node, clusterState.version()); } sendingController.getPublishResponseHandler().onResponse(node); } @Override public void handleException(TransportException exp) { logger.debug(() -> new ParameterizedMessage("failed to commit cluster state (uuid [{}], version [{}]) to {}", clusterState.stateUUID(), clusterState.version(), node), exp); sendingController.getPublishResponseHandler().onFailure(node, exp); } }); } catch (Exception t) { logger.warn(() -> new ParameterizedMessage("error sending cluster state commit (uuid [{}], version [{}]) to {}", clusterState.stateUUID(), clusterState.version(), node), t); sendingController.getPublishResponseHandler().onFailure(node, t); } }
/** Updates the shard snapshot status by sending a {@link UpdateIndexShardSnapshotStatusRequest} to the master node */ void sendSnapshotShardUpdate(final Snapshot snapshot, final ShardId shardId, final ShardSnapshotStatus status, final DiscoveryNode masterNode) { try { if (masterNode.getVersion().onOrAfter(Version.V_6_1_0)) { UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status); transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, request, INSTANCE_SAME); } else { UpdateSnapshotStatusRequestV6 requestV6 = new UpdateSnapshotStatusRequestV6(snapshot, shardId, status); transportService.sendRequest(masterNode, UPDATE_SNAPSHOT_STATUS_ACTION_NAME_V6, requestV6, INSTANCE_SAME); } } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e); } }
private void sendRemoveBanRequest(DiscoveryNodes nodes, BanParentTaskRequest request) { for (ObjectObjectCursor<String, DiscoveryNode> node : nodes.getNodes()) { logger.debug("Sending remove ban for tasks with the parent [{}] to the node [{}]", request.parentTaskId, node.key); transportService.sendRequest(node.value, BAN_PARENT_ACTION_NAME, request, EmptyTransportResponseHandler .INSTANCE_SAME); } }
public NodesFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, Supplier<ClusterState> clusterStateSupplier, ClusterName clusterName) { super(settings, threadPool, transportService, clusterName); this.clusterStateSupplier = clusterStateSupplier; logger.debug("[node ] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount); transportService.registerRequestHandler( PING_ACTION_NAME, PingRequest::new, ThreadPool.Names.SAME, false, false, new PingRequestHandler()); }
private void handleAnotherMaster(ClusterState localClusterState, final DiscoveryNode otherMaster, long otherClusterStateVersion, String reason) { assert localClusterState.nodes().isLocalNodeElectedMaster() : "handleAnotherMaster called but current node is not a master"; assert Thread.holdsLock(stateMutex); if (otherClusterStateVersion > localClusterState.version()) { rejoin("zen-disco-discovered another master with a new cluster_state [" + otherMaster + "][" + reason + "]"); } else { // TODO: do this outside mutex logger.warn("discovered [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster ([{}])", otherMaster, otherMaster, reason); try { // make sure we're connected to this node (connect to node does nothing if we're already connected) // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node // in the past (after a master failure, for example) transportService.connectToNode(otherMaster); transportService.sendRequest(otherMaster, DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(localClusterState.nodes().getLocalNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleException(TransportException exp) { logger.warn(() -> new ParameterizedMessage("failed to send rejoin request to [{}]", otherMaster), exp); } }); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("failed to send rejoin request to [{}]", otherMaster), e); } } }
public void verify(String repository, boolean readOnly, String verificationToken, final ActionListener<VerifyResponse> listener) { final DiscoveryNodes discoNodes = clusterService.state().nodes(); final DiscoveryNode localNode = discoNodes.getLocalNode(); final ObjectContainer<DiscoveryNode> masterAndDataNodes = discoNodes.getMasterAndDataNodes().values(); final List<DiscoveryNode> nodes = new ArrayList<>(); for (ObjectCursor<DiscoveryNode> cursor : masterAndDataNodes) { DiscoveryNode node = cursor.value; if (readOnly && node.getVersion().before(Version.V_6_6_0)) { continue; final AtomicInteger counter = new AtomicInteger(nodes.size()); for (final DiscoveryNode node : nodes) { if (node.equals(localNode)) { try { doVerify(repository, verificationToken, localNode); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("[{}] failed to verify repository", repository), e); errors.add(new VerificationFailure(node.getId(), e)); transportService.sendRequest(node, ACTION_NAME, new VerifyNodeRepositoryRequest(repository, verificationToken), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleResponse(TransportResponse.Empty response) {
void handleJoinRequest(final DiscoveryNode node, final ClusterState state, final MembershipAction.JoinCallback callback) { if (nodeJoinController == null) { throw new IllegalStateException("discovery module is not yet started"); } else { // we do this in a couple of places including the cluster update thread. This one here is really just best effort // to ensure we fail as fast as possible. onJoinValidators.stream().forEach(a -> a.accept(node, state)); if (state.getBlocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) { MembershipAction.ensureMajorVersionBarrier(node.getVersion(), state.getNodes().getMinNodeVersion()); } // try and connect to the node, if it fails, we can raise an exception back to the client... transportService.connectToNode(node); // validate the join request, will throw a failure if it fails, which will get back to the // node calling the join request try { membership.sendValidateJoinRequestBlocking(node, state, joinTimeout); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("failed to validate incoming join request from node [{}]", node), e); callback.onFailure(new IllegalStateException("failure when sending a validation request to node", e)); return; } nodeJoinController.handleJoinRequest(node, callback); } }