/**
 * Entry point of the supervisor daemon process.
 *
 * <p>Installs the default uncaught-exception handler, builds a {@link Supervisor} backed by a
 * {@link StandaloneSupervisor} and a fresh {@link StormMetricsRegistry}, and launches it as a
 * daemon.
 *
 * @param args command line arguments; not read by this method
 * @throws Exception if setup or supervisor construction fails
 */
public static void main(String[] args) throws Exception {
    Utils.setupDefaultUncaughtExceptionHandler();

    // The registry is created before the StandaloneSupervisor, exactly as before.
    StormMetricsRegistry registry = new StormMetricsRegistry();

    // Intentionally not closed here: the instance lives for the lifetime of the daemon.
    @SuppressWarnings("resource")
    Supervisor daemon = new Supervisor(new StandaloneSupervisor(), registry);
    daemon.launchDaemon();
}
/** * start distribute supervisor. */ public void launchDaemon() { LOG.info("Starting supervisor for storm version '{}'.", VersionInfo.getVersion()); try { Map<String, Object> conf = getConf(); if (ConfigUtils.isLocalMode(conf)) { throw new IllegalArgumentException("Cannot start server in local mode!"); } launch(); metricsRegistry.registerGauge("supervisor:num-slots-used-gauge", () -> SupervisorUtils.supervisorWorkerIds(conf).size()); //This will only get updated once metricsRegistry.registerMeter("supervisor:num-launched").mark(); metricsRegistry.registerMeter("supervisor:num-shell-exceptions", ShellUtils.numShellExceptions); metricsRegistry.startMetricsReporters(conf); Utils.addShutdownHookWithForceKillIn1Sec(() -> { metricsRegistry.stopMetricsReporters(); this.close(); }); // blocking call under the hood, must invoke after launch cause some services must be initialized launchSupervisorThriftServer(conf); } catch (Exception e) { LOG.error("Failed to start supervisor\n", e); System.exit(1); } }
public ReadClusterState(Supervisor supervisor) throws Exception { this.superConf = supervisor.getConf(); this.stormClusterState = supervisor.getStormClusterState(); this.assignmentId = supervisor.getAssignmentId(); this.supervisorPort = supervisor.getThriftServerPort(); this.iSuper = supervisor.getiSupervisor(); this.localizer = supervisor.getAsyncLocalizer(); this.host = supervisor.getHostName(); this.localState = supervisor.getLocalState(); this.cachedAssignments = supervisor.getCurrAssignment(); this.metricsExec = new OnlyLatestExecutor<>(supervisor.getHeartbeatExecutor()); this.slotMetrics = supervisor.getSlotMetrics(); supervisor.getSharedContext(), supervisor.getMetricsRegistry(), supervisor.getContainerMemoryTracker()); supervisor.killWorkers(detachedRunningWorkers, launcher);
/**
 * Creates a heartbeat task bound to the given supervisor.
 *
 * @param conf       configuration map stored on this instance
 * @param supervisor supervisor that supplies the cluster state and the supervisor id
 */
public SupervisorHeartbeat(Map<String, Object> conf, Supervisor supervisor) {
    // Plain references first, then the values derived from the supervisor.
    this.conf = conf;
    this.supervisor = supervisor;
    this.supervisorId = supervisor.getId();
    this.stormClusterState = supervisor.getStormClusterState();
}
/**
 * Accepts assignments pushed to this supervisor and queues a synchronization task for them.
 *
 * @param assignments the assignments to sync with
 * @throws AuthorizationException declared by the interface; may originate from the
 *                                authorization check
 * @throws TException             declared by the interface
 */
@Override
public void sendSupervisorAssignments(SupervisorAssignments assignments)
    throws AuthorizationException, TException {
    checkAuthorization("sendSupervisorAssignments");
    LOG.info("Got an assignments from master, will start to sync with assignments: {}", assignments);

    // Build the task first, then hand it to the event manager for execution.
    final SynchronizeAssignments syncTask =
        new SynchronizeAssignments(getSupervisor(), assignments, getReadClusterState());
    getEventManger().add(syncTask);
}
/**
 * Shuts down every worker managed by this supervisor.
 *
 * <p>When the cluster-state reader exists the request is delegated to it. Otherwise a
 * {@link ContainerLauncher} is created on the spot and used to kill the workers reported by
 * {@code SupervisorUtils.supervisorWorkerIds}.
 *
 * @param onWarnTimeout  callback forwarded to the cluster-state reader; unused on the fallback path
 * @param onErrorTimeout callback forwarded to the cluster-state reader; unused on the fallback path
 * @throws RuntimeException wrapping any exception raised on the fallback path
 */
public void shutdownAllWorkers(UniFunc<Slot> onWarnTimeout, UniFunc<Slot> onErrorTimeout) {
    if (readState != null) {
        readState.shutdownAllWorkers(onWarnTimeout, onErrorTimeout);
        return;
    }

    // No cluster-state reader available: kill the workers directly.
    try {
        ContainerLauncher fallbackLauncher = ContainerLauncher.make(
            getConf(),
            getId(),
            getThriftServerPort(),
            getSharedContext(),
            getMetricsRegistry(),
            getContainerMemoryTracker());
        killWorkers(SupervisorUtils.supervisorWorkerIds(conf), fallbackLauncher);
    } catch (Exception e) {
        throw Utils.wrapInRuntime(e);
    }
}
public ReadClusterState(Supervisor supervisor) throws Exception { this.superConf = supervisor.getConf(); this.stormClusterState = supervisor.getStormClusterState(); this.syncSupEventManager = supervisor.getEventManger(); this.assignmentVersions = new AtomicReference<Map<String, VersionedData<Assignment>>>(new HashMap<String, VersionedData<Assignment>>()); this.assignmentId = supervisor.getAssignmentId(); this.iSuper = supervisor.getiSupervisor(); this.localizer = supervisor.getAsyncLocalizer(); this.host = supervisor.getHostName(); this.localState = supervisor.getLocalState(); this.clusterState = supervisor.getStormClusterState(); this.cachedAssignments = supervisor.getCurrAssignment(); this.launcher = ContainerLauncher.make(superConf, assignmentId, supervisor.getSharedContext()); supervisor.killWorkers(workers, launcher);
/**
 * Assembles the {@link SupervisorInfo} record describing the given supervisor.
 *
 * @param conf       configuration map; supplies the scheduler meta and the capacities
 * @param supervisor supervisor whose host, ports, uptime and version are reported
 * @return a freshly populated {@link SupervisorInfo}
 */
private SupervisorInfo buildSupervisorInfo(Map<String, Object> conf, Supervisor supervisor) {
    SupervisorInfo info = new SupervisorInfo();
    info.set_time_secs(Time.currentTimeSecs());
    info.set_hostname(supervisor.getHostName());
    info.set_assignment_id(supervisor.getAssignmentId());
    info.set_server_port(supervisor.getThriftServerPort());

    // Used ports are the keys of the current assignment map.
    List<Long> portsInUse = new ArrayList<>(supervisor.getCurrAssignment().get().keySet());
    info.set_used_ports(portsInUse);

    // Supervisor metadata is read as a list of port-like values; entries that convert to
    // null are left out.
    List<?> metadata = (List<?>) supervisor.getiSupervisor().getMetadata();
    List<Long> metaPorts = new ArrayList<>();
    if (metadata != null) {
        for (Object entry : metadata) {
            Integer port = ObjectReader.getInt(entry);
            if (port == null) {
                continue;
            }
            metaPorts.add(port.longValue());
        }
    }
    info.set_meta(metaPorts);

    info.set_scheduler_meta((Map<String, String>) conf.get(DaemonConfig.SUPERVISOR_SCHEDULER_META));
    info.set_uptime_secs(supervisor.getUpTime().upTime());
    info.set_version(supervisor.getStormVersion());
    info.set_resources_map(mkSupervisorCapacities(conf));
    return info;
}
@Override public void run() { // first sync assignments to local, then sync processes. if (null == assignments) { getAssignmentsFromMaster(this.supervisor.getConf(), this.supervisor.getStormClusterState(), this.supervisor.getAssignmentId()); } else { assignedAssignmentsToLocal(this.supervisor.getStormClusterState(), assignments); } this.readClusterState.run(); }
/**
 * Shuts down every worker managed by this supervisor.
 *
 * <p>When the cluster-state reader exists the request is delegated to it. Otherwise a
 * {@link ContainerLauncher} is created on the spot and used to kill the workers reported by
 * {@code SupervisorUtils.supervisorWorkerIds}.
 *
 * @param onWarnTimeout  callback forwarded to the cluster-state reader; unused on the fallback path
 * @param onErrorTimeout callback forwarded to the cluster-state reader; unused on the fallback path
 * @throws RuntimeException wrapping any exception raised on the fallback path
 */
public void shutdownAllWorkers(UniFunc<Slot> onWarnTimeout, UniFunc<Slot> onErrorTimeout) {
    if (readState != null) {
        readState.shutdownAllWorkers(onWarnTimeout, onErrorTimeout);
        return;
    }

    // No cluster-state reader available: kill the workers directly.
    try {
        ContainerLauncher fallbackLauncher =
            ContainerLauncher.make(getConf(), getId(), getSharedContext());
        killWorkers(SupervisorUtils.supervisorWorkerIds(conf), fallbackLauncher);
    } catch (Exception e) {
        throw Utils.wrapInRuntime(e);
    }
}
Supervisor s = new Supervisor(superConf, sharedContext, isuper, metricRegistry); s.launch(); s.setLocalNimbus(this.nimbus); this.nimbus.addSupervisor(s); supervisors.add(s);
/**
 * Command line entry point that shuts down all workers of a supervisor.
 *
 * <p>Reads the storm configuration, opens a temporary {@link Supervisor} and asks it to shut
 * down all workers with no timeout callbacks. The supervisor is closed when the
 * try-with-resources block exits.
 *
 * @param args command line arguments; not read by this method
 * @throws Exception if reading the configuration, building or closing the supervisor fails
 */
public static void main(String[] args) throws Exception {
    Map<String, Object> stormConf = Utils.readStormConfig();
    try (Supervisor tempSupervisor =
             new Supervisor(stormConf, null, new StandaloneSupervisor(), new StormMetricsRegistry())) {
        tempSupervisor.shutdownAllWorkers(null, null);
    }
}
}
/**
 * Looks up the assignment of the given topology from the cluster state.
 *
 * @param id topology id
 * @return the assignment registered for {@code id}
 * @throws NotAliveException      if no assignment exists for the topology
 * @throws AuthorizationException declared by the interface; may originate from the
 *                                authorization check
 * @throws TException             declared by the interface
 */
@Override
public Assignment getLocalAssignmentForStorm(String id)
    throws NotAliveException, AuthorizationException, TException {
    Map<String, Object> topoConf;
    try {
        topoConf = ConfigUtils.readSupervisorStormConf(conf, id);
    } catch (IOException e) {
        // Best effort: authorization proceeds with a null topology conf.
        LOG.warn("Topology config is not localized yet...");
        topoConf = null;
    }
    checkAuthorization(id, topoConf, "getLocalAssignmentForStorm");

    Assignment localAssignment = getStormClusterState().assignmentInfo(id, null);
    if (localAssignment != null) {
        return localAssignment;
    }
    throw new WrappedNotAliveException("No local assignment assigned for storm: " + id + " for node: " + getHostName());
}
/**
 * Runs the supervisor health checks.
 *
 * <p>A non-zero health code shuts down all workers and then raises a {@link RuntimeException}.
 */
@Override
public void run() {
    Map<String, Object> healthConf = supervisor.getConf();
    LOG.info("Running supervisor healthchecks...");

    if (HealthChecker.healthCheck(healthConf) == 0) {
        // Healthy: nothing more to do.
        return;
    }

    LOG.info("The supervisor healthchecks FAILED...");
    supervisor.shutdownAllWorkers(null, null);
    throw new RuntimeException("Supervisor failed health check. Exiting.");
}
}
/** * Kill a specific supervisor. This is intended mostly for internal testing. * * @param id the id of the supervisor */ public synchronized void killSupervisor(String id) { for (Iterator<Supervisor> it = supervisors.iterator(); it.hasNext(); ) { Supervisor s = it.next(); if (id.equals(s.getId())) { it.remove(); s.close(); //tmpDir will be handled separately return; } } }
/**
 * Starts the supervisor as a distributed-mode daemon.
 *
 * <p>Rejects local mode, calls {@link #launch()}, installs a shutdown hook that closes this
 * supervisor, registers the slots-used gauge and starts the metrics reporters. Any exception
 * raised along the way is logged and the JVM exits with status 1.
 */
public void launchDaemon() {
    LOG.info("Starting supervisor for storm version '{}'.", VersionInfo.getVersion());
    try {
        Map<String, Object> daemonConf = getConf();
        if (ConfigUtils.isLocalMode(daemonConf)) {
            throw new IllegalArgumentException("Cannot start server in local mode!");
        }
        launch();

        // Close this supervisor when the JVM shuts down.
        Utils.addShutdownHookWithForceKillIn1Sec(this::close);

        registerWorkerNumGauge("supervisor:num-slots-used-gauge", daemonConf);
        StormMetricsRegistry.startMetricsReporters(daemonConf);
    } catch (Exception e) {
        LOG.error("Failed to start supervisor\n", e);
        System.exit(1);
    }
}
@Override public void run() { try { Map<String, Object> conf = supervisor.getConf(); Set<String> downloadedStormIds = SupervisorUtils.readDownloadedTopologyIds(conf); AtomicReference<Map<Long, LocalAssignment>> newAssignment = supervisor.getCurrAssignment(); Map<String, LocalAssignment> assignedStormIds = new HashMap<>(); for (LocalAssignment localAssignment : newAssignment.get().values()) { String stormRoot = ConfigUtils.supervisorStormDistRoot(conf, stormId); LOG.debug("Checking Blob updates for storm topology id {} With target_dir: {}", stormId, stormRoot); updateBlobsForTopology(conf, stormId, supervisor.getLocalizer(), la.get_owner());
FileUtils.cleanDirectory(new File(path)); Localizer localizer = getLocalizer(); LOG.info("Starting supervisor with id {} at host {}.", getId(), getHostName());
/** * Launch the supervisor. */ public void launch() throws Exception { LOG.info("Starting Supervisor with conf {}", ConfigUtils.maskPasswords(conf)); String path = ServerConfigUtils.supervisorTmpDir(conf); FileUtils.cleanDirectory(new File(path)); SupervisorHeartbeat hb = new SupervisorHeartbeat(conf, this); hb.run(); // should synchronize supervisor so it doesn't launch anything after being down (optimization) Integer heartbeatFrequency = ObjectReader.getInt(conf.get(DaemonConfig.SUPERVISOR_HEARTBEAT_FREQUENCY_SECS)); heartbeatTimer.scheduleRecurring(0, heartbeatFrequency, hb); this.eventManager = new EventManagerImp(false); this.readState = new ReadClusterState(this); asyncLocalizer.start(); if ((Boolean) conf.get(DaemonConfig.SUPERVISOR_ENABLE)) { // This isn't strictly necessary, but it doesn't hurt and ensures that the machine stays up // to date even if callbacks don't all work exactly right eventTimer.scheduleRecurring(0, 10, new EventManagerPushCallback(new SynchronizeAssignments(this, null, readState), eventManager)); // supervisor health check eventTimer.scheduleRecurring(30, 30, new SupervisorHealthCheck(this)); } ReportWorkerHeartbeats reportWorkerHeartbeats = new ReportWorkerHeartbeats(conf, this); Integer workerHeartbeatFrequency = ObjectReader.getInt(conf.get(Config.WORKER_HEARTBEAT_FREQUENCY_SECS)); workerHeartbeatTimer.scheduleRecurring(0, workerHeartbeatFrequency, reportWorkerHeartbeats); LOG.info("Starting supervisor with id {} at host {}.", getId(), getHostName()); }
s.shutdownAllWorkers(null, ReadClusterState.THREAD_DUMP_ON_ERROR); s.close();