/** * Terminates all instances of the {@link PeriodicNotificationTwillApp} on the YARN cluster. */ public void stopApp() { LOG.info("Stopping any running instances..."); int counter = 0; // It is possible that we have launched multiple instances of the app. For now, stop them all, one at a time. for(final TwillController c : twillRunner.lookup(PeriodicNotificationTwillApp.APPLICATION_NAME)) { final ResourceReport report = c.getResourceReport(); LOG.info("Attempting to stop {} with YARN ApplicationId: {} and Twill RunId: {}", PeriodicNotificationTwillApp.APPLICATION_NAME, report.getApplicationId(), c.getRunId()); Futures.getUnchecked(c.terminate()); LOG.info("Stopped {} with YARN ApplicationId: {} and Twill RunId: {}", PeriodicNotificationTwillApp.APPLICATION_NAME, report.getApplicationId(), c.getRunId()); counter++; } LOG.info("Stopped {} instance(s) of {}", counter, PeriodicNotificationTwillApp.APPLICATION_NAME); }
/**
 * Forwards the call to the wrapped delegate's {@code awaitTerminated()}.
 *
 * @throws ExecutionException if the delegate's {@code awaitTerminated()} throws it
 */
@Override
public void awaitTerminated() throws ExecutionException {
  delegate.awaitTerminated();
}
/**
 * Forwards an instance-count change request to the wrapped delegate.
 *
 * @param runnable name of the runnable whose instance count should change
 * @param newCount the requested number of instances
 * @return the future returned by the delegate for this request
 */
@Override
public Future<Integer> changeInstances(String runnable, int newCount) {
  final Future<Integer> pendingChange = delegate.changeInstances(runnable, newCount);
  return pendingChange;
}
twillController.onRunning(new Runnable() { @Override public void run() { twillController.onTerminated(new Runnable() { @Override public void run() {
when(twillController.getRunId()).thenReturn(runId); when(twillController.getResourceReport()).thenReturn(resourceReport); return futureController1; }).when(twillController).terminate(); }).when(twillController).onRunning(any(Runnable.class), any(Executor.class)); }).when(twillController).onTerminated(any(Runnable.class), any(Executor.class));
@Test public void testExtraOptions() throws InterruptedException, ExecutionException { // Start the testing app with jvm options at both global level as well as for the specific runnables. TwillController controller = getTwillRunner() .prepare(new JvmOptionsApplication()) .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out))) .setJVMOptions("-Dservice.name=default") .setJVMOptions("r2", "-Dservice.name=r2") .start(); // For r1 and r3 will be using "default" as the service name. waitForSize(controller.discoverService("default"), 2, 120); // r2 will be use "r2" as the service name. waitForSize(controller.discoverService("r2"), 1, 120); controller.terminate().get(); }
@Nullable @Override public ResourceReport call() throws Exception { return delegate.getResourceReport(); } });
@Override public Future<? extends ServiceController> call() throws Exception { return delegate.terminate(); } });
/**
 * Returns the run identifier reported by the underlying Twill controller.
 *
 * @return the Twill RunId
 */
public RunId getTwillRunId() {
  final RunId twillRunId = twillController.getRunId();
  return twillRunId;
}
@Test public void testAppSessionExpire() throws InterruptedException, ExecutionException, TimeoutException { TwillRunner runner = getTwillRunner(); TwillController controller = runner.prepare(new SleepRunnable(600)) .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true))) .start(); final CountDownLatch runLatch = new CountDownLatch(1); controller.onRunning(new Runnable() { @Override public void run() { runLatch.countDown(); } }, Threads.SAME_THREAD_EXECUTOR); // Wait for application running Assert.assertTrue(runLatch.await(60, TimeUnit.SECONDS)); // Find the app master ZK session and expire it two times, 10 seconds apart. for (int i = 0; i < 2; i++) { Assert.assertTrue(expireAppMasterZKSession(controller, 10, TimeUnit.SECONDS)); try { controller.awaitTerminated(10, TimeUnit.SECONDS); Assert.fail("Unexpected application termination."); } catch (TimeoutException e) { // OK, expected. } } controller.terminate().get(120, TimeUnit.SECONDS); }
serviceDiscovered = controller.discoverService("DistributedTest"); Assert.assertTrue(waitForSize(serviceDiscovered, 4, 60)); controller.changeInstances("Alice", 4).get(60, TimeUnit.SECONDS); serviceDiscovered = controller.discoverService("DistributedTest"); Assert.assertTrue(waitForSize(serviceDiscovered, 6, 60)); controller.changeInstances("Alice", 1).get(60, TimeUnit.SECONDS); serviceDiscovered = controller.discoverService("DistributedTest"); Assert.assertTrue(waitForSize(serviceDiscovered, 3, 60)); controller.changeInstances("Bob", 3).get(60, TimeUnit.SECONDS); serviceDiscovered = controller.discoverService("DistributedTest"); Assert.assertTrue(waitForSize(serviceDiscovered, 5, 60)); controller.changeInstances("Eve", 3).get(60, TimeUnit.SECONDS); serviceDiscovered = controller.discoverService("DistributedTest"); Assert.assertTrue(waitForSize(serviceDiscovered, 7, 60)); } finally { controller.terminate().get(120, TimeUnit.SECONDS);
controller.onRunning(new Runnable() { @Override public void run() { Iterable<Discoverable> envEchoServices = controller.discoverService("envecho"); Assert.assertTrue(waitForSize(envEchoServices, 1, 120)); controller.terminate().get(120, TimeUnit.SECONDS);
LOG.warn("Stopping one extra instance of {}", Constants.Service.MASTER_SERVICES); try { controller.terminate(); controller.awaitTerminated(); } catch (ExecutionException e) { LOG.warn("Exception while Stopping one extra instance of {} - {}", Constants.Service.MASTER_SERVICES, e);
@Test public void testFailureRestart() throws Exception { TwillRunner runner = getTwillRunner(); ResourceSpecification resource = ResourceSpecification.Builder.with() .setVirtualCores(1) .setMemory(512, ResourceSpecification.SizeUnit.MEGA) .setInstances(2) .build(); TwillController controller = runner.prepare(new FailureRunnable(), resource) .withApplicationArguments("failure") .withArguments(FailureRunnable.class.getSimpleName(), "failure2") .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true))) .start(); Iterable<Discoverable> discoverables = controller.discoverService("failure"); Assert.assertTrue(waitForSize(discoverables, 2, 120)); // Make sure we see the right instance IDs Assert.assertEquals(Sets.newHashSet(0, 1), getInstances(discoverables)); // Kill server with instanceId = 0 controller.sendCommand(FailureRunnable.class.getSimpleName(), Command.Builder.of("kill0").build()); // Make sure the runnable is killed. Assert.assertTrue(waitForSize(discoverables, 1, 120)); // Wait for the restart Assert.assertTrue(waitForSize(discoverables, 2, 120)); // Make sure we see the right instance IDs Assert.assertEquals(Sets.newHashSet(0, 1), getInstances(discoverables)); controller.terminate().get(120, TimeUnit.SECONDS); }
controller.changeInstances(FailingInstanceServer.class.getSimpleName(), 3); retriesExhausted.await(); Assert.assertEquals(3, retriesSeen.get()); controller.terminate().get();
private ClusterEnriched startClusterAsync(Cluster cluster) throws YarnProvisioningHandlingException { YarnConfiguration yarnConfiguration = new YarnConfiguration(); updateYarnConfiguration(cluster, yarnConfiguration); List<Property> props = cluster.getClusterConfig().getSubPropertyList(); // only to show those props on UI/API defaultsConfigurator.getDistroTypeDefaultsConfigurator( cluster.getClusterConfig().getDistroType(), cluster.getClusterConfig().getIsSecure()).mergeProperties(props); List<Property> cleansedProperties = new ArrayList<>(); for (Property prop : props) { if (!EXCLUDED.contains(prop.getKey())) { cleansedProperties.add(prop); } } // async call - unfortunately I can not add event handlers before start of TwillController // which means we can miss failures TwillController twillController = yarnController.startCluster(yarnConfiguration, cleansedProperties); String runId = twillController.getRunId().getId(); RunId dRunId = new RunId(runId); cluster.setState(ClusterState.STARTING); cluster.setRunId(dRunId); OnRunningRunnable onRunning = new OnRunningRunnable(cluster); twillController.onRunning(onRunning, Threads.SAME_THREAD_EXECUTOR); initOnTerminatingThread(cluster, twillController); return getClusterInfo(cluster); }
/**
 * Runs three instances of {@code SleepTask} and expects the application to start within a
 * minute, terminate within another minute, and report a {@code SUCCEEDED} termination status.
 */
@Test
public void testTaskCompleted() throws InterruptedException, TimeoutException, ExecutionException {
  // One virtual core and 512 MB per instance, three instances in total.
  ResourceSpecification threeInstances = ResourceSpecification.Builder.with()
    .setVirtualCores(1)
    .setMemory(512, ResourceSpecification.SizeUnit.MEGA)
    .setInstances(3)
    .build();

  TwillController controller = getTwillRunner()
    .prepare(new SleepTask(), threeInstances)
    .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true)))
    .start();

  // The latch is released once the controller reports the application as running.
  final CountDownLatch started = new CountDownLatch(1);
  Runnable signalStarted = new Runnable() {
    @Override
    public void run() {
      started.countDown();
    }
  };
  controller.onRunning(signalStarted, Threads.SAME_THREAD_EXECUTOR);

  boolean startedInTime = started.await(1, TimeUnit.MINUTES);
  Assert.assertTrue(startedInTime);

  controller.awaitTerminated(1, TimeUnit.MINUTES);
  Assert.assertEquals(ServiceController.TerminationStatus.SUCCEEDED, controller.getTerminationStatus());
}
/**
 * Changes the number of instances of a flowlet.
 * <p>
 * The sequence is: validate the request, wait for the old instance count, send SUSPEND to the
 * flowlet, reconfigure its consumer queues for the new count, change the instance count
 * through the Twill controller, and finally send RESUME. Each controller call is awaited
 * before the next step begins.
 *
 * @param flowletId        id of the flowlet to resize
 * @param newInstanceCount the requested number of instances; must not exceed the flowlet's maximum
 * @param oldInstanceCount the instance count to wait for before suspending
 * @throws IllegalArgumentException if the flowlet is unknown or the new count exceeds the maximum
 * @throws Exception if any of the awaited steps fails
 */
void update(String flowletId, int newInstanceCount, int oldInstanceCount) throws Exception {
  FlowletDefinition flowletDefinition = program.getSpecification().getFlowlets().get(flowletId);
  // An unknown flowlet id yields null from the lookup; fail with a descriptive message
  // instead of a bare NullPointerException on the next line.
  Preconditions.checkArgument(flowletDefinition != null,
                              "Flowlet %s does not exist in the program specification", flowletId);

  int maxInstances = flowletDefinition.getFlowletSpec().getMaxInstances();
  Preconditions.checkArgument(newInstanceCount <= maxInstances,
                              "Flowlet %s can have a maximum of %s instances", flowletId, maxInstances);

  waitForInstances(flowletId, oldInstanceCount);
  twillController.sendCommand(flowletId, ProgramCommands.SUSPEND).get();

  FlowUtils.reconfigure(consumerQueues.get(flowletId),
                        FlowUtils.generateConsumerGroupId(program, flowletId), newInstanceCount, queueAdmin);

  twillController.changeInstances(flowletId, newInstanceCount).get();
  twillController.sendCommand(flowletId, ProgramCommands.RESUME).get();
}
@Test public void testProvisionTimeout() throws InterruptedException, ExecutionException, TimeoutException, IOException { TwillRunner runner = getTwillRunner(); // Create a parent folder to be written by EventHandler#aborted() File parentFolder = TMP_FOLDER.newFolder(); parentFolder.setWritable(true, false); TwillController controller = runner.prepare(new TimeoutApplication(parentFolder.getAbsolutePath())) .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true))) .start(); // The provision should failed in 30 seconds after AM started, which AM could took a while to start. // Hence we give 90 seconds max time here. try { controller.awaitTerminated(90, TimeUnit.SECONDS); // EventHandler#aborted() method should be called to create a file Assert.assertTrue(new File(parentFolder.getAbsolutePath(), ABORTED_FILE).exists()); String[] abortedFiles = parentFolder.list(); Assert.assertNotNull(abortedFiles); Assert.assertEquals(1, abortedFiles.length); } finally { // If it timeout, kill the app as cleanup. controller.kill(); } }