// Build the instance topology first: a single instance type is used for both
// master and workers, keyed to the "spark" EC2 key pair.
JobFlowInstancesConfig sparkInstances = new JobFlowInstancesConfig()
        .withEc2KeyName("spark")
        .withInstanceCount(instanceCount)
        .withKeepJobFlowAliveWhenNoSteps(keepAlive)
        .withMasterInstanceType(instanceType)
        .withSlaveInstanceType(instanceType);
// Assemble the EMR launch request for the Spark cluster using the default
// EMR service/EC2 roles, shipping logs to the configured S3 bucket.
RunJobFlowRequest request = new RunJobFlowRequest()
        .withName("Spark Cluster")
        .withSteps(steps)
        .withServiceRole("EMR_DefaultRole")
        .withJobFlowRole("EMR_EC2_DefaultRole")
        .withApplications(sparkApp)
        .withReleaseLabel(emrVersion)
        .withLogUri(getS3BucketLogsUrl())
        .withInstances(sparkInstances);
/**
 * Launches a long-running ("persistent") EMR cluster and blocks until it is up.
 *
 * <p>The cluster is configured to stay alive once its (debug-only) steps finish,
 * so polling treats {@code WAITING} as a completion state in addition to the
 * usual terminal states.
 *
 * @param clusterName name shown for this job flow in the EMR console
 * @return the job flow id of the launched cluster
 * @throws Exception if launching or polling the cluster fails
 */
public String createPersistentCluster(String clusterName) throws Exception {
    StepConfig[] clusterSteps = { debugStep };
    // Keep the cluster alive after its steps complete so later jobs can reuse it.
    instances.setKeepJobFlowAliveWhenNoSteps(true);
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(clusterName)
            .withSteps(clusterSteps)
            .withLogUri(awsLogUri)
            .withInstances(instances);
    RunJobFlowResult result = runJob(request);
    // A persistent cluster parks in WAITING instead of terminating, so count
    // WAITING as "done" alongside the normal terminal states.
    pollClusterForCompletion(result, Sets.union(doneStates, Sets.newHashSet("WAITING")));
    return result.getJobFlowId();
}
// NOTE(review): fragment of a builder chain whose receiver (presumably a
// RunJobFlowRequest) starts outside this view — confirm against the full file.
.withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")") // cluster name from config (default "Digdag"), suffixed with the tag
.withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0")) // EMR release from config, defaulting to emr-5.2.0
.withSteps(stepConfigs)
.withBootstrapActions(bootstrapActions)
.withApplications(applicationConfigs)
/**
 * Runs the given managed jar on a freshly launched EMR cluster, waits for the
 * cluster to finish, and fetches its logs.
 *
 * @param defaultJar maven-managed jar whose S3 location backs the main step
 * @param jarArgs    full argument list; entries after the first two are
 *                   validated as application arguments
 * @throws Exception if launching, polling, or log fetching fails
 * @throws IllegalArgumentException if the application arguments fail validation
 */
@Override
public void runJob(MavenManagedJar defaultJar, List<String> jarArgs) throws Exception {
    String jarLocation = defaultJar.s3JarLocation(awsSoftwareBucket);
    // Only the entries after the first two are treated as application args for
    // validation. NOTE(review): the step below still receives the FULL jarArgs
    // list (including those first two entries) — confirm that is intentional.
    List<String> appArgs = newArrayList(skip(jarArgs, 2));
    if (!validateJarArgs(appArgs)) {
        // Standard unchecked type instead of raw Exception; it is still a
        // subclass of Exception, so existing catch blocks keep working. The
        // offending values are included for easier diagnosis.
        throw new IllegalArgumentException("Arguments to JAR were not valid: " + appArgs);
    }
    StepConfig[] steps = {
        debugStep,
        new StepConfig("main", new HadoopJarStepConfig(jarLocation).withArgs(jarArgs))
    };
    String jobName = computeJobName(jarArgs);
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(jobName)
            .withBootstrapActions(bootstrapActions())
            .withSteps(steps)
            .withLogUri(awsLogUri)
            .withInstances(instances);
    RunJobFlowResult result = runJob(request);
    pollClusterForCompletion(result);
    fetchLogs.run(new String[] { result.getJobFlowId() });
}
/**
 * Executes the given flow on a freshly launched EMR cluster built from the
 * supplied jar, waits for completion, fetches logs, and sends a completion alert.
 *
 * @param jar      maven-managed jar whose S3 location backs the EMR steps
 * @param f        flow to translate into EMR step configs
 * @param flowArgs arguments used for the steps and to derive the job name
 * @throws Exception if launching, polling, or log fetching fails
 */
@Override
public void runFlow(MavenManagedJar jar, Flow f, List<String> flowArgs) throws Exception {
    String jarLocation = jar.s3JarLocation(awsSoftwareBucket);
    List<StepConfig> emrSteps = createEmrSteps(f, flowArgs, jarLocation);
    String jobName = computeJobName(flowArgs);
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(jobName)
            .withBootstrapActions(bootstrapActions())
            .withSteps(emrSteps)
            .withLogUri(awsLogUri)
            .withInstances(instances);
    RunJobFlowResult launch = runJob(request);
    // Capture the id up front; it is needed for log retrieval after polling.
    String jobFlowId = launch.getJobFlowId();
    pollClusterForCompletion(launch);
    fetchLogs.run(new String[] { jobFlowId });
    alertService.alert("Cluster execution complete:\n" + jobName);
}