@Test
public void testChildProcLauncher() throws Exception {
  // This test fails on Windows because executors cannot be launched there,
  // due to the path length limitation. See SPARK-18718.
  assumeTrue(!Utils.isWindows());

  SparkSubmitOptionParser opts = new SparkSubmitOptionParser();
  Map<String, String> env = new HashMap<>();
  env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");

  launcher
    .setMaster("local")
    .setAppResource(SparkLauncher.NO_RESOURCE)
    .addSparkArg(opts.CONF,
      String.format("%s=-Dfoo=ShouldBeOverriddenBelow", SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS))
    .setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
      "-Dfoo=bar -Dtest.appender=console")
    .setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, System.getProperty("java.class.path"))
    .addSparkArg(opts.CLASS, "ShouldBeOverriddenBelow")
    .setMainClass(SparkLauncherTestApp.class.getName())
    .redirectError()
    .addAppArgs("proc");
  final Process app = launcher.launch();
  new OutputRedirector(app.getInputStream(), getClass().getName() + ".child", TF);
  assertEquals(0, app.waitFor());
}
@Before
public void configureLauncher() {
  launcher = new SparkLauncher().setSparkHome(System.getProperty("spark.test.home"));
}
launcher.addSparkArg(opts.HELP);
try {
  launcher.addSparkArg(opts.PROXY_USER);
  fail("Expected IllegalArgumentException.");
} catch (IllegalArgumentException e) {
  // Expected: --proxy-user requires a value.
}

launcher.addSparkArg(opts.PROXY_USER, "someUser");
try {
  launcher.addSparkArg(opts.HELP, "someValue");
  fail("Expected IllegalArgumentException.");
} catch (IllegalArgumentException e) {
  // Expected: --help does not take a value.
}

launcher.addSparkArg("--future-argument");
launcher.addSparkArg("--future-argument", "someValue");

launcher.addSparkArg(opts.MASTER, "myMaster");
assertEquals("myMaster", launcher.builder.master);

launcher.addJar("foo");
launcher.addSparkArg(opts.JARS, "bar");
assertEquals(Arrays.asList("bar"), launcher.builder.jars);

launcher.addFile("foo");
launcher.addSparkArg(opts.FILES, "bar");
assertEquals(Arrays.asList("bar"), launcher.builder.files);

launcher.addPyFile("foo");
launcher.addSparkArg(opts.PY_FILES, "bar");
assertEquals(Arrays.asList("bar"), launcher.builder.pyFiles);
@Test(expected = IllegalStateException.class)
public void testRedirectTwiceFails() throws Exception {
  launcher.setAppResource("fake-resource.jar")
    .setMainClass("my.fake.class.Fake")
    .redirectError()
    .redirectError(ProcessBuilder.Redirect.PIPE)
    .launch();
}
SparkLauncher spark = new SparkLauncher()
  .setVerbose(true)
  .setJavaHome(javaHome)
  .setSparkHome(sparkHome)
  .setAppResource(appResource)   // "/my/app.jar"
  .setMainClass(mainClass)       // "my.spark.app.Main"
  .setMaster("local")
  .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
  .addAppArgs(appArgs);
Process proc = spark.launch();
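Note that launch() returns a plain java.lang.Process, so the caller is responsible for draining the child's output; otherwise the child can block once the pipe buffer fills. A minimal sketch of one way to do that (not from the original snippet; standard java.io imports assumed):

// Sketch: merge stderr into stdout, drain the combined stream, then wait for
// the exit code. Uses java.io.BufferedReader and java.io.InputStreamReader.
Process proc = spark.redirectError().launch();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()))) {
  String line;
  while ((line = reader.readLine()) != null) {
    System.out.println(line); // or hand each line to a logging framework
  }
}
int exitCode = proc.waitFor();
System.out.println("Spark exited with code " + exitCode);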
SparkSubmitOptionParser validator = new ArgumentValidator(true);
if (validator.MASTER.equals(name)) {
  setMaster(value);
} else if (validator.PROPERTIES_FILE.equals(name)) {
  setPropertiesFile(value);
} else if (validator.CONF.equals(name)) {
  String[] vals = value.split("=", 2);
  setConf(vals[0], vals[1]);
} else if (validator.CLASS.equals(name)) {
  setMainClass(value);
} else if (validator.JARS.equals(name)) {
  builder.jars.clear();
  for (String jar : value.split(",")) {
    addJar(jar);
  }
} else if (validator.FILES.equals(name)) {
  builder.files.clear();
  for (String file : value.split(",")) {
    addFile(file);
  }
} else if (validator.PY_FILES.equals(name)) {
  builder.pyFiles.clear();
  for (String file : value.split(",")) {
    addPyFile(file);
  }
}
env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath()); final SparkLauncher sparkLauncher = new SparkLauncher(env); sparkLauncher.setSparkHome(_sparkHome); sparkLauncher.setMaster("yarn-cluster"); sparkLauncher.setAppName("DataCleaner"); sparkLauncher.addJar(jar); sparkLauncher.setMainClass(Main.class.getName()); sparkLauncher.setConf("spark.serializer", "org.apache.spark.serializer.JavaSerializer"); sparkLauncher.addAppArgs(primaryJar.get()); sparkLauncher.addAppArgs(toHadoopPath(configurationHdfsPath)); sparkLauncher.addAppArgs(toHadoopPath(jobHdfsPath)); final URI uri = copyFileToHdfs(tempFile, _fileSystem.getHomeDirectory().toUri().resolve("temp/" + tempFile.getName()).toString()); sparkLauncher.addAppArgs(uri.toString());
launcher = new SparkLauncher()
  .setAppResource(this.deploymentConfig.getJarFile())
  .setMainClass(this.getClass().getName())
  .addAppArgs(enc)
  .setMaster(this.deploymentConfig.getSparkHost())
  .setConf(SparkLauncher.DRIVER_MEMORY, "2g"); // TODO
appHandle = launcher.startApplication();
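startApplication can also take SparkAppHandle.Listener arguments that are notified of state transitions, which avoids polling the handle. A minimal sketch (the log lines are illustrative, not from the original code):

// Attach a state listener at launch time; SparkAppHandle.State.isFinal()
// reports whether the application has reached a terminal state.
SparkAppHandle appHandle = launcher.startApplication(new SparkAppHandle.Listener() {
  @Override
  public void stateChanged(SparkAppHandle handle) {
    System.out.println("Spark app state: " + handle.getState());
    if (handle.getState().isFinal()) {
      System.out.println("Finished, app id: " + handle.getAppId());
    }
  }

  @Override
  public void infoChanged(SparkAppHandle handle) {
    // The application id or other info became available.
  }
});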
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce " + "repaired files to replace with") public String deduplicate(@CliOption(key = { "duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) final String duplicatedPartitionPath, @CliOption(key = { "repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath, @CliOption(key = { "sparkProperties"}, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath) throws Exception { SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Deduplicated files placed in: " + repairedOutputPath; } return "Deduplication failed "; }
  return new ChildProcess(conf, promise, child, confFile);
} else {
  final SparkLauncher launcher = new SparkLauncher();
  launcher.setDeployMode(deployMode);
  launcher.setSparkHome(System.getenv(SPARK_HOME_ENV));
  launcher.setAppResource("spark-internal");
  launcher.setPropertiesFile(confFile.getAbsolutePath());
  launcher.setMainClass(RSCDriverBootstrapper.class.getName());
  launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
  return new ChildProcess(conf, promise, launcher.launch(), confFile);
}
/**
 * TODO: Need to fix a bunch of hardcoded stuff here, e.g. history server, Spark distro.
 */
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
  String currentJar = new File(
      SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
      .getAbsolutePath();
  SparkLauncher sparkLauncher = new SparkLauncher()
      .setAppResource(currentJar)
      .setMainClass(SparkMain.class.getName())
      .setPropertiesFile(propertiesFile);
  File libDirectory = new File(new File(currentJar).getParent(), "lib");
  for (String library : libDirectory.list()) {
    sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
  }
  return sparkLauncher;
}
Process spark = new SparkLauncher()
    .setSparkHome("C:\\spark-1.4.1-bin-hadoop2.6")
    .setAppResource("C:\\spark-1.4.1-bin-hadoop2.6\\lib\\spark-examples-1.4.1-hadoop2.6.0.jar")
    .setMainClass("org.apache.spark.examples.SparkPi")
    .setMaster("yarn-cluster")
    .launch();

InputStreamReaderRunnable inputStreamReaderRunnable =
    new InputStreamReaderRunnable(spark.getInputStream(), "input");
Thread inputThread = new Thread(inputStreamReaderRunnable, "LogStreamReader input");
inputThread.start();

InputStreamReaderRunnable errorStreamReaderRunnable =
    new InputStreamReaderRunnable(spark.getErrorStream(), "error");
Thread errorThread = new Thread(errorStreamReaderRunnable, "LogStreamReader error");
errorThread.start();

System.out.println("Waiting for finish...");
int exitCode = spark.waitFor();
System.out.println("Finished! Exit code: " + exitCode);
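InputStreamReaderRunnable is not a Spark class; it is presumably a small helper that drains one stream on its own thread so the child process cannot block. A hypothetical sketch matching the constructor used above:

// Hypothetical sketch of the InputStreamReaderRunnable helper; the real one
// may differ. It reads a child stream line by line and tags each line.
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;

public class InputStreamReaderRunnable implements Runnable {
  private final BufferedReader reader;
  private final String name;

  public InputStreamReaderRunnable(InputStream is, String name) {
    this.reader = new BufferedReader(new InputStreamReader(is));
    this.name = name;
  }

  @Override
  public void run() {
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println("[" + name + "] " + line);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}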
@Test(expected = IllegalArgumentException.class)
public void testBadLogRedirect() throws Exception {
  File out = Files.createTempFile("stdout", "txt").toFile();
  out.deleteOnExit();
  new SparkLauncher()
    .redirectError()
    .redirectOutput(out)
    .redirectToLog("foo")
    .launch()
    .waitFor();
}
/**
 * Sets all output to be logged and redirected to a logger with the specified name.
 *
 * @param loggerName The name of the logger to log stdout and stderr.
 * @return This launcher.
 */
public SparkLauncher redirectToLog(String loggerName) {
  setConf(CHILD_PROCESS_LOGGER_NAME, loggerName);
  return this;
}
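In use, this routes the child's stdout and stderr to the named logger instead of raw streams, which pairs naturally with startApplication. A short example (the logger name and the app resource/class placeholders are arbitrary):

// Route child output to a logger; "my.spark.child" is an arbitrary name.
SparkAppHandle handle = new SparkLauncher()
    .setAppResource("/my/app.jar")        // placeholder path
    .setMainClass("my.spark.app.Main")    // placeholder class
    .setMaster("local")
    .redirectToLog("my.spark.child")
    .startApplication();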
@Test(expected = IllegalArgumentException.class)
public void testRedirectErrorTwiceFails() throws Exception {
  File err = Files.createTempFile("stderr", "txt").toFile();
  err.deleteOnExit();
  new SparkLauncher()
    .redirectError()
    .redirectError(err)
    .launch()
    .waitFor();
}
@Test
public void testRedirectLastWins() throws Exception {
  SparkLauncher launcher = new SparkLauncher();
  launcher.redirectError(ProcessBuilder.Redirect.PIPE)
    .redirectError(ProcessBuilder.Redirect.INHERIT);
  assertEquals(ProcessBuilder.Redirect.Type.INHERIT, launcher.errorStream.type());

  launcher.redirectOutput(ProcessBuilder.Redirect.PIPE)
    .redirectOutput(ProcessBuilder.Redirect.INHERIT);
  assertEquals(ProcessBuilder.Redirect.Type.INHERIT, launcher.outputStream.type());
}
public Process launchProcess(final SparkLauncher sparkLauncher) throws IOException {
  final Process process = sparkLauncher.launch();
  final InputStream errorStream = process.getErrorStream();
  startLogger(errorStream);
  final InputStream inputStream = process.getInputStream();
  startLogger(inputStream);
  return process;
}
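startLogger is defined elsewhere in that codebase; its job is evidently to drain each child stream so the process cannot block on a full pipe buffer. A plausible shape (an assumption, not the actual implementation; uses java.io.BufferedReader and java.io.InputStreamReader) is a daemon thread that copies the stream line by line:

// Hypothetical sketch of the startLogger helper referenced above. A real
// implementation would likely target a logging framework rather than stdout.
private void startLogger(final InputStream stream) {
  Thread t = new Thread(() -> {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);
      }
    } catch (IOException e) {
      // The stream closes when the process exits; nothing more to read.
    }
  }, "spark-process-logger");
  t.setDaemon(true);
  t.start();
}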
@CliCommand(value = "compaction schedule", help = "Schedule Compaction") public String scheduleCompact( @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G", help = "Spark executor memory") final String sparkMemory) throws Exception { boolean initialized = HoodieCLI.initConf(); HoodieCLI.initFS(initialized); // First get a compaction instant time and pass it to spark launcher for scheduling compaction String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime(); if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) { String sparkPropertiesPath = Utils.getDefaultPropertiesFile( scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), HoodieCLI.tableMetadata.getBasePath(), HoodieCLI.tableMetadata.getTableConfig().getTableName(), compactionInstantTime, sparkMemory); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Failed to run compaction for " + compactionInstantTime; } return "Compaction successfully completed for " + compactionInstantTime; } else { throw new Exception("Compactions can only be run for table type : MERGE_ON_READ"); } }