public static JavaSparkContext createConf() {
  SparkConf sparkConf = new SparkConf();
  sparkConf.setAppName("animalClass");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  return sc;
}
public static void main(String[] args) throws Exception { String master; if (args.length > 0) { master = args[0]; } else { master = "local"; } JavaSparkContext sc = new JavaSparkContext( master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS")); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); Integer result = rdd.fold(0, new Function2<Integer, Integer, Integer>() { public Integer call(Integer x, Integer y) { return x + y;}}); System.out.println(result); } }
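Because Function2 has a single abstract method, the same fold can be written with a Java 8 lambda; a minimal sketch, assuming the project compiles at source level 8 or higher:

  Integer sum = rdd.fold(0, (x, y) -> x + y);
  System.out.println(sum);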
public static void main(String[] args) {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaDoubleRDD input = sc.parallelizeDoubles(
      Arrays.asList(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 1000.0));
  JavaDoubleRDD result = removeOutliers(input);
  System.out.println(StringUtils.join(result.collect(), ","));
}

static JavaDoubleRDD removeOutliers(JavaDoubleRDD rdd) {
  // The method body is cut off in the original snippet; a plausible completion
  // that keeps only values within three standard deviations of the mean,
  // using JavaDoubleRDD.stats() (org.apache.spark.util.StatCounter).
  final StatCounter stats = rdd.stats();
  final Double mean = stats.mean();
  final Double stddev = stats.stdev();
  return rdd.filter(new Function<Double, Boolean>() {
    public Boolean call(Double x) {
      return Math.abs(x - mean) <= 3 * stddev;
    }
  });
}
public static void main(String[] args) throws Exception { String master; if (args.length > 0) { master = args[0]; } else { master = "local"; } JavaSparkContext sc = new JavaSparkContext( master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS")); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); JavaRDD<Integer> result = rdd.map( new Function<Integer, Integer>() { public Integer call(Integer x) { return x*x;}}); System.out.println(StringUtils.join(result.collect(), ",")); } }
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage LoadJsonWithSparkSQL sparkMaster jsonFile");
  }
  String master = args[0];
  String jsonFile = args[1];
  JavaSparkContext sc = new JavaSparkContext(master, "loadJsonwithsparksql");
  SQLContext sqlCtx = new SQLContext(sc);
  DataFrame input = sqlCtx.jsonFile(jsonFile);
  input.printSchema();
}
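SQLContext.jsonFile and DataFrame belong to the Spark 1.x API; a minimal sketch of the same load with the SparkSession entry point, assuming Spark 2.0 or newer is on the classpath:

  SparkSession spark = SparkSession.builder()
      .master(master)
      .appName("loadJsonwithsparksql")
      .getOrCreate();
  Dataset<Row> input = spark.read().json(jsonFile);
  input.printSchema();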
@Before
public void setUp() {
  sc = new JavaSparkContext("local", "JavaAPISuite");
  tempDir = Files.createTempDir();
  tempDir.deleteOnExit();
}
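A matching tear-down is usually paired with this set-up; a minimal sketch, assuming the suite stops the context and clears the reference so tests do not leak a running local driver:

  @After
  public void tearDown() {
    sc.stop();
    sc = null;
  }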
@Test
public void javaSparkContext() {
  String[] jars = new String[] {};
  java.util.Map<String, String> environment = new java.util.HashMap<>();
  new JavaSparkContext(new SparkConf().setMaster("local").setAppName("name")).stop();
  new JavaSparkContext("local", "name", new SparkConf()).stop();
  new JavaSparkContext("local", "name").stop();
  new JavaSparkContext("local", "name", "sparkHome", "jarFile").stop();
  new JavaSparkContext("local", "name", "sparkHome", jars).stop();
  new JavaSparkContext("local", "name", "sparkHome", jars, environment).stop();
}
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    throw new Exception("Usage BasicLoadJson [sparkMaster] [jsoninput] [jsonoutput]");
  }
  String master = args[0];
  String fileName = args[1];
  String outfile = args[2];
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicloadjson", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> input = sc.textFile(fileName);
  JavaRDD<Person> result = input.mapPartitions(new ParseJson()).filter(new LikesPandas());
  JavaRDD<String> formatted = result.mapPartitions(new WriteJson());
  formatted.saveAsTextFile(outfile);
}
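ParseJson, LikesPandas, and WriteJson are not shown in the snippet; a plausible sketch of ParseJson using Jackson's ObjectMapper, assuming the Spark 1.x FlatMapFunction signature (call returns an Iterable) and a Person bean that Jackson can bind each JSON line to:

  public static class ParseJson implements FlatMapFunction<Iterator<String>, Person> {
    public Iterable<Person> call(Iterator<String> lines) throws Exception {
      ObjectMapper mapper = new ObjectMapper();
      List<Person> people = new ArrayList<>();
      while (lines.hasNext()) {
        String line = lines.next();
        try {
          people.add(mapper.readValue(line, Person.class));
        } catch (Exception e) {
          // skip records that fail to parse
        }
      }
      return people;
    }
  }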
public void run(String master, String csv1, String csv2) throws Exception {
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> csvFile1 = sc.textFile(csv1);
  JavaRDD<String> csvFile2 = sc.textFile(csv2);
  JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
  JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
  JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
  List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}
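ParseLine is not shown in the snippet; a plausible sketch that keys each CSV row by an integer id in the first column (assumption: plain comma-separated input with no quoted fields):

  public static class ParseLine implements PairFunction<String, Integer, String[]> {
    public Tuple2<Integer, String[]> call(String line) {
      String[] fields = line.split(",");
      return new Tuple2<>(Integer.parseInt(fields[0]), fields);
    }
  }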
private LocalHiveSparkClient(SparkConf sparkConf) {
  sc = new JavaSparkContext(sparkConf);
  jobMetricsListener = new JobMetricsListener();
  sc.sc().listenerBus().addListener(jobMetricsListener);
}
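listenerBus() is a Spark-internal API; a sketch of registering the same listener through the public SparkContext method instead, assuming JobMetricsListener extends SparkListener:

  sc.sc().addSparkListener(jobMetricsListener);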
@Test
public void testPluginClassDoesNotExist() {
  SparkConf conf = initializeSparkConf("nonexistant.plugin");
  try {
    sc = new JavaSparkContext(conf);
    fail("No exception thrown for nonexistant plugin");
  } catch (Exception e) {
    // We cannot catch ClassNotFoundException directly because Java doesn't think it'll be thrown
    assertTrue(e.toString().startsWith("java.lang.ClassNotFoundException"));
  }
}
@Test
public void testAddPlugin() throws InterruptedException {
  // Load the sample TestExecutorPlugin, which will change the value of numSuccessfulPlugins
  SparkConf conf = initializeSparkConf(testPluginName);
  sc = new JavaSparkContext(conf);
  assertEquals(1, numSuccessfulPlugins);
  sc.stop();
  sc = null;
  assertEquals(1, numSuccessfulTerminations);
}
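The plugin class behind testPluginName is not shown; a plausible sketch, assuming the Spark 2.4-era org.apache.spark.ExecutorPlugin interface with init() and shutdown() hooks and static counters declared on the test suite:

  public static class TestExecutorPlugin implements ExecutorPlugin {
    public void init() { numSuccessfulPlugins++; }
    public void shutdown() { numSuccessfulTerminations++; }
  }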
@Test
public void testPluginShutdownWithException() {
  // Verify an exception in one plugin shutdown does not affect the others
  String pluginNames = testPluginName + "," + testBadPluginName + "," + testPluginName;
  SparkConf conf = initializeSparkConf(pluginNames);
  sc = new JavaSparkContext(conf);
  assertEquals(3, numSuccessfulPlugins);
  sc.stop();
  sc = null;
  assertEquals(2, numSuccessfulTerminations);
}
@Test
public void testAddMultiplePlugins() throws InterruptedException {
  // Load two plugins and verify they both execute.
  SparkConf conf = initializeSparkConf(testPluginName + "," + testSecondPluginName);
  sc = new JavaSparkContext(conf);
  assertEquals(2, numSuccessfulPlugins);
  sc.stop();
  sc = null;
  assertEquals(2, numSuccessfulTerminations);
}