protected final JavaStreamingContext buildStreamingContext() {
    log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

    SparkConf sparkConf = new SparkConf();

    // Only for tests, really
    if (sparkConf.getOption("spark.master").isEmpty()) {
        log.info("Overriding master to {} for tests", streamingMaster);
        sparkConf.setMaster(streamingMaster);
    }
    // Only for tests, really
    if (sparkConf.getOption("spark.app.name").isEmpty()) {
        String appName = "Oryx" + getLayerName();
        if (id != null) {
            appName = appName + "-" + id;
        }
        log.info("Overriding app name to {} for tests", appName);
        sparkConf.setAppName(appName);
    }
    extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

    // Turn this down to prevent long blocking at shutdown
    sparkConf.setIfMissing(
        "spark.streaming.gracefulStopTimeout",
        Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
    sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
    long generationIntervalMS =
        TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
    return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
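For orientation, a minimal sketch of how a context returned by buildStreamingContext() is typically driven; the DStream setup is elided, and only standard JavaStreamingContext lifecycle calls are used:

// Sketch only: assumes this runs in the same class that defines buildStreamingContext().
JavaStreamingContext streamingContext = buildStreamingContext();
// ... define input DStreams and transformations here, before start() ...
streamingContext.start();
streamingContext.awaitTermination();
// A graceful stop would honor the spark.streaming.gracefulStopTimeout set above:
// streamingContext.stop(true, true);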
@BeforeClass
public static void setUp() {
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkIT");
    javaSparkContext = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
}
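A matching teardown is usually needed so the shared context does not leak into later test classes; a minimal sketch, assuming the same javaSparkContext field:

@AfterClass
public static void tearDown() {
    // Close the shared context so subsequent test classes can create their own.
    if (javaSparkContext != null) {
        javaSparkContext.close();
        javaSparkContext = null;
    }
}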
    .getOrCreate();
JavaSparkContext context = JavaSparkContext.fromSparkContext(session.sparkContext());
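The fragment above picks up mid-builder-chain; a minimal sketch of the full chain it implies, where the app name and master are placeholder assumptions, not from the source:

// "ExampleApp" and local[*] are placeholders for whatever the surrounding code configures.
SparkSession session = SparkSession.builder()
    .appName("ExampleApp")
    .master("local[*]")
    .getOrCreate();
JavaSparkContext context = JavaSparkContext.fromSparkContext(session.sparkContext());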
private void initContext() {
    if (session == null) {
        String jar = "";
        try {
            jar = KMeansRunner.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
        } catch (final URISyntaxException e) {
            LOGGER.error("Unable to set jar location in spark configuration", e);
        }
        session = GeoWaveSparkConf.createSessionFromParams(appName, master, host, jar);
        jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
    }
}
@Override
public JavaRDD<U> transform(SparkExecutionPluginContext context, JavaRDD<T> input) throws Exception {
    lazyInit(JavaSparkContext.fromSparkContext(input.context()));
    return delegate.transform(context, input);
}
@Override
public void run(DatasetContext datasetContext) throws Exception {
    PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
    SparkExecutionPluginContext sparkPluginContext = new BasicSparkExecutionPluginContext(
        sec, JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
        datasetContext, pipelineRuntime, stageSpec);
    wrappedCompute.initialize(sparkPluginContext);
}
}, Exception.class);
    Integer.toString(numCores)).getOrCreate();
jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
public static Dataset<Row> readJdbc(String jdbcUrlWithPassword, String sql, SparkSession spark) {
    // Run the JDBC read inside a single-element RDD so it executes on an executor
    // and can be retried there with exponential backoff.
    JavaRDD<Tuple2<List<Row>, StructType>> javaRdd1 =
        JavaSparkContext.fromSparkContext(spark.sparkContext())
            .parallelize(Arrays.asList(0))
            .map(new Function<Integer, Tuple2<List<Row>, StructType>>() {
                @Override
                public Tuple2<List<Row>, StructType> call(Integer v1) throws Exception {
                    return new ExponentialBackoffRetryPolicy<Tuple2<List<Row>, StructType>>(3, 100)
                        .attempt(() -> readJdbcAndReturnRowsAndSchema(jdbcUrlWithPassword, sql));
                }
            });
    // Persist so the read is not re-executed by the two downstream actions.
    javaRdd1 = javaRdd1.persist(StorageLevel.DISK_ONLY_2());

    // First pass: pull out the schema.
    StructType structType = javaRdd1.map(new Function<Tuple2<List<Row>, StructType>, StructType>() {
        @Override
        public StructType call(Tuple2<List<Row>, StructType> v1) throws Exception {
            return v1._2();
        }
    }).collect().get(0);

    // Second pass: flatten out the rows.
    JavaRDD<Row> javaRdd2 = javaRdd1.flatMap(new FlatMapFunction<Tuple2<List<Row>, StructType>, Row>() {
        @Override
        public Iterator<Row> call(Tuple2<List<Row>, StructType> listStructTypeTuple2) throws Exception {
            return listStructTypeTuple2._1().iterator();
        }
    });

    return spark.createDataFrame(javaRdd2, structType);
}
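A hedged usage sketch of readJdbc; the app name, JDBC URL, and query are placeholders, not from the source:

// Placeholder values; any JDBC source reachable from the executors would do.
SparkSession spark = SparkSession.builder().appName("JdbcRead").getOrCreate();
Dataset<Row> rows = readJdbc(
    "jdbc:mysql://db-host:3306/sales?user=reader&password=secret",
    "SELECT id, amount FROM orders",
    spark);
rows.show();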
        @NonNull final ErrorExtractor errorExtractor) {
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
    final ErrorTableConfiguration errorTableConf = new ErrorTableConfiguration(conf);
    if (!errorTableConf.isEnabled()) {
@Override
public JavaRDD<U> call(JavaRDD<T> data, Time batchTime) throws Exception {
    SparkExecutionPluginContext sparkPluginContext = new SparkStreamingExecutionContext(
        sec, JavaSparkContext.fromSparkContext(data.context()), batchTime.milliseconds(), stageSpec);
    String stageName = stageSpec.getName();
    data = data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null));
    return compute.transform(sparkPluginContext, data)
        .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out",
            sec.getDataTracer(stageName)));
}
}
private JavaRDD<Element> doOperation(final GetJavaRDDOfElements operation,
                                     final Context context,
                                     final AccumuloStore accumuloStore) throws OperationException {
    final JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(
        SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext());
    final Configuration conf = getConfiguration(operation);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final JavaPairRDD<Element, NullWritable> pairRDD = sparkContext.newAPIHadoopRDD(
        conf, ElementInputFormat.class, Element.class, NullWritable.class);
    final JavaRDD<Element> rdd = pairRDD.map(new FirstElement());
    return rdd;
}
private JavaRDD<Element> doOperation(final GetJavaRDDOfElementsInRanges operation,
                                     final Context context,
                                     final AccumuloStore accumuloStore) throws OperationException {
    final JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(
        SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext());
    final Configuration conf = getConfiguration(operation);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRangesFromPairs(accumuloStore, conf, operation);
    final JavaPairRDD<Element, NullWritable> pairRDD = sparkContext.newAPIHadoopRDD(
        conf, ElementInputFormat.class, Element.class, NullWritable.class);
    final JavaRDD<Element> rdd = pairRDD.map(new FirstElement());
    return rdd;
}
public void doOperation(final ImportJavaRDDOfElements operation,
                        final Context context,
                        final AccumuloStore store) throws OperationException {
    final String outputPath = operation.getOption(OUTPUT_PATH);
    if (null == outputPath || outputPath.isEmpty()) {
        throw new OperationException("Option outputPath must be set for this option to be run against the accumulostore");
    }
    final String failurePath = operation.getOption(FAILURE_PATH);
    if (null == failurePath || failurePath.isEmpty()) {
        throw new OperationException("Option failurePath must be set for this option to be run against the accumulostore");
    }
    final SparkContext sparkContext =
        SparkContextUtil.getSparkSession(context, store.getProperties()).sparkContext();
    final Broadcast<AccumuloElementConverter> broadcast =
        JavaSparkContext.fromSparkContext(sparkContext).broadcast(store.getKeyPackage().getKeyConverter());
    final ElementConverterFunction func = new ElementConverterFunction(broadcast);
    final JavaPairRDD<Key, Value> rdd = operation.getInput().flatMapToPair(func);
    final ImportKeyValueJavaPairRDDToAccumulo op = new ImportKeyValueJavaPairRDDToAccumulo.Builder()
        .input(rdd)
        .failurePath(failurePath)
        .outputPath(outputPath)
        .build();
    store.execute(new OperationChain(op), context);
}
}
final JavaSparkContext javaSC = JavaSparkContext.fromSparkContext(sc);
    = new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.rdd().context()),
        logicalStartTime, stageSpec);
final JavaRDD<T> countedRDD = data.map(new CountingFunction<T>(stageName, sec.getMetrics(),