@Override
public JsonElement serialize(SparkSpecification src, Type typeOfSrc, JsonSerializationContext context) {
  JsonObject jsonObj = new JsonObject();
  jsonObj.add("className", new JsonPrimitive(src.getClassName()));
  jsonObj.add("name", new JsonPrimitive(src.getName()));
  jsonObj.add("description", new JsonPrimitive(src.getDescription()));
  jsonObj.add("plugins", serializeMap(src.getPlugins(), context, Plugin.class));
  // mainClassName is optional, so guard against null before wrapping it in a JsonPrimitive
  if (src.getMainClassName() != null) {
    jsonObj.add("mainClassName", new JsonPrimitive(src.getMainClassName()));
  }
  jsonObj.add("datasets", serializeSet(src.getDatasets(), context, String.class));
  jsonObj.add("properties", serializeMap(src.getProperties(), context, String.class));
  serializeResources(jsonObj, "client", context, src.getClientResources());
  serializeResources(jsonObj, "driver", context, src.getDriverResources());
  serializeResources(jsonObj, "executor", context, src.getExecutorResources());
  jsonObj.add("handlers", serializeList(src.getHandlers(), context, SparkHttpServiceHandlerSpecification.class));
  return jsonObj;
}
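A serializer like this only takes effect once it is registered with Gson. A minimal usage sketch, assuming the serialize() method above lives in a class named SparkSpecificationCodec and that a SparkSpecification instance named spec is in scope (both names are illustrative, not from the source):

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;

// Route (de)serialization of SparkSpecification through the custom codec
// instead of Gson's reflective default.
Gson gson = new GsonBuilder()
  .registerTypeAdapter(SparkSpecification.class, new SparkSpecificationCodec())
  .create();
String json = gson.toJson(spec, SparkSpecification.class);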
public SparkSpecification createSpecification() {
  Set<String> datasets = new HashSet<>();
  // Grab all @Property and @Dataset fields
  Reflections.visit(spark, spark.getClass(),
                    new PropertyFieldExtractor(properties),
                    new DataSetFieldExtractor(datasets));
  return new SparkSpecification(spark.getClass().getName(), name, description, mainClassName,
                                datasets, properties, clientResources, driverResources,
                                executorResources, getHandlers(), getPlugins());
}
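The name, description, main class, and resource values consumed here are typically supplied by the program's configure() method. A hedged sketch of what that can look like, assuming the standard AbstractSpark setters; the class names and resource sizes are illustrative:

public class WordCountSpark extends AbstractSpark {
  @Override
  protected void configure() {
    setName("WordCountSpark");
    setDescription("Counts words using Spark");
    // Becomes mainClassName in the generated SparkSpecification
    setMainClass(WordCountProgram.class);
    setDriverResources(new Resources(1024));    // driver memory in MB
    setExecutorResources(new Resources(2048));  // executor memory in MB
  }
}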
BasicSparkClientContext(SparkRuntimeContext sparkRuntimeContext) {
  this.sparkRuntimeContext = sparkRuntimeContext;
  this.localizeResources = new HashMap<>();
  this.additionalPythonLocations = new LinkedList<>();
  SparkSpecification spec = sparkRuntimeContext.getSparkSpecification();
  // Runtime arguments may override the driver/executor resources declared in the specification
  this.driverResources = SystemArguments.getResources(getDriverRuntimeArguments(), spec.getDriverResources());
  this.executorResources = SystemArguments.getResources(getExecutorRuntimeArguments(), spec.getExecutorResources());
}
builder.add("--conf").add("spark.app.name=" + spec.getName()); builder.add("--cdap.user.main.class=" + spec.getMainClassName());
options.getUserArguments().asMap());
launchConfig.addRunnable(spec.getName(), new SparkTwillRunnable(spec.getName()), 1,
                         clientArgs, spec.getClientResources(), 0);
@Override
public Thread newThread(Runnable r) {
  return new Thread(r, "spark-submitter-" + spec.getName() + "-" + runtimeContext.getRunId());
}
});
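This factory names each submitter thread after the Spark program and its run, which makes it easy to pick out in thread dumps. A minimal sketch of how such a factory is typically consumed; threadFactory and submitTask are illustrative names for the anonymous factory above and the submission Runnable:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

// Every thread this executor creates is named "spark-submitter-<name>-<runId>"
ExecutorService executor = Executors.newSingleThreadExecutor(threadFactory);
Future<?> submission = executor.submit(submitTask);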
private void registerDatasets(ApplicationWithPrograms input) {
  ApplicationSpecification appSpec = input.getSpecification();
  ApplicationId appId = input.getApplicationId();
  NamespaceId namespaceId = appId.getParent();
  // MapReduce programs
  for (MapReduceSpecification program : appSpec.getMapReduce().values()) {
    ProgramId programId = appId.mr(program.getName());
    for (String dataset : program.getDataSets()) {
      usageRegistry.register(programId, namespaceId.dataset(dataset));
    }
  }
  // Spark programs
  for (SparkSpecification sparkSpec : appSpec.getSpark().values()) {
    ProgramId programId = appId.spark(sparkSpec.getName());
    for (String dataset : sparkSpec.getDatasets()) {
      usageRegistry.register(programId, namespaceId.dataset(dataset));
    }
  }
  // HTTP service handlers
  for (ServiceSpecification serviceSpecification : appSpec.getServices().values()) {
    ProgramId programId = appId.service(serviceSpecification.getName());
    for (HttpServiceHandlerSpecification handlerSpecification : serviceSpecification.getHandlers().values()) {
      for (String dataset : handlerSpecification.getDatasets()) {
        usageRegistry.register(programId, namespaceId.dataset(dataset));
      }
    }
  }
}
new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()),
new DataSetFieldSetter(runtimeContext.getDatasetCache()),
new MetricsFieldSetter(runtimeContext));
@Override
protected List<SparkHandlerDelegatorContext> createDelegatorContexts() throws Exception {
  List<SparkHandlerDelegatorContext> contexts = new ArrayList<>();
  InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
  // Create one delegator context per HTTP service handler declared in the specification
  for (SparkHttpServiceHandlerSpecification spec : context.getSpecification().getHandlers()) {
    Class<?> handlerClass = getProgram().getClassLoader().loadClass(spec.getClassName());
    @SuppressWarnings("unchecked")
    TypeToken<SparkHttpServiceHandler> type = TypeToken.of((Class<SparkHttpServiceHandler>) handlerClass);
    MetricsContext handlerMetricsContext = runtimeContext.getProgramMetrics().childContext(
      Constants.Metrics.Tag.HANDLER, handlerClass.getSimpleName());
    contexts.add(new SparkHandlerDelegatorContext(type, instantiatorFactory, spec,
                                                  runtimeContext.getProgramMetrics(), handlerMetricsContext));
  }
  return contexts;
}
try {
  spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
} catch (Exception e) {
  LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
  throw Throwables.propagate(e);
}
arguments);
DataStreamsPipelineSpec spec = GSON.fromJson(
  context.getSpecification().getProperty(Constants.PIPELINEID), DataStreamsPipelineSpec.class);
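Reading the pipeline spec back this way implies it was serialized into the program properties at configuration time. A hedged sketch of the producing side, assuming the same GSON instance and Constants.PIPELINEID key; the properties map and pipelineSpec variable are illustrative:

import java.util.HashMap;
import java.util.Map;

Map<String, String> properties = new HashMap<>();
// Store the full pipeline spec as JSON under a well-known key so the runtime
// can recover it with GSON.fromJson(...), as shown above.
properties.put(Constants.PIPELINEID, GSON.toJson(pipelineSpec));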
builder.add("--conf").add("spark.app.name=" + spec.getName()); builder.add("--cdap.user.main.class=" + spec.getMainClassName());
options.getUserArguments().asMap()); launchConfig.addRunnable(spec.getName(), new SparkTwillRunnable(spec.getName()), 1, clientArgs, spec.getClientResources(), 0);
@Override public Thread newThread(Runnable r) { return new Thread(r, "spark-submitter-" + spec.getName() + "-" + runtimeContext.getRunId()); } });
ProgramId programId = appId.spark(sparkSpec.getName()); for (String dataset : sparkSpec.getDatasets()) { usageRegistry.register(programId, namespaceId.dataset(dataset));
BasicSparkClientContext(SparkRuntimeContext sparkRuntimeContext) { this.sparkRuntimeContext = sparkRuntimeContext; this.localizeResources = new HashMap<>(); this.additionalPythonLocations = new LinkedList<>(); SparkSpecification spec = sparkRuntimeContext.getSparkSpecification(); this.driverResources = SystemArguments.getResources(getDriverRuntimeArguments(), spec.getDriverResources()); this.executorResources = SystemArguments.getResources(getExecutorRuntimeArguments(), spec.getExecutorResources()); }
new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()), new DataSetFieldSetter(runtimeContext.getDatasetCache()), new MetricsFieldSetter(runtimeContext));
@Override protected List<SparkHandlerDelegatorContext> createDelegatorContexts() throws Exception { List<SparkHandlerDelegatorContext> contexts = new ArrayList<>(); InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false); for (SparkHttpServiceHandlerSpecification spec : context.getSpecification().getHandlers()) { Class<?> handlerClass = getProgram().getClassLoader().loadClass(spec.getClassName()); @SuppressWarnings("unchecked") TypeToken<SparkHttpServiceHandler> type = TypeToken.of((Class<SparkHttpServiceHandler>) handlerClass); MetricsContext handlerMetricsContext = runtimeContext.getProgramMetrics().childContext( Constants.Metrics.Tag.HANDLER, handlerClass.getSimpleName()); contexts.add(new SparkHandlerDelegatorContext(type, instantiatorFactory, spec, runtimeContext.getProgramMetrics(), handlerMetricsContext)); } return contexts; }
spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create(); } catch (Exception e) { LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e); throw Throwables.propagate(e);
@Override
protected void initialize() throws Exception {
  SparkClientContext context = getContext();
  String stageName = context.getSpecification().getProperty(STAGE_NAME);
  Class<?> externalProgramClass = context.loadPluginClass(stageName);
  // If the external program implements Spark, instantiate it and call initialize()
  // to provide full lifecycle support
  if (Spark.class.isAssignableFrom(externalProgramClass)) {
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(new BasicArguments(context),
                                                              context.getLogicalStartTime(), context,
                                                              context.getNamespace());
    delegateSpark = context.newPluginInstance(stageName, macroEvaluator);
    if (delegateSpark instanceof AbstractSpark) {
      //noinspection unchecked
      ((AbstractSpark) delegateSpark).initialize(context);
    }
  }
}
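To complete the lifecycle delegation, a matching teardown would forward destroy() to the wrapped program. This is a sketch of that counterpart, not taken from the source; it simply mirrors the initialize() delegation above and assumes AbstractSpark exposes the usual destroy() lifecycle hook:

@Override
public void destroy() {
  // Only delegate when initialize() actually instantiated a wrapped Spark program
  if (delegateSpark instanceof AbstractSpark) {
    ((AbstractSpark) delegateSpark).destroy();
  }
}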