@Override
public void run(final SparkExecutionPluginContext context, JavaRDD<StructuredRecord> input) throws Exception {
  // Transform each incoming record into a (row key, value) byte pair:
  // the "id" field becomes the key and the "name" field becomes the value.
  PairFunction<StructuredRecord, byte[], byte[]> toBytePair =
    new PairFunction<StructuredRecord, byte[], byte[]>() {
      @Override
      public Tuple2<byte[], byte[]> call(StructuredRecord record) throws Exception {
        byte[] rowKey = Bytes.toBytes((String) record.get("id"));
        byte[] value = Bytes.toBytes((String) record.get("name"));
        return new Tuple2<>(rowKey, value);
      }
    };
  JavaPairRDD<byte[], byte[]> tableData = input.mapToPair(toBytePair);
  // Persist the keyed pairs to the dataset named in the plugin config.
  context.saveAsDataset(tableData, config.tableName);
}
@Override public void initialize(SparkExecutionPluginContext context) throws Exception { // should never happen, here to test app correctness in unit tests Schema inputSchema = context.getInputSchema(); if (inputSchema != null && !inputSchema.equals(context.getOutputSchema())) { throw new IllegalStateException("runtime schema does not match what was set at configure time."); } interpreter = context.createSparkInterpreter(); interpreter.compile( "package test\n" + "import co.cask.cdap.api.data.format._\n" + "import org.apache.spark._\n" + "import org.apache.spark.api.java._\n" + "import org.apache.spark.rdd._\n" + "object Compute {\n" + " def compute(rdd: RDD[StructuredRecord]): JavaRDD[StructuredRecord] = {\n" + " val value = \"" + conf.value + "\"\n" + " val field = \"" + conf.field + "\"\n" + " JavaRDD.fromRDD(rdd.filter(r => !value.equals(r.get(field))))\n" + " }\n" + "}" ); computeMethod = interpreter.getClassLoader().loadClass("test.Compute").getDeclaredMethod("compute", RDD.class); }