/**
 * Persists the given frame into the table configured in {@code properties}.
 * Switches the session to the target keyspace first, then appends the rows.
 *
 * @param dataFrame   the frame to persist
 * @param entityClazz entity class associated with the frame (unused here)
 * @param properties  must contain {@code KEYSPACE} and {@code TABLE} entries as Strings
 */
@Override
public void saveDataFrame(DataFrame dataFrame, Class<?> entityClazz, Map<String, Object> properties) {
    String keyspace = (String) properties.get(KEYSPACE);
    String table = (String) properties.get(TABLE);
    // Select the keyspace for the session, then append into the target table.
    dataFrame.sqlContext().sql("use " + keyspace);
    dataFrame.write().insertInto(table);
}
}
/**
 * Validates that the frame's backing Spark runtime is a supported version.
 * A {@code null} frame is treated as a no-op — there is nothing to validate.
 *
 * @param df the frame to check; may be {@code null}
 * @throws UnsupportedOperationException if the SparkContext version does not
 *         start with {@link Constants#SUPPORTED_SPARK_VERSION_PREFIX}
 */
private void preConditions(DataFrame df) {
    if (df == null) {
        return;
    }
    // StringUtils.startsWith is null-safe: a null version string also fails the check.
    String sparkVersion = df.sqlContext().sparkContext().version();
    if (!StringUtils.startsWith(sparkVersion, Constants.SUPPORTED_SPARK_VERSION_PREFIX)) {
        throw new UnsupportedOperationException("Only spark version " + Constants.SUPPORTED_SPARK_VERSION_PREFIX
                + " is supported by this version of the library");
    }
}
/**
 * Decodes every row of the input into a (sentence, prediction) pair and
 * returns the result as a new frame with that two-column schema.
 *
 * @param dataset input frame; each row is mapped through {@link DecodeFunction}
 * @return a frame with non-nullable string columns "sentence" and "prediction"
 */
@Override
public DataFrame transform(DataFrame dataset) {
    JavaRDD<Row> decodedRows = dataset.javaRDD().map(new DecodeFunction());
    // createStructField(name, type, nullable) attaches empty metadata, matching
    // an explicit new StructField(..., Metadata.empty()).
    StructField[] outputFields = {
        DataTypes.createStructField("sentence", DataTypes.StringType, false),
        DataTypes.createStructField("prediction", DataTypes.StringType, false)
    };
    return dataset.sqlContext().createDataFrame(decodedRows, new StructType(outputFields));
}
// NOTE(review): fragment of a larger method — the enclosing signature and the
// declarations of `order`, `dataset`, and `df` are outside this view; the two
// halves may even come from different methods. Verify against the full file.

// Extract labeled contexts from the raw rows using the configured regexp file.
ContextExtractor contextExtractor = new ContextExtractor(order, Constants.REGEXP_FILE); JavaRDD<LabeledContext> contexts = contextExtractor.extract(dataset.javaRDD());
// Reflectively build a frame from the LabeledContext beans.
DataFrame dataFrame = dataset.sqlContext().createDataFrame(contexts, LabeledContext.class);
// Presumably `dft` is a temp table registered from a frame upstream — confirm where it is registered.
Row row = df.sqlContext().sql("SELECT MAX(label) as maxValue FROM dft").first();
// Assumes labels are 0-based doubles, so the label count is max + 1 — TODO confirm.
this.numLabels = (int)row.getDouble(0) + 1; JavaRDD<Row> rows = df.sqlContext().sql("SELECT label, features FROM dft").toJavaRDD();
/**
 * Writes the frame into the analytics data access layer.
 * When {@code preserveOrder} is set, each partition is written by its own
 * sequential Spark job so row order is kept; otherwise all partitions are
 * written concurrently via {@code foreachPartition}.
 *
 * @param data the frame to persist
 */
private void writeDataFrameToDAL(DataFrame data) {
    if (!this.preserveOrder) {
        // Fast path: let Spark write every partition in a single concurrent job.
        data.foreachPartition(new AnalyticsWritingFunction(this.tenantId, this.tableName, data.schema(),
                this.globalTenantAccess, this.schemaString, this.primaryKeys, this.mergeFlag,
                this.recordStore, this.recordBatchSize));
        return;
    }
    logDebug("Inserting data with order preserved! Each partition will be written using separate jobs.");
    // One runJob call per partition: jobs run one after another, preserving order.
    for (int partitionIndex = 0; partitionIndex < data.rdd().partitions().length; partitionIndex++) {
        data.sqlContext().sparkContext().runJob(data.rdd(),
                new AnalyticsWritingFunction(this.tenantId, this.tableName, data.schema(),
                        this.globalTenantAccess, this.schemaString, this.primaryKeys, this.mergeFlag,
                        this.recordStore, this.recordBatchSize),
                CarbonScalaUtils.getNumberSeq(partitionIndex, partitionIndex + 1), false,
                ClassTag$.MODULE$.Unit());
    }
}