// Kylin SparkCubingByLayer: each cuboid row is emitted as a Text key/value pair,
// then the layer's RDD is persisted through the OutputFormat configured on the Job.
        return new Tuple2<>(new org.apache.hadoop.io.Text(tuple2._1().array()), textResult);
    }).saveAsNewAPIHadoopDataset(job.getConfiguration());
logger.info("Persisting RDD for level " + level + " into " + cuboidOutputPath);
// Kylin SparkCubeHFile: the mapToPair closure ends here; the resulting
// (RowKeyWritable, KeyValue) pairs are written out via the Job's OutputFormat.
        rowKeyWritableKeyValueTuple2._2);
    }).saveAsNewAPIHadoopDataset(job.getConfiguration());
// Kylin SparkCubingMerge: union the segments being merged, reduce by row key,
// convert to Text, and write out. The extraction fused two variants that differ
// only in how the reduce partition count is estimated; both are shown here.

// Whole-cube estimate:
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
        .reduceByKey(reduceFunction, SparkUtil.estimateTotalPartitionNum(cubeStatsReader, envConfig))
        .mapToPair(convertTextFunction)
        .saveAsNewAPIHadoopDataset(job.getConfiguration());

// Per-layer estimate:
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
        .reduceByKey(reduceFunction, SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig))
        .mapToPair(convertTextFunction)
        .saveAsNewAPIHadoopDataset(job.getConfiguration());
// CDAP: fan a sink's RDD out to every output registered for that sink, either
// through a Hadoop OutputFormat or through a CDAP dataset.
public <K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName,
                                Class<K> keyClass, Class<V> valueClass) {
    Set<String> outputNames = sinkOutputs.get(sinkName);
    if (outputNames == null || outputNames.isEmpty()) {
        // should never happen if validation happened correctly at pipeline configure time
        throw new IllegalArgumentException(sinkName + " has no outputs. "
                + "Please check that the sink calls addOutput at some point.");
    }

    for (String outputName : outputNames) {
        OutputFormatProvider outputFormatProvider = outputFormatProviders.get(outputName);
        if (outputFormatProvider != null) {
            // Build a fresh Configuration holding only the provider's output settings.
            Configuration hConf = new Configuration();
            hConf.clear();
            for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration().entrySet()) {
                hConf.set(entry.getKey(), entry.getValue());
            }
            hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
            rdd.saveAsNewAPIHadoopDataset(hConf);
        }

        DatasetInfo datasetInfo = datasetInfos.get(outputName);
        if (datasetInfo != null) {
            sec.saveAsDataset(rdd, datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
        }
    }
}
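For context, a minimal sketch of the OutputFormatProvider this loop consumes. The interface and its two methods are CDAP's; the class name TextSinkOutputFormatProvider and the fixed path are illustrative assumptions, not taken from the source.

// Hypothetical provider: points a sink at TextOutputFormat writing to a fixed path.
public class TextSinkOutputFormatProvider implements OutputFormatProvider {
    private final String path;

    public TextSinkOutputFormatProvider(String path) {
        this.path = path;
    }

    @Override
    public String getOutputFormatClassName() {
        return "org.apache.hadoop.mapreduce.lib.output.TextOutputFormat";
    }

    @Override
    public Map<String, String> getOutputFormatConfiguration() {
        // FileOutputFormat reads its target directory from this standard Hadoop key.
        return Collections.singletonMap("mapreduce.output.fileoutputformat.outputdir", path);
    }
}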
// Fragment: save wrapped in a try block, rethrowing checked failures unchecked.
try {
    unwrapped.saveAsNewAPIHadoopDataset(conf);
} catch (IOException e) {
    throw new RuntimeException(e);
}
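saveAsNewAPIHadoopDataset itself declares no checked exceptions, so the IOException must come from statements elided from this fragment earlier in the try block. A hedged reconstruction, assuming the usual Job construction is what throws:

try {
    // Job.getInstance is the typical IOException source in this pattern (assumption).
    Job job = Job.getInstance(conf);
    unwrapped.saveAsNewAPIHadoopDataset(job.getConfiguration());
} catch (IOException e) {
    throw new RuntimeException(e);
}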
// GeoWave: pair each SimpleFeature with an output key naming the target type
// and index, then write through GeoWave's Hadoop OutputFormat.
feat -> new Tuple2<GeoWaveOutputKey, SimpleFeature>(
        new GeoWaveOutputKey(typeName.value(), indexName.value()), feat))
    .saveAsNewAPIHadoopDataset(job.getConfiguration());
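The calls typeName.value() and indexName.value() indicate Spark broadcast variables; a minimal sketch of how they would be set up before the map, with placeholder names:

// Broadcast the GeoWave type and index names once so every executor reuses them
// instead of re-serializing them with each task closure.
Broadcast<String> typeName = javaSparkContext.broadcast("my_feature_type");
Broadcast<String> indexName = javaSparkContext.broadcast("spatial_idx");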
// Apache Gora: persist the reduced RDD back to the Gora data store described
// by the Hadoop configuration.
reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf);
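All of the fragments above share one pattern: build a Hadoop Job purely as a carrier for output configuration, then hand job.getConfiguration() to saveAsNewAPIHadoopDataset. A self-contained minimal sketch of that pattern, using TextOutputFormat; the output path and app name are placeholders:

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class SaveAsNewAPIHadoopDatasetExample {
    public static void main(String[] args) throws IOException {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "save-example");

        // Build a small pair RDD of Writable keys and values.
        JavaPairRDD<Text, Text> pairs = sc
                .parallelize(Arrays.asList("a", "b", "c"))
                .mapToPair(s -> new Tuple2<>(new Text(s), new Text(s.toUpperCase())));

        // The Job is never submitted; it only assembles the output configuration:
        // output format, key/value classes, and target directory.
        Job job = Job.getInstance(new Configuration());
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/save-example-output"));

        pairs.saveAsNewAPIHadoopDataset(job.getConfiguration());
        sc.stop();
    }
}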