final String path = output.addGivenOrTempPath(this.targetPath, javaExecutor.getCompiler().getConfiguration()); final FileSystem fileSystem = FileSystems.getFileSystem(path).orElseThrow( () -> new IllegalStateException(String.format("No file system found for \"%s\".", this.targetPath))
// Ask the output channel for the path to write to, passing the operator's configured target path and the compiler configuration (presumably yields a temporary path when no target path is given; inferred from the method name, confirm against the channel implementation).
final String path = output.addGivenOrTempPath(this.targetPath, javaExecutor.getCompiler().getConfiguration());
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length <= 1; final FileChannel.Instance output = (FileChannel.Instance) outputs[0]; final String targetPath = output.addGivenOrTempPath(this.targetPath, sparkExecutor.getConfiguration()); RddChannel.Instance input = (RddChannel.Instance) inputs[0]; input.provideRdd() .coalesce(1) // TODO: Remove. This only hotfixes the issue that JavaObjectFileSource reads only a single file. .saveAsObjectFile(targetPath); LoggerFactory.getLogger(this.getClass()).info("Writing dataset to {}.", targetPath); return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); final FileChannel.Instance output = (FileChannel.Instance) outputs[0]; final String targetPath = output.addGivenOrTempPath(this.targetPath, sparkExecutor.getConfiguration()); final RddChannel.Instance input = (RddChannel.Instance) inputs[0]; final JavaRDD<Object> rdd = input.provideRdd(); final JavaRDD<String> serializedRdd = rdd .map(dataQuantum -> { // TODO: Once there are more tuple types, make this generic. @SuppressWarnings("unchecked") Tuple2<Object, Object> tuple2 = (Tuple2<Object, Object>) dataQuantum; return String.valueOf(tuple2.field0) + '\t' + String.valueOf(tuple2.field1); }); this.name(serializedRdd); serializedRdd .coalesce(1) // TODO: Allow more than one TSV file? .saveAsTextFile(targetPath); return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }