/**
 * Disposes of this channel instance by deleting whatever lives at its single path.
 *
 * <p>The work is wrapped in {@code Actions.doSafe(...)}; presumably that helper keeps failures
 * from propagating to the caller (confirm against {@code Actions}). If no file system can be
 * resolved for the path, nothing is deleted.
 *
 * @throws RheemException declared by the overridden method
 */
@Override
public void doDispose() throws RheemException {
    Actions.doSafe(() -> {
        logger.info("Deleting file channel instances {}.", this.paths);
        final String channelPath = this.getSinglePath();
        FileSystems.getFileSystem(channelPath).ifPresent(fileSystem -> {
            try {
                // The second argument is presumably the "recursive" flag; verify against FileSystem#delete.
                fileSystem.delete(channelPath, true);
            } catch (IOException ioException) {
                // A Consumer cannot throw checked exceptions, hence the unchecked wrapper.
                throw new UncheckedIOException(ioException);
            }
        });
    });
}
} // closes the enclosing scope that was opened before this chunk
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) { final String sourcePath; if (this.sourcePath != null) { assert inputs.length == 0; sourcePath = this.sourcePath; } else { FileChannel.Instance input = (FileChannel.Instance) inputs[0]; sourcePath = input.getSinglePath(); } RddChannel.Instance output = (RddChannel.Instance) outputs[0]; final String actualInputPath = FileSystems.findActualSingleInputPath(sourcePath); final JavaRDD<String> linesRdd = sparkExecutor.sc.textFile(actualInputPath); this.name(linesRdd); final JavaRDD<T> dataQuantaRdd = linesRdd .map(line -> { // TODO: Important. Enrich type informations to create the correct parser! int tabPos = line.indexOf('\t'); return (T) new Tuple2<>( Integer.valueOf(line.substring(0, tabPos)), Float.valueOf(line.substring(tabPos + 1))); }); this.name(dataQuantaRdd); output.accept(dataQuantaRdd, sparkExecutor); return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext); }
/**
 * Opens a file through a {@code SequenceFileIterator} and passes its elements, as a sequential
 * {@link Stream}, to the output {@link StreamChannel.Instance}.
 *
 * <p>The file location is the single path of the {@link FileChannel.Instance} in
 * {@code inputs[0]} when {@code this.sourcePath} is {@code null}; otherwise it is
 * {@code this.sourcePath} (then no inputs are expected).
 *
 * @param inputs          either empty or holding a {@link FileChannel.Instance} at index 0
 * @param outputs         holds the target {@link StreamChannel.Instance} at index 0
 * @param javaExecutor    not used by this method
 * @param operatorContext passed through to {@code ExecutionOperator.modelEagerExecution(...)}
 * @return the result of {@code ExecutionOperator.modelEagerExecution(...)}
 * @throws RheemException if an {@link IOException} occurs while opening the file; the
 *                        original exception is kept as the cause
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    // Resolve where to read from: the input channel is used only if no path is configured.
    final String path;
    if (this.sourcePath == null) {
        final FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        path = input.getSinglePath();
    } else {
        assert inputs.length == 0;
        path = this.sourcePath;
    }

    try {
        final String actualInputPath = FileSystems.findActualSingleInputPath(path);
        // Typed with a wildcard instead of a raw type so the spliterator/stream calls below are checked.
        final SequenceFileIterator<?> sequenceFileIterator = new SequenceFileIterator<>(actualInputPath);
        // NOTE(review): the iterator is not closed here; if it holds an open reader, confirm who releases it.
        final Stream<?> sequenceFileStream = StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(sequenceFileIterator, 0), false);
        ((StreamChannel.Instance) outputs[0]).accept(sequenceFileStream);
    } catch (IOException e) {
        throw new RheemException(String.format("%s failed to read from %s.", this, path), e);
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
final String inputPath = inputFileChannelInstance.getSinglePath(); final String actualInputPath = FileSystems.findActualSingleInputPath(inputPath); final FileSystem inputFs = FileSystems.getFileSystem(inputPath).orElseThrow(
/**
 * Loads an object file with Spark and feeds the resulting RDD into the output
 * {@link RddChannel.Instance}.
 *
 * <p>The file location is {@code this.sourcePath} when set (then no inputs are expected);
 * otherwise it is the single path of the {@link FileChannel.Instance} in {@code inputs[0]}.
 *
 * @param inputs          either empty or holding a {@link FileChannel.Instance} at index 0
 * @param outputs         holds the target {@link RddChannel.Instance} at index 0
 * @param sparkExecutor   provides the Spark context ({@code sc}) used to read the file
 * @param operatorContext passed through to {@code ExecutionOperator.modelLazyExecution(...)}
 * @return the result of {@code ExecutionOperator.modelLazyExecution(...)}
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Resolve where to read from: a configured path wins over the input channel.
    final String resolvedPath;
    if (this.sourcePath == null) {
        final FileChannel.Instance fileInput = (FileChannel.Instance) inputs[0];
        resolvedPath = fileInput.getSinglePath();
    } else {
        assert inputs.length == 0;
        resolvedPath = this.sourcePath;
    }
    final RddChannel.Instance rddOutput = (RddChannel.Instance) outputs[0];

    final String actualInputPath = FileSystems.findActualSingleInputPath(resolvedPath);
    final JavaRDD<Object> loadedObjects = sparkExecutor.sc.objectFile(actualInputPath);
    this.name(loadedObjects);
    rddOutput.accept(loadedObjects, sparkExecutor);

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Creates a {@link Stream} over the source file via {@code createStream(...)} and passes it to
 * the output {@link StreamChannel.Instance}.
 *
 * <p>The file location is the single path of the {@link FileChannel.Instance} in
 * {@code inputs[0]} when {@code this.sourcePath} is {@code null}; otherwise it is
 * {@code this.sourcePath} (then no inputs are expected).
 *
 * @param inputs          either empty or holding a {@link FileChannel.Instance} at index 0
 * @param outputs         holds the target {@link StreamChannel.Instance} at index 0
 * @param javaExecutor    not used by this method
 * @param operatorContext passed through to {@code ExecutionOperator.modelLazyExecution(...)}
 * @return the result of {@code ExecutionOperator.modelLazyExecution(...)}
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    // Resolve where to read from: a configured path wins over the input channel.
    final String resolvedPath;
    if (this.sourcePath != null) {
        assert inputs.length == 0;
        resolvedPath = this.sourcePath;
    } else {
        final FileChannel.Instance fileInput = (FileChannel.Instance) inputs[0];
        resolvedPath = fileInput.getSinglePath();
    }

    final String actualInputPath = FileSystems.findActualSingleInputPath(resolvedPath);
    final Stream<T> dataQuanta = this.createStream(actualInputPath);
    final StreamChannel.Instance streamOutput = (StreamChannel.Instance) outputs[0];
    streamOutput.accept(dataQuanta);

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}