/**
 * Lazily concatenates the two input channels into the single output channel.
 * No data quanta are pulled here; the concatenated {@link Stream} is consumed downstream.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final JavaChannelInstance firstInput = (JavaChannelInstance) inputs[0];
    final JavaChannelInstance secondInput = (JavaChannelInstance) inputs[1];
    ((StreamChannel.Instance) outputs[0]).accept(
            Stream.concat(firstInput.provideStream(), secondInput.provideStream())
    );

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Eagerly drains the input channel, feeding every data quantum to this sink's callback.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor executor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    input.<T>provideStream().forEach(this.callback);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
/**
 * Lazily sorts the input stream by the key produced by {@code this.keyDescriptor}.
 * The extracted key is assumed to implement {@link Comparable} (raw cast below) — this mirrors
 * the operator's existing contract; a non-comparable key fails at consumption time.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Function<Type, Key> keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor);
    final JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    ((StreamChannel.Instance) outputs[0]).accept(
            input.<Type>provideStream().sorted((first, second) ->
                    ((Comparable) keyExtractor.apply(first)).compareTo(keyExtractor.apply(second)))
    );

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Lazily removes duplicate data quanta from the input stream via {@link Stream#distinct()}.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    ((StreamChannel.Instance) outputs[0]).accept(input.provideStream().distinct());

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Eagerly writes all input data quanta to {@code this.textFileUrl}, one formatted line per quantum.
 * <p>
 * Bug fix: {@link IOException}s raised inside the {@code forEach} lambda are rethrown as
 * {@link UncheckedIOException} (a {@code RuntimeException}), which the previous
 * {@code catch (IOException e)} did not intercept — such write failures escaped unwrapped.
 * The multi-catch below ensures every write failure surfaces uniformly as a {@link RheemException}.
 * <p>
 * NOTE(review): the writer uses the platform-default charset (no charset passed to
 * {@link OutputStreamWriter}); kept as-is to preserve output byte-compatibility — confirm whether
 * UTF-8 should be pinned.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == 1;
    assert outputs.length == 0;
    JavaChannelInstance input = (JavaChannelInstance) inputs[0];

    final FileSystem fs = FileSystems.requireFileSystem(this.textFileUrl);
    final Function<T, String> formatter = javaExecutor.getCompiler().compile(this.formattingDescriptor);

    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(this.textFileUrl)))) {
        input.<T>provideStream().forEach(
                dataQuantum -> {
                    try {
                        writer.write(formatter.apply(dataQuantum));
                        writer.write('\n');
                    } catch (IOException e) {
                        // Tunnel the checked exception out of the lambda; unwrapped by the catch below.
                        throw new UncheckedIOException(e);
                    }
                }
        );
    } catch (IOException | UncheckedIOException e) {
        throw new RheemException("Writing failed.", e);
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
/**
 * Eagerly reduces the whole input to at most one value using the compiled reduce function.
 * An empty input yields an empty output collection; otherwise a singleton.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final BinaryOperator<Type> reduceFunction = javaExecutor.getCompiler().compile(this.reduceDescriptor);
    JavaExecutor.openFunction(this, reduceFunction, inputs, operatorContext);

    final Optional<Type> reduction =
            ((JavaChannelInstance) inputs[0]).<Type>provideStream().reduce(reduceFunction);
    if (reduction.isPresent()) {
        ((CollectionChannel.Instance) outputs[0]).accept(Collections.singleton(reduction.get()));
    } else {
        ((CollectionChannel.Instance) outputs[0]).accept(Collections.emptyList());
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, JavaExecutor javaExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length == this.getNumOutputs(); final Function<Type, KeyType> keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor); final Map<KeyType, List<Type>> collocation = ((JavaChannelInstance) inputs[0]).<Type>provideStream().collect( Collectors.groupingBy( keyExtractor, Collectors.toList())); // Not sure if this is thread-safe... Will we use #parallelStream()? ((CollectionChannel.Instance) outputs[0]).accept(collocation.values()); return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }
/**
 * Eagerly counts the input data quanta and emits the count as a singleton collection.
 * Shortcut: a {@link CollectionChannel.Instance} already knows its size, so the stream
 * is only consumed for non-collection channels.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    final long count = input instanceof CollectionChannel.Instance
            ? ((CollectionChannel.Instance) input).provideCollection().size()
            : input.provideStream().count();
    ((CollectionChannel.Instance) outputs[0]).accept(Collections.singleton(count));

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override @SuppressWarnings("unchecked") public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, JavaExecutor javaExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length == this.getNumOutputs(); int sampleSize = this.getSampleSize(operatorContext); if (sampleSize >= datasetSize) { //return all ((CollectionChannel.Instance) outputs[0]).accept(((JavaChannelInstance) inputs[0]).provideStream().collect(Collectors.toList())); } else { long seed = this.getSeed(operatorContext); rand = new Random(seed); ((CollectionChannel.Instance) outputs[0]).accept(reservoirSample(rand, ((JavaChannelInstance) inputs[0]).<Type>provideStream().iterator(), sampleSize)); } return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }
((JavaChannelInstance) inputs[0]).provideStream().forEach(streamChunker::push); streamChunker.fire(); LoggerFactory.getLogger(this.getClass()).info("Writing dataset to {}.", path);
((JavaChannelInstance) inputs[0]).provideStream().forEach( dataQuantum -> { try {
/**
 * Lazily flat-maps each input data quantum into zero or more output quanta.
 * Each {@link Iterable} produced by the compiled function is bridged into an ordered,
 * sequential {@link java.util.stream.Stream} via a spliterator of unknown size.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Function<InputType, Iterable<OutputType>> flatmapFunction =
            javaExecutor.getCompiler().compile(this.functionDescriptor);
    JavaExecutor.openFunction(this, flatmapFunction, inputs, operatorContext);

    ((StreamChannel.Instance) outputs[0]).accept(
            ((JavaChannelInstance) inputs[0]).<InputType>provideStream().flatMap(dataQuantum -> {
                final Iterable<OutputType> expansion = flatmapFunction.apply(dataQuantum);
                return StreamSupport.stream(
                        Spliterators.spliteratorUnknownSize(expansion.iterator(), Spliterator.ORDERED),
                        false);
            })
    );

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
((StreamChannel.Instance) outputs[0]).accept(((JavaChannelInstance) inputs[0]).provideStream()); ((StreamChannel.Instance) outputs[0]).accept(((JavaChannelInstance) inputs[0]).<Type>provideStream().filter(new Predicate<Type>() { int streamIndex = 0; int sampleIndex = 0;
/**
 * Applies the compiled partition function once to the whole input (exposed as an
 * {@link Iterable}) and lazily re-streams its output into the output channel.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Function<Iterable<InputType>, Iterable<OutputType>> function =
            javaExecutor.getCompiler().compile(this.functionDescriptor);
    JavaExecutor.openFunction(this, function, inputs, operatorContext);

    final Iterable<InputType> inputDataQuanta =
            Iterators.wrapWithIterable(((JavaChannelInstance) inputs[0]).<InputType>provideStream().iterator());
    final Iterable<OutputType> outputDataQuanta = function.apply(inputDataQuanta);
    ((StreamChannel.Instance) outputs[0]).accept(StreamSupport.stream(
            Spliterators.spliteratorUnknownSize(outputDataQuanta.iterator(), Spliterator.ORDERED),
            false));

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
} else { final Collection<Input> collection = ((JavaChannelInstance) inputs[0]).<Input>provideStream().collect(Collectors.toList()); stream0 = collection.stream();
/**
 * Lazily filters the input stream with the compiled predicate.
 */
@Override
@SuppressWarnings("unchecked")
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Predicate<Type> filterFunction = javaExecutor.getCompiler().compile(this.predicateDescriptor);
    JavaExecutor.openFunction(this, filterFunction, inputs, operatorContext);

    final JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    ((StreamChannel.Instance) outputs[0]).accept(input.<Type>provideStream().filter(filterFunction));

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Lazily maps every input data quantum with the compiled function.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Function<InputType, OutputType> function = javaExecutor.getCompiler().compile(this.functionDescriptor);
    JavaExecutor.openFunction(this, function, inputs, operatorContext);

    ((StreamChannel.Instance) outputs[0]).accept(
            ((JavaChannelInstance) inputs[0]).<InputType>provideStream().map(function));

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Eagerly reduces the input per key: quanta are grouped by the extracted key while a
 * {@code ReducingCollector} folds each group with the compiled reduce function; only
 * the reduced values (keys discarded) are emitted.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final Function<Type, KeyType> keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor);
    final BinaryOperator<Type> reduceFunction = javaExecutor.getCompiler().compile(this.reduceDescriptor);
    JavaExecutor.openFunction(this, reduceFunction, inputs, operatorContext);

    final Map<KeyType, Type> reducedByKey = ((JavaChannelInstance) inputs[0]).<Type>provideStream()
            .collect(Collectors.groupingBy(keyExtractor, new ReducingCollector<>(reduceFunction)));
    ((CollectionChannel.Instance) outputs[0]).accept(reducedByKey.values());

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
/** * Utility method to forward a {@link JavaChannelInstance} to another. * * @param input that should be forwarded * @param output to that should be forwarded */ static void forward(ChannelInstance input, ChannelInstance output) { // Do the forward. if (output instanceof CollectionChannel.Instance) { ((CollectionChannel.Instance) output).accept(((CollectionChannel.Instance) input).provideCollection()); } else if (output instanceof StreamChannel.Instance) { ((StreamChannel.Instance) output).accept(((JavaChannelInstance) input).provideStream()); } else { throw new RheemException(String.format("Cannot forward %s to %s.", input, output)); } // Manipulate the lineage. output.getLineage().addPredecessor(input.getLineage()); }
final Set<Type> probingTable; if (isMaterialize0) { candidateStream = ((JavaChannelInstance) inputs[0]).provideStream(); probingTable = this.createProbingTable(((JavaChannelInstance) inputs[1]).provideStream()); indexingExecutionLineageNode.addPredecessor(inputs[0].getLineage()); probingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); } else { candidateStream = ((JavaChannelInstance) inputs[1]).provideStream(); probingTable = this.createProbingTable(((JavaChannelInstance) inputs[0]).provideStream()); indexingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); probingExecutionLineageNode.addPredecessor(inputs[0].getLineage());