@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Sanity-check the channel arities, consistent with the sibling evaluate(...) implementations.
    assert inputs.length == 1;
    assert outputs.length == 1;

    final StreamChannel.Instance streamChannelInstance = (StreamChannel.Instance) inputs[0];
    final CollectionChannel.Instance collectionChannelInstance = (CollectionChannel.Instance) outputs[0];

    // Materialize the incoming Stream into a List and hand it to the output channel.
    final List<?> collection = streamChannelInstance.provideStream().collect(Collectors.toList());
    collectionChannelInstance.accept(collection);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == 1;
    assert outputs.length == 1;

    final CollectionChannel.Instance source = (CollectionChannel.Instance) inputs[0];
    final CollectionChannel.Instance sink = (CollectionChannel.Instance) outputs[0];

    // Wrap the complete input collection as the single "group" that this operator emits.
    final Collection<Iterable<?>> singletonGroup = new ArrayList<>(1);
    singletonGroup.add(source.provideCollection());
    sink.accept(singletonGroup);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
coGroups.add(new Tuple2<>(Collections.emptyList(), group1)); ((CollectionChannel.Instance) outputs[0]).accept(coGroups);
@Override @SuppressWarnings("unchecked") public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, JavaExecutor javaExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length == this.getNumOutputs(); int sampleSize = this.getSampleSize(operatorContext); if (sampleSize >= datasetSize) { //return all ((CollectionChannel.Instance) outputs[0]).accept(((JavaChannelInstance) inputs[0]).provideStream().collect(Collectors.toList())); } else { long seed = this.getSeed(operatorContext); rand = new Random(seed); ((CollectionChannel.Instance) outputs[0]).accept(reservoirSample(rand, ((JavaChannelInstance) inputs[0]).<Type>provideStream().iterator(), sampleSize)); } return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }
/**
 * Utility method to forward a {@link JavaChannelInstance} to another.
 * <p>
 * A {@link CollectionChannel.Instance} output receives the input's materialized
 * collection; a {@link StreamChannel.Instance} output receives the input's stream.
 * Any other output type is rejected with a {@link RheemException}. In all
 * successful cases the output's lineage is linked to the input's.
 *
 * @param input  channel instance that should be forwarded
 * @param output channel instance to that should be forwarded
 * @throws RheemException if {@code output} is neither a collection nor a stream channel instance
 */
static void forward(ChannelInstance input, ChannelInstance output) {
    // Do the forward.
    if (output instanceof CollectionChannel.Instance) {
        // NOTE(review): this branch assumes the input is also a CollectionChannel.Instance;
        // a stream-backed input here would raise a ClassCastException — TODO confirm the
        // channel-conversion logic guarantees this pairing.
        ((CollectionChannel.Instance) output).accept(((CollectionChannel.Instance) input).provideCollection());
    } else if (output instanceof StreamChannel.Instance) {
        ((StreamChannel.Instance) output).accept(((JavaChannelInstance) input).provideStream());
    } else {
        throw new RheemException(String.format("Cannot forward %s to %s.", input, output));
    }
    // Manipulate the lineage.
    output.getLineage().addPredecessor(input.getLineage());
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final RddChannel.Instance input = (RddChannel.Instance) inputs[0];
    final CollectionChannel.Instance output = (CollectionChannel.Instance) outputs[0];

    // Compile the reduce UDF for Spark.
    final Function2<Type, Type, Type> reduceFunction =
            sparkExecutor.getCompiler().compile(this.reduceDescriptor, this, operatorContext, inputs);

    final JavaRDD<Type> inputRdd = input.provideRdd();
    // JavaRDD.reduce(...) throws UnsupportedOperationException on an empty RDD;
    // mirror the Java platform's global reduce and emit an empty result instead.
    final List<Type> outputList = inputRdd.isEmpty()
            ? Collections.emptyList()
            : Collections.singletonList(inputRdd.reduce(reduceFunction));
    output.accept(outputList);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    // Compile the UDFs for key extraction and per-group reduction.
    final Function<Type, KeyType> keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor);
    final BinaryOperator<Type> reduceFunction = javaExecutor.getCompiler().compile(this.reduceDescriptor);
    JavaExecutor.openFunction(this, reduceFunction, inputs, operatorContext);

    // Group by key while folding each group with the reduce function.
    final Map<KeyType, Type> reducedByKey = ((JavaChannelInstance) inputs[0]).<Type>provideStream()
            .collect(Collectors.groupingBy(keyExtractor, new ReducingCollector<>(reduceFunction)));

    ((CollectionChannel.Instance) outputs[0]).accept(reducedByKey.values());

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    // Compile and open the reduce UDF.
    final BinaryOperator<Type> reduceFunction = javaExecutor.getCompiler().compile(this.reduceDescriptor);
    JavaExecutor.openFunction(this, reduceFunction, inputs, operatorContext);

    // Fold the whole input stream; an empty input yields an empty output collection.
    final Optional<Type> reduction = ((JavaChannelInstance) inputs[0]).<Type>provideStream().reduce(reduceFunction);
    final Collection<Type> result = reduction
            .<Collection<Type>>map(Collections::singleton)
            .orElseGet(Collections::emptyList);
    ((CollectionChannel.Instance) outputs[0]).accept(result);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, JavaExecutor javaExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length == this.getNumOutputs(); final Function<Type, KeyType> keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor); final Map<KeyType, List<Type>> collocation = ((JavaChannelInstance) inputs[0]).<Type>provideStream().collect( Collectors.groupingBy( keyExtractor, Collectors.toList())); // Not sure if this is thread-safe... Will we use #parallelStream()? ((CollectionChannel.Instance) outputs[0]).accept(collocation.values()); return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext); }
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final JavaChannelInstance inputChannel = (JavaChannelInstance) inputs[0];
    // Prefer the cheap Collection.size() when the data is already materialized;
    // otherwise fall back to counting the stream.
    final long numDataQuanta = inputChannel instanceof CollectionChannel.Instance
            ? ((CollectionChannel.Instance) inputChannel).provideCollection().size()
            : inputChannel.provideStream().count();
    ((CollectionChannel.Instance) outputs[0]).accept(Collections.singleton(numDataQuanta));

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final RddChannel.Instance inputChannel = (RddChannel.Instance) inputs[0];
    final CollectionChannel.Instance outputChannel = (CollectionChannel.Instance) outputs[0];

    // Count the RDD and emit the (boxed) result as a one-element collection.
    outputChannel.accept(Collections.singletonList(inputChannel.provideRdd().count()));

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Sanity-check the channel arities, consistent with the sibling evaluate(...) implementations.
    assert inputs.length == 1;
    assert outputs.length == 1;

    final RddChannel.Instance input = (RddChannel.Instance) inputs[0];
    final CollectionChannel.Instance output = (CollectionChannel.Instance) outputs[0];

    // Pull the distributed data to the driver and hand it to the collection channel.
    @SuppressWarnings("unchecked") final List<Type> collectedRdd = (List<Type>) input.provideRdd().collect();
    output.accept(collectedRdd);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
@SuppressWarnings("unchecked")
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == 0;
    assert outputs.length == 1;

    // This operator is a source: simply hand its preloaded collection to the output channel.
    final CollectionChannel.Instance outputChannel = (CollectionChannel.Instance) outputs[0];
    outputChannel.accept(this.getCollection());

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}