/**
 * Applies a Filter transformation on a {@link DataSet}.
 * <p>
 * The given {@link FilterFunction} is wrapped, together with this DataSet, in a new
 * {@link FilterOperator}. Elements for which the function returns true are retained;
 * elements for which it returns false are filtered out.
 *
 * @param filter The FilterFunction that is called for each element of the DataSet.
 * @return A FilterOperator that represents the filtered DataSet.
 * @throws NullPointerException if {@code filter} is null.
 *
 * @see FilterFunction
 * @see FilterOperator
 * @see DataSet
 */
public FilterOperator<T> filter(FilterFunction<T> filter) {
	if (filter != null) {
		return new FilterOperator<T>(this, filter);
	}
	// a missing function is rejected eagerly, before any operator is created
	throw new NullPointerException("Filter function must not be null.");
}
/**
 * Creates a filter operator over the given input data set.
 * <p>
 * The input and the input's own type (as reported by {@code input.getType()}) are
 * handed to the superclass constructor. The filter function is stored in this
 * operator, and its runtime class is passed to
 * {@code extractSemanticAnnotationsFromUdf}.
 * <p>
 * NOTE(review): neither argument is null-checked here; a null {@code input} or
 * {@code function} fails with a NullPointerException on the respective
 * dereference. Presumably callers validate beforehand; confirm.
 *
 * @param input The data set whose elements are filtered.
 * @param function The filter function applied by this operator.
 */
public FilterOperator(DataSet<T> input, FilterFunction<T> function) { super(input, input.getType()); this.function = function; extractSemanticAnnotationsFromUdf(function.getClass()); }
@Override protected eu.stratosphere.api.common.operators.base.FilterOperatorBase<T, GenericFlatMap<T,T>> translateToDataFlow(Operator<T> input) { String name = getName() != null ? getName() : function.getClass().getName(); // create operator PlanFilterOperator<T> po = new PlanFilterOperator<T>(function, name, getInputType()); // set input po.setInput(input); // set dop if(this.getParallelism() > 0) { // use specified dop po.setDegreeOfParallelism(this.getParallelism()); } else { // if no dop has been specified, use dop of input operator to enable chaining po.setDegreeOfParallelism(input.getDegreeOfParallelism()); } return po; } }
return value.f0 < literal; }).withBroadcastSet(ints, "ints"); filterDs.writeAsCsv(resultPath); env.execute(); return (value.f1 == (broadcastSum / 11)); }).withBroadcastSet(intDs, "ints");; filterDs.writeAsCsv(resultPath); env.execute();
.project(0,4).types(Integer.class, Integer.class);
.project(0,4).types(Integer.class, Integer.class);