public <F> F clean(F f) { if (getExecutionEnvironment().getConfig().isClosureCleanerEnabled()) { ClosureCleaner.clean(f, true); } else { ClosureCleaner.ensureSerializable(f); } return f; }
protected ProjectCross(DataSet<I1> input1, DataSet<I2> input2, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType, CrossProjection<I1, I2> crossProjection, CrossHint hint) { super(input1, input2, new ProjectCrossFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()), returnType, hint, "unknown"); this.crossProjection = crossProjection; }
protected ProjectCross(DataSet<I1> input1, DataSet<I2> input2, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType, CrossHint hint) { super(input1, input2, new ProjectCrossFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()), returnType, hint, "unknown"); crossProjection = null; }
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType, JoinProjection<I1, I2> joinProj) { super(input1, input2, keys1, keys2, new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()), returnType, hint, Utils.getCallLocationName(4)); this.joinProj = joinProj; }
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) { super(input1, input2, keys1, keys2, new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()), returnType, hint, Utils.getCallLocationName(4)); // We need to use the 4th element in the stack because the call comes through .types(). joinProj = null; }
@Override public Collect<T> run(DataSet<T> input) throws Exception { super.run(input); serializer = input.getType().createSerializer(env.getConfig()); collectHelper = new CollectHelper<>(serializer); input .output(collectHelper) .name("Collect"); return this; }
@Override protected org.apache.flink.api.common.operators.base.MapOperatorBase<IN, OUT, MapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : "Projection " + Arrays.toString(fields); // create operator PlanProjectOperator<IN, OUT> ppo = new PlanProjectOperator<IN, OUT>(fields, name, getInputType(), getResultType(), context.getConfig()); // set input ppo.setInput(input); // set parallelism ppo.setParallelism(this.getParallelism()); ppo.setSemanticProperties(SemanticPropUtil.createProjectionPropertiesSingle(fields, (CompositeType<?>) getInputType())); return ppo; }
private static <T, B extends CopyableIterator<T>> void testReducePerformance (B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().enableObjectReuse(); @SuppressWarnings("unchecked") DataSet<T> output = env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo) .groupBy("0") .reduce(new SumReducer()).setCombineHint(hint); long start = System.currentTimeMillis(); System.out.println(output.count()); long end = System.currentTimeMillis(); if (print) { System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + (end - start) + "ms ==="); } }
public void run() throws Exception { LOG.info("Random seed = {}", RANDOM_SEED); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); for (int parallelism = MAX_PARALLELISM; parallelism > 0; parallelism--) { LOG.info("Parallelism = {}", parallelism); env.setParallelism(parallelism); testReduce(env); testGroupedReduce(env); testJoin(env); testCross(env); } }
@Test public void testDisjointDataflows() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(5); env.getConfig().disableSysoutLogging(); // generate two different flows env.generateSequence(1, 10).output(new DiscardingOutputFormat<Long>()); env.generateSequence(1, 10).output(new DiscardingOutputFormat<Long>()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testIdentityMapWithMissingTypesAndStringTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds .map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>()) .returns(new TypeHint<Tuple3<Integer, Long, String>>(){}); List<Tuple3<Integer, Long, String>> result = identityMapDs.collect(); String expectedResult = "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(1,1,Hi)\n"; compareResultAsText(result, expectedResult); }
@Test public void testFlatMapWithClassTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> identityMapDs = ds .flatMap(new FlatMapper<Tuple3<Integer, Long, String>, Integer>()) .returns(Integer.class); List<Integer> result = identityMapDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }
@Test public void testFaultyAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
@Test public void testFaultyMergeAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyMergeAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
@Test public void testUnsortedGroupReduceWithTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> resultDs = ds .groupBy(0) .reduceGroup(new GroupReducer<Tuple3<Integer, Long, String>, Integer>()) .returns(BasicTypeInfo.INT_TYPE_INFO); List<Integer> result = resultDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }
@Test public void testCombineGroupWithTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> resultDs = ds .groupBy(0) .combineGroup(new GroupCombiner<Tuple3<Integer, Long, String>, Integer>()) .returns(BasicTypeInfo.INT_TYPE_INFO); List<Integer> result = resultDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }
@Test public void testIdentityMapWithMissingTypesAndTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds // all following generics get erased during compilation .map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>()) .returns(new TupleTypeInfo<Tuple3<Integer, Long, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)); List<Tuple3<Integer, Long, String>> result = identityMapDs .collect(); String expectedResult = "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(1,1,Hi)\n"; compareResultAsText(result, expectedResult); }
@Test public void testSortedGroupReduceWithTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> resultDs = ds .groupBy(0) .sortGroup(0, Order.ASCENDING) .reduceGroup(new GroupReducer<Tuple3<Integer, Long, String>, Integer>()) .returns(BasicTypeInfo.INT_TYPE_INFO); List<Integer> result = resultDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }
@Test public void testCoGroupWithTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> resultDs = ds1 .coGroup(ds2) .where(0) .equalTo(0) .with(new CoGrouper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>, Integer>()) .returns(BasicTypeInfo.INT_TYPE_INFO); List<Integer> result = resultDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }
@Test public void testFlatJoinWithTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Integer> resultDs = ds1 .join(ds2) .where(0) .equalTo(0) .with(new FlatJoiner<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>, Integer>()) .returns(BasicTypeInfo.INT_TYPE_INFO); List<Integer> result = resultDs.collect(); String expectedResult = "2\n" + "3\n" + "1\n"; compareResultAsText(result, expectedResult); }