/**
 * Builds the default "pages" input: the sequence of numbers from 1 to 15 (inclusive).
 *
 * @param env the execution environment used to create the data set
 * @return a data set containing the values 1..15
 */
public static DataSet<Long> getDefaultPagesDataSet(ExecutionEnvironment env) {
    final DataSet<Long> defaultPages = env.generateSequence(1, 15);
    return defaultPages;
}
public static void main(String[] args) throws Exception { final long numSamples = args.length > 0 ? Long.parseLong(args[0]) : 1000000; final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // count how many of the samples would randomly fall into // the unit circle DataSet<Long> count = env.generateSequence(1, numSamples) .map(new Sampler()) .reduce(new SumReducer()); long theCount = count.collect().get(0); System.out.println("We estimate Pi to be: " + (theCount * 4.0 / numSamples)); }
/**
 * Maps the sequence 1..10 with a broadcast set that is the union of 1..5 and 6..10,
 * reduces the mapped values and expects a single result of 3025.
 *
 * <p>NOTE(review): 3025 equals 55 * 55, where 55 is the sum of 1..10; the exact
 * computation lives in {@code Mapper}/{@code Reducer}, which are not visible here.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Long> input = env.generateSequence(1, 10);
    DataSet<Long> lowerHalf = env.generateSequence(1, 5);
    DataSet<Long> upperHalf = env.generateSequence(6, 10);

    // The broadcast variable is the union of both halves.
    DataSet<Long> broadcast = lowerHalf.union(upperHalf);

    List<Long> result = input
            .map(new Mapper())
            .withBroadcastSet(broadcast, BC_NAME)
            .reduce(new Reducer())
            .collect();

    Long expected = Long.valueOf(3025);
    Assert.assertEquals(expected, result.get(0));
}
/**
 * Creates a sequence source from {@code info.frm} to {@code info.to}, followed by a
 * {@code SerializerMap} step, and registers the result in {@code sets} under
 * {@code info.setID}. Both operators run with {@code info.parallelism}.
 *
 * @param env the execution environment used to create the source
 * @param info the operation description supplying range, parallelism and set id
 */
private void createSequenceSource(ExecutionEnvironment env, PythonOperationInfo info) {
    // Source and post-step share the same parallelism.
    final int parallelism = info.parallelism;

    sets.add(
            info.setID,
            env.generateSequence(info.frm, info.to)
                    .setParallelism(parallelism)
                    .name("SequenceSource")
                    .map(new SerializerMap<Long>())
                    .setParallelism(parallelism)
                    .name("SequenceSourcePostStep"));
}
/**
 * Zips the sequence 1..100 with ids via {@code DataSetUtils.zipWithUniqueId} and
 * checks that the number of distinct ids equals the number of input elements.
 */
@Test
public void testZipWithUniqueId() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    long expectedSize = 100L;

    DataSet<Long> numbers = env.generateSequence(1L, expectedSize);
    DataSet<Tuple2<Long, Long>> zipped = DataSetUtils.zipWithUniqueId(numbers);

    // Keep only the first tuple field, i.e. the assigned id.
    MapFunction<Tuple2<Long, Long>, Long> idExtractor = new MapFunction<Tuple2<Long, Long>, Long>() {
        @Override
        public Long map(Tuple2<Long, Long> value) throws Exception {
            return value.f0;
        }
    };
    DataSet<Long> ids = zipped.map(idExtractor);

    // Collecting into a set drops duplicates, so the size only matches if all ids are distinct.
    Set<Long> distinctIds = new HashSet<>(ids.collect());

    Assert.assertEquals(expectedSize, distinctIds.size());
}
@Test(expected = InvalidProgramException.class) public void testFailPrimitiveOrder2() { final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Long> longDs = env .generateSequence(0, 2); // must not work longDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput("0", Order.ASCENDING); }
@Test(expected = InvalidProgramException.class) public void testFailPrimitiveOrder3() { final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Long> longDs = env .generateSequence(0, 2); // must not work longDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput("nope", Order.ASCENDING); }
@Test(expected = InvalidProgramException.class) public void testFailPrimitiveOrder1() { final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Long> longDs = env .generateSequence(0, 2); // must not work longDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput(0, Order.ASCENDING); }
/**
 * Runs a bulk iteration (3 iterations) over the sequence 1..4 whose step result is the
 * union of a static input (unioned with itself) and the partial solution (unioned with
 * itself). The final result is written into the {@code result} collection of this test.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> inputStatic = env.generateSequence(1, 4);
    DataSet<Long> inputIteration = env.generateSequence(1, 4);

    IterativeDataSet<Long> iteration = inputIteration.iterate(3);

    // Each input is doubled by a self-union before both are combined.
    DataSet<Long> staticTwice = inputStatic.union(inputStatic);
    DataSet<Long> partialTwice = iteration.union(iteration);
    DataSet<Long> nextPartialSolution = staticTwice.union(partialTwice);

    DataSet<Long> iterationResult = iteration.closeWith(nextPartialSolution);

    iterationResult.output(new LocalCollectionOutputFormat<Long>(this.result));
    env.execute();
}
/**
 * Counts the elements per partition of the sequence 0..99 and checks that
 * (a) there is one count entry per parallel partition and
 * (b) the counts add up to the total number of elements.
 */
@Test
public void testCountElementsPerPartition() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    long expectedSize = 100L;

    DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);
    DataSet<Tuple2<Integer, Long>> perPartition = DataSetUtils.countElementsPerPartition(numbers);

    // One entry per partition is expected.
    int expectedEntries = env.getParallelism();
    long actualEntries = perPartition.count();
    Assert.assertEquals(expectedEntries, actualEntries);

    // The per-partition counts (tuple field 1) must sum up to the input size.
    long totalCount = perPartition.sum(1).collect().get(0).f1.longValue();
    Assert.assertEquals(expectedSize, totalCount);
}
@Test public void testPrimitiveOrder() { final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Long> longDs = env .generateSequence(0, 2); // should work try { longDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput("*", Order.ASCENDING); } catch (Exception e) { Assert.fail(); } }
/**
 * Runs the sequence 1..9 through the given mapper with a fixed-delay restart strategy
 * configured from {@code retries} (and a delay argument of 0), then checks that the
 * collected result, compared as text, is exactly the numbers 1 to 9.
 *
 * @param mapper the map function applied to every element
 * @param retries first argument passed to {@code RestartStrategies.fixedDelayRestart}
 * @throws Exception if the job fails or the result does not match
 */
private void executeTask(MapFunction<Long, Long> mapper, int retries) throws Exception {
    final String expected = "1\n2\n3\n4\n5\n6\n7\n8\n9";

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(retries, 0));

    List<Long> mapped = env
            .generateSequence(1, 9)
            .map(mapper)
            .collect();

    MultipleProgramsTestBase.compareResultAsText(mapped, expected);
}
/**
 * Runs a bulk iteration (100 iterations) over the sequence 1..10 whose step function
 * is the identity: the iteration is closed with its own partial solution. The outcome
 * is written into the {@code result} collection.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    LocalCollectionOutputFormat<Long> collectingSink = new LocalCollectionOutputFormat<Long>(result);

    IterativeDataSet<Long> loop = env.generateSequence(1, 10).iterate(100);

    // Feed the partial solution straight back as the next partial solution.
    loop.closeWith(loop).output(collectingSink);

    env.execute();
}
@Test public void testFaultyMergeAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyMergeAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
@Test public void testFaultyAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
/**
 * Runs a delta iteration (at most 20 iterations, key position 0) in which the same
 * data set serves as initial solution set and initial workset, and in which the
 * workset is fed back unchanged as both the solution set delta and the next workset.
 * The outcome is written into the {@code result} collection.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> pairs = env.generateSequence(1, 20).map(new Dupl());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
            pairs.iterateDelta(pairs, 20, 0);

    // The workset is used unchanged for both feedback channels.
    DataSet<Tuple2<Long, Long>> solutionSetDelta = deltaIteration.getWorkset();
    DataSet<Tuple2<Long, Long>> nextWorkset = deltaIteration.getWorkset();

    deltaIteration
            .closeWith(solutionSetDelta, nextWorkset)
            .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));

    env.execute();
}
/**
 * Builds a program with two bulk iterations (100 iterations each) over the sequence
 * 1..100: the result of the first iteration is joined with the partial solution inside
 * the second ("main") iteration. The final output is discarded.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> data1 = env.generateSequence(1, 100);
    DataSet<Long> data2 = env.generateSequence(1, 100);

    // First iteration: the step function is a single IdMapper.
    IterativeDataSet<Long> firstIteration = data1.iterate(100);
    DataSet<Long> firstStep = firstIteration.map(new IdMapper());
    DataSet<Long> firstResult = firstIteration.closeWith(firstStep);

    // Main iteration: starts from the mapped second input.
    DataSet<Long> mainInput = data2.map(new IdMapper());
    IterativeDataSet<Long> mainIteration = mainInput.iterate(100);

    // Inside the main iteration, join the partial solution with the first iteration's result.
    DataSet<Long> joined = mainIteration
            .join(firstResult)
            .where(new IdKeyExtractor())
            .equalTo(new IdKeyExtractor())
            .with(new Joiner());

    DataSet<Long> mainResult = mainIteration.closeWith(joined);

    mainResult.output(new DiscardingOutputFormat<Long>());

    env.execute();
}
/**
 * Test support for Date and enum serialization.
 *
 * <p>Unions POJOs mapped from the sequence 0..2 with the POJOs provided by
 * {@code CollectionDataSets.getPojoWithDateAndEnum}, groups them by the field
 * {@code "group"}, and expects the group reduce to emit {@code "ok"} twice.
 */
@Test
public void testSupportForDataAndEnumSerialization() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<PojoWithDateAndEnum> generated = env.generateSequence(0, 2).map(new Mapper1());
    DataSet<PojoWithDateAndEnum> combined =
            generated.union(CollectionDataSets.getPojoWithDateAndEnum(env));

    DataSet<String> verdicts = combined
            .groupBy("group")
            .reduceGroup(new GroupReducer1());

    compareResultAsText(verdicts.collect(), "ok\nok");
}
/**
 * Partitions the sequence 1..1000 with a custom partitioner and maps it with
 * {@code FailExceptInPartitionZeroMapper}; the output is discarded.
 *
 * <p>Unless the test runs in collection execution mode, a parallelism greater than 1
 * is required. NOTE(review): judging by their names, the partitioner sends every record
 * to partition 0 and the mapper fails elsewhere — confirm against those classes.
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final boolean distributedExecution = !isCollectionExecution();
    if (distributedExecution) {
        Assert.assertTrue(env.getParallelism() > 1);
    }

    env.generateSequence(1, 1000)
            .partitionCustom(new AllZeroPartitioner(), new IdKeySelector<Long>())
            .map(new FailExceptInPartitionZeroMapper())
            .output(new DiscardingOutputFormat<Long>());

    env.execute();
}
/**
 * Wraps the sequence 0..100000 into single-field tuples and joins three derived data
 * sets on field 0 in two consecutive joins, each projecting field 0 of its first input.
 * The result is written as text to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> numbers = env.generateSequence(0, 100000);

    DataSet<Tuple1<Long>> wrapped = numbers.map(new TupleWrapper());
    DataSet<Tuple1<Long>> projected = wrapped.project(0);
    DataSet<Tuple1<Long>> wrappedAgain = numbers.map(new TupleWrapper());

    // (projected JOIN wrappedAgain) JOIN wrapped, always on field 0.
    projected.join(wrappedAgain).where(0).equalTo(0).projectFirst(0)
            .join(wrapped).where(0).equalTo(0).projectFirst(0)
            .writeAsText(resultPath);

    env.execute();
}