/**
 * Samples the source data set with the given settings and passes the collected sample,
 * together with {@code getSourceStrings()}, to {@code containsResultAsText} for checking.
 *
 * @param withReplacement forwarded unchanged to {@code DataSetUtils.sample}
 * @param fraction forwarded unchanged to {@code DataSetUtils.sample}
 * @param seed forwarded unchanged to {@code DataSetUtils.sample}
 * @throws Exception propagated from building, sampling, or collecting the data set
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
    final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
    FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(executionEnv);

    // Sample and materialize in one chain; the operator itself is not needed afterwards.
    List<String> sampledElements = DataSetUtils.sample(source, withReplacement, fraction, seed).collect();

    containsResultAsText(sampledElements, getSourceStrings());
}
/**
 * Range-partitions the generated sequence 0..10000 using each element as its own key,
 * maps every partition through a {@code MinMaxSelector}, sorts the resulting tuples and
 * verifies that neighbouring tuples line up: for every tuple after the first one,
 * {@code f0 < f1} must hold and {@code f0} must equal the previous tuple's {@code f1 + 1}.
 *
 * <p>The first tuple in sorted order only seeds the running maximum; nothing is asserted
 * about it.
 *
 * @throws Exception propagated from executing or collecting the data set
 */
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> sequence = env.generateSequence(0, 10000);

    KeySelector<Long, Long> selfKey = new ObjectSelfKeySelector();
    MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxPerPartition =
        new MinMaxSelector<>(new LongComparator(true));
    Comparator<Tuple2<Long, Long>> rangeOrder = new Tuple2Comparator(new LongComparator(true));

    List<Tuple2<Long, Long>> ranges = sequence
        .partitionByRange(selfKey)
        .mapPartition(minMaxPerPartition)
        .collect();
    Collections.sort(ranges, rangeOrder);

    // -1 marks "no tuple seen yet"; it cannot collide with real data because the
    // generated sequence starts at 0.
    long lastMax = -1;
    for (Tuple2<Long, Long> range : ranges) {
        if (lastMax != -1) {
            long currentMin = range.f0;
            assertTrue(range.f0 < range.f1);
            // Consecutive ranges must be gap-free and non-overlapping.
            assertEquals(lastMax + 1, currentMin);
        }
        lastMax = range.f1;
    }
}
.withOrders(Order.ASCENDING, Order.DESCENDING) .mapPartition(minMaxSelector) .collect();
.mapPartition(new MinMaxSelector<>(new ComparablePojoComparator())) .mapPartition(new ExtractComparablePojo()) .collect();