/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and the values
 * of each key are gathered into a {@link Set}, with {@code Count.of(3)} supplied as windowing.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, set of values) pairs
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and the values
 * of each key are gathered into a {@link Set}, with {@code Count.of(3)} supplied as windowing.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, set of values) pairs
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Builds the operator chain under test.
 *
 * <p>The second component of each input pair is used as the event time. Elements are then keyed
 * by the first character of the string minus {@code '0'}, the strings of each key are collected
 * into a {@link Set} under {@code Session.of(Duration.ofMillis(5))} windowing, and finally every
 * result is emitted together with the window obtained from the collector.
 *
 * @param input dataset of (string, timestamp) pairs
 * @return dataset of (window, key, set of strings) triples
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, Set<String>>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  Dataset<Pair<String, Integer>> timestamped =
      AssignEventTime.of(input).using(Pair::getSecond).output();

  Duration fiveMillis = Duration.ofMillis(5);
  Dataset<Pair<Integer, Set<String>>> grouped = ReduceByKey.of(timestamped)
      .keyBy(pair -> pair.getFirst().charAt(0) - '0')
      .valueBy(Pair::getFirst)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Session.of(fiveMillis))
      .output();

  return FlatMap.of(grouped)
      .using((UnaryFunctor<Pair<Integer, Set<String>>, Triple<TimeInterval, Integer, Set<String>>>)
          (pair, collector) -> {
            // Attach the window the collector reports to the reduced (key, values) pair.
            TimeInterval window = (TimeInterval) collector.getWindow();
            collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
          })
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and folded into
 * a sum starting from {@code 0}, with {@code GlobalWindowing.get()} supplied as windowing.
 *
 * <p>For every folded value the {@code "evens"} counter is incremented when the value is even,
 * otherwise the {@code "odds"} counter is incremented.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, sum) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2)
      .valueBy(number -> number)
      .reduceBy(Fold.of(0, (Integer sum, Integer next, Collector<Integer> collector) -> {
        String counterName = (next % 2 == 0) ? "evens" : "odds";
        collector.getCounter(counterName).increment();
        collector.collect(sum + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and folded into
 * a sum starting from {@code 0}, with {@code GlobalWindowing.get()} supplied as windowing.
 *
 * <p>For every folded value the {@code "evens"} counter is incremented when the value is even,
 * otherwise the {@code "odds"} counter is incremented.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, sum) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2)
      .valueBy(number -> number)
      .reduceBy(Fold.of(0, (Integer sum, Integer next, Collector<Integer> collector) -> {
        String counterName = (next % 2 == 0) ? "evens" : "odds";
        collector.getCounter(counterName).increment();
        collector.collect(sum + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Prepares the flow used by the tests.
 *
 * <p>The flow is named after the test class and reads from a {@code MockStreamDataSource}. The
 * input is passed through an identity {@code MapElements}, reduced by key to the constant
 * {@code 1L}, joined back with the mapped dataset and persisted to a {@code StdoutSink}. Both the
 * reduction and the join use {@code Time.of(Duration.ofSeconds(1))} windowing.
 *
 * @throws Exception declared by the original signature
 */
@Before
public void before() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());

  Dataset<Object> identity = MapElements.of(input).using(element -> element).output();

  Duration oneSecond = Duration.ofSeconds(1);

  Dataset<Pair<Object, Long>> ones = ReduceByKey.of(identity)
      .keyBy(element -> element)
      .reduceBy(values -> 1L)
      .windowBy(Time.of(oneSecond))
      .output();

  Dataset<Pair<Object, Long>> joined = Join.of(identity, ones)
      .by(element -> element, Pair::getFirst)
      .using((Object left, Pair<Object, Long> right, Collector<Long> collector) ->
          collector.collect(right.getSecond()))
      .windowBy(Time.of(oneSecond))
      .output();

  joined.persist(new StdoutSink<>());
}
/**
 * Verifies that building a {@code ReduceByKey} with {@code withSortedValues} leaves the
 * operator's {@code valueComparator} non-null.
 */
@Test
public void testBuild_sortedValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Unbox with Long::longValue instead of going through the deprecated Long(long) constructor.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  // The operator built above is the first one the flow reports.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.valueComparator);
}
/**
 * Verifies that a {@code ReduceByKey} built with {@code Time} windowing reports a {@code Time}
 * instance from {@code getWindowing()} and has a null {@code valueComparator} when no value
 * sorting was requested.
 */
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Unbox with Long::longValue instead of going through the deprecated Long(long) constructor.
      .combineBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  // The operator built above is the first one the flow reports.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
  assertNull(reduce.valueComparator);
}
/**
 * Builds the resulting dataset by delegating to a {@code DatasetBuilder5} created from this
 * builder's {@code name}, {@code input}, {@code keyExtractor}, {@code keyClass},
 * {@code valueExtractor}, {@code reducer} and {@code valuesComparator}; the seventh constructor
 * argument is passed as {@code null}.
 *
 * <p>NOTE(review): the {@code null} argument presumably means "no windowing" - confirm against
 * the {@code DatasetBuilder5} constructor.
 *
 * @param outputHints hints forwarded unchanged to the delegate's {@code output} call
 * @return the dataset returned by the delegate builder
 */
@Override public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) { return new DatasetBuilder5<>( name, input, keyExtractor, keyClass, valueExtractor, reducer, null, valuesComparator) .output(outputHints); } }
/**
 * Builds the operator under test: the second component of each pair is used as event time, then
 * elements are keyed by the first component, each mapped to {@code 1L} and combined with
 * {@code Sums.ofLongs()} under {@code Time.of(Duration.ofSeconds(1))} windowing.
 *
 * @param input dataset of (key, timestamp) pairs
 * @return dataset of (key, combined value) pairs
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Pair<Integer, Long>> input) {
  Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input).using(Pair::getSecond).output();
  Duration oneSecond = Duration.ofSeconds(1);

  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst, Integer.class)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(oneSecond))
      .output();
}
/**
 * Builds the operator under test: the second component of each pair is used as event time, then
 * elements are keyed by their {@code Word}, each mapped to {@code 1L} and combined with
 * {@code Sums.ofLongs()} under {@code Time.of(Duration.ofSeconds(1))} windowing.
 *
 * @param input dataset of (word, timestamp) pairs
 * @return dataset of (word, combined value) pairs
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  Dataset<Pair<Word, Long>> timestamped =
      AssignEventTime.of(input).using(Pair::getSecond).output();
  Duration oneSecond = Duration.ofSeconds(1);

  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(oneSecond))
      .output();
}
/**
 * Builds the operator under test: the second component of each pair is used as event time, then
 * elements are keyed by their {@code Word}, each mapped to {@code 1L} and combined with
 * {@code Sums.ofLongs()} under {@code Time.of(Duration.ofSeconds(1))} windowing.
 *
 * @param input dataset of (word, timestamp) pairs
 * @return dataset of (word, combined value) pairs
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  Dataset<Pair<Word, Long>> timestamped =
      AssignEventTime.of(input).using(Pair::getSecond).output();
  Duration oneSecond = Duration.ofSeconds(1);

  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(oneSecond))
      .output();
}
/**
 * Builds the operator under test: pairs are keyed by their first component, their second
 * component is taken as the value, and values are combined with {@code Sums.ofLongs()} using a
 * {@code CWindowing} constructed with {@code 3}.
 *
 * @param input dataset of (string, long) pairs
 * @return dataset of (string, combined value) pairs
 */
@Override
protected Dataset<Pair<String, Long>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(entry -> entry.getFirst())
      .valueBy(entry -> entry.getSecond())
      .combineBy(Sums.ofLongs())
      .windowBy(new CWindowing<>(3))
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 3, each mapped to
 * {@code 1L} and combined with {@code Sums.ofLongs()} using a new {@code TestWindowing}.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, combined value) pairs
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 3, Integer.class)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(new TestWindowing())
      .output();
}
/**
 * Builds the operator under test: the second component of each pair is used as event time, then
 * elements are keyed by the first component, each mapped to {@code 1L} and combined with
 * {@code Sums.ofLongs()} under {@code Time.of(Duration.ofSeconds(1))} windowing.
 *
 * @param input dataset of (key, timestamp) pairs
 * @return dataset of (key, combined value) pairs
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Pair<Integer, Long>> input) {
  Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input).using(Pair::getSecond).output();
  Duration oneSecond = Duration.ofSeconds(1);

  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst, Integer.class)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(oneSecond))
      .output();
}
/**
 * Builds the resulting dataset by delegating to a {@code DatasetBuilder5} created from this
 * builder's {@code name}, {@code input}, {@code keyExtractor}, {@code keyClass},
 * {@code valueExtractor}, {@code reducer} and {@code valuesComparator}; the seventh constructor
 * argument is passed as {@code null}.
 *
 * <p>NOTE(review): the {@code null} argument presumably means "no windowing" - confirm against
 * the {@code DatasetBuilder5} constructor.
 *
 * @param outputHints hints forwarded unchanged to the delegate's {@code output} call
 * @return the dataset returned by the delegate builder
 */
@Override public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) { return new DatasetBuilder5<>( name, input, keyExtractor, keyClass, valueExtractor, reducer, null, valuesComparator) .output(outputHints); } }
/**
 * Builds the operator under test: pairs are keyed by their first component, their second
 * component is taken as the value, and values are combined with {@code Sums.ofLongs()} using a
 * {@code CWindowing} constructed with {@code 3}.
 *
 * @param input dataset of (string, long) pairs
 * @return dataset of (string, combined value) pairs
 */
@Override
protected Dataset<Pair<String, Long>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(entry -> entry.getFirst())
      .valueBy(entry -> entry.getSecond())
      .combineBy(Sums.ofLongs())
      .windowBy(new CWindowing<>(3))
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 3, each mapped to
 * {@code 1L} and combined with {@code Sums.ofLongs()} using a new {@code TestWindowing}.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, combined value) pairs
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 3, Integer.class)
      .valueBy(ignored -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(new TestWindowing())
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and reduced with
 * {@code Fold.whileEmittingEach} starting from {@code 0} and adding each value to the
 * accumulator, with {@code Count.of(3)} supplied as windowing.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, folded value) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .reduceBy(Fold.whileEmittingEach(0, (sum, next) -> sum + next))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Builds the operator under test: integers are keyed by their remainder modulo 2 and reduced with
 * {@code Fold.whileEmittingEach} starting from {@code 0} and adding each value to the
 * accumulator, with {@code Count.of(3)} supplied as windowing.
 *
 * @param input dataset of integers to reduce
 * @return dataset of (key, folded value) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .reduceBy(Fold.whileEmittingEach(0, (sum, next) -> sum + next))
      .windowBy(Count.of(3))
      .output();
}