/**
 * Checks that a windowing passed through {@code applyIf} with a {@code true}
 * condition ends up on the {@code ReduceByKey} operator registered in the flow.
 */
@Test
public void testWindow_applyIf() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Unbox via Long::longValue; the Long(long) constructor is deprecated
      // and would box each value again just to unbox it for the sum.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .applyIf(true, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  // The first operator of the flow is expected to be the ReduceByKey built above.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
.applyIf(windowing != null, b -> b.windowBy(windowing)) .outputValues();
.valueBy(line -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofDays(1))) .output();
.valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofMillis(10))) .output();
.valueBy(e -> 1) .combineBy(Sums.ofInts()) .windowBy(windowing) .output();
.valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.valueBy(e -> 1) .combineBy(Sums.ofInts()) .windowBy(windowing) .output();
.valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .output();
.valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5))) .output();
return buf.toString(); }) .windowBy(Time.of(Duration.ofMillis(5))) .output();
.valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofMillis(5))) .output();
.valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofMillis(5))) .output();
@Test public void testWordCountBatch() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput(ListDataSource.bounded( asList("one two three four", "one two three", "one two", "one"))); // expand it to words Dataset<Pair<String, Long>> words = FlatMap.of(lines) .using(toWordCountPair()) .output(); // reduce it to counts, use windowing, so the output is batch or stream // depending on the type of input Dataset<Pair<String, Long>> streamOutput = ReduceByKey .of(words) .keyBy(Pair::getFirst) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .output(); ListDataSink<Pair<String, Long>> out = ListDataSink.get(); streamOutput.persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), Pair.of("one", 4L), Pair.of("two", 3L), Pair.of("three", 2L), Pair.of("four", 1L)); }
@Test public void testMapWithOutputGroupping() throws InterruptedException, ExecutionException { ListDataSource<String> input = ListDataSource.unbounded(asList( "one two three four four two two", "one one one two two three")); Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput(input); // expand it to words Dataset<Pair<String, Long>> words = FlatMap.of(lines) .using(toWordCountPair()) .output(); ListDataSink<Pair<String, Long>> sink = ListDataSink.get(); // apply wordcount transform in output sink words.persist( sink.withPrepareDataset(d -> ReduceByKey.of(d) .keyBy(Pair::getFirst) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .output() .persist(sink))); executor.submit(flow).get(); DatasetAssert.unorderedEquals( sink.getOutputs(), Pair.of("one", 4L), Pair.of("two", 5L), Pair.of("three", 2L), Pair.of("four", 2L)); }