/**
 * Builds a {@code Distinct} operator with a one-hour {@code Time} windowing and
 * checks that the first operator found in the flow reports a {@code Time}
 * instance from {@code getWindowing()}.
 */
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> input = Util.createMockDataset(flow, 3);

  // Only the effect of building the operator matters here; the resulting
  // dataset is never inspected by this test.
  Distinct.of(input)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  // The first operator in the flow is expected to be the Distinct built above.
  Object firstOperator = flow.operators().iterator().next();
  Distinct operator = (Distinct) firstOperator;

  assertTrue(operator.getWindowing() instanceof Time);
}
@Test public void testDistinctOnStreamWithoutWindowingLabels() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput( ListDataSource.unbounded(asList( "one two three four one one two", "one two three three three")) .withReadDelay(Duration.ofSeconds(2))); // expand it to words Dataset<String> words = FlatMap.of(lines) .using(toWords(w -> w)) .output(); Dataset<String> output = Distinct.of(words) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); ListDataSink<String> out = ListDataSink.get(); output.persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), "four", "one", "three", "two", "one", "three", "two"); }
@Test public void test() throws Exception { Flow f = Flow.create("Test"); ListDataSink<Pair<String, String>> output = ListDataSink.get(); Dataset<Pair<String, String>> input = f.createInput(ListDataSource.unbounded( Arrays.asList( Pair.of("foo", "bar"), Pair.of("quux", "ibis"), Pair.of("foo", "bar"))), // ~ force event time e -> 1L); Distinct.of(input) .windowBy(Time.of(Duration.ofSeconds(1))) .output() .persist(output); new TestFlinkExecutor().submit(f).get(); DatasetAssert.unorderedEquals( output.getOutputs(), Pair.of("foo", "bar"), Pair.of("quux", "ibis")); } }
@Test public void testDistinctOnStreamUsingWindowingLabels() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput( ListDataSource.unbounded(asList( "one two three four one one two", "one two three three three")) .withReadDelay(Duration.ofSeconds(2))); // expand it to words Dataset<String> words = FlatMap.of(lines) .using(toWords(w -> w)) .output(); Dataset<Pair<TimeInterval, String>> output = FlatMap.of(Distinct.of(words).windowBy(Time.of(Duration.ofSeconds(1))).output()) .using((UnaryFunctor<String, Pair<TimeInterval, String>>) (elem, context) -> context.collect(Pair.of((TimeInterval) context.getWindow(), elem))) .output(); ListDataSink<String> out = ListDataSink.get(); // strip the labels again because we cannot test them MapElements.of(output) .using(Pair::getSecond) .output() .persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), "four", "one", "three", "two", "one", "three", "two"); }
/**
 * Builds a named {@code Distinct} operator with an explicit {@code Time}
 * windowing and verifies the flow it belongs to, the flow size, the operator's
 * name, its output dataset and that the very same windowing instance is kept.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> input = Util.createMockDataset(flow, 3);
  Time<String> windowing = Time.of(Duration.ofHours(1));

  Dataset<String> result = Distinct.named("Distinct1")
      .of(input)
      .windowBy(windowing)
      .output();

  // checks on the produced dataset and the flow as a whole
  assertEquals(flow, result.getFlow());
  assertEquals(1, flow.size());

  // checks on the single operator held by the flow
  Object firstOperator = flow.operators().iterator().next();
  Distinct operator = (Distinct) firstOperator;
  assertEquals(flow, operator.getFlow());
  assertEquals("Distinct1", operator.getName());
  assertEquals(result, operator.output());
  assertSame(windowing, operator.getWindowing());
}