/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the elements are reduced per key into {@code AccState} within
 * {@code TimeSliding.of(10 ms, 5 ms)} windows, and every reduced pair is emitted
 * together with the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<String, String, AccState<String>> accFactory = AccState::new;

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          // Value: the payload from index 2 onwards.
          .valueBy(pair -> pair.getFirst().substring(2))
          .stateFactory(accFactory)
          .mergeStatesBy(AccState::combine)
          .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the elements are reduced per key into {@code AccState} within
 * {@code TimeSliding.of(10 ms, 5 ms)} windows, and every reduced pair is emitted
 * together with the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<String, String, AccState<String>> accFactory = AccState::new;

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          // Value: the payload from index 2 onwards.
          .valueBy(pair -> pair.getFirst().substring(2))
          .stateFactory(accFactory)
          .mergeStatesBy(AccState::combine)
          .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Pair<String, Integer>> input) { return ReduceStateByKey.of(input) .keyBy(e -> e.getFirst().charAt(0) - '0') .valueBy(Pair::getFirst) .stateFactory((StateFactory<String, Long, CountState<String>>) CountState::new) .mergeStatesBy(CountState::combine) // FIXME .timedBy(Pair::getSecond) and make the assertion in the validation phase stronger .windowBy(Count.of(3)) .output(); }
/**
 * Builds a named {@code ReduceStateByKey} over a mock dataset and verifies that the
 * flow contains exactly one operator exposing the configured name, extractors,
 * state factory, state merger, output dataset and the very windowing instance
 * that was passed to the builder.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 2);
  Time<String> hourly = Time.of(Duration.ofHours(1));

  Dataset<Pair<String, Long>> output =
      ReduceStateByKey.named("ReduceStateByKey1")
          .of(source)
          .keyBy(word -> word)
          .valueBy(word -> 1L)
          .stateFactory(WordCountState::new)
          .mergeStatesBy(WordCountState::combine)
          .windowBy(hourly)
          .output();

  // The resulting dataset belongs to the flow, which now holds a single operator.
  assertEquals(flow, output.getFlow());
  assertEquals(1, flow.size());

  ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("ReduceStateByKey1", operator.getName());
  assertNotNull(operator.getKeyExtractor());
  assertNotNull(operator.getValueExtractor());
  assertNotNull(operator.getStateMerger());
  assertNotNull(operator.getStateFactory());
  assertEquals(output, operator.output());
  // Identity check: the operator must keep the windowing object it was given.
  assertSame(hourly, operator.getWindowing());
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the string payloads are reduced per key into {@code AccState} within
 * {@code Time.of(5 ms)} windows, and every reduced pair is emitted together with
 * the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          .valueBy(pair -> pair.getFirst())
          .stateFactory(AccState<String>::new)
          .mergeStatesBy(AccState::combine)
          .windowBy(Time.of(Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the string payloads are reduced per key into {@code AccState} within
 * {@code Session.of(5 ms)} windows, and every reduced pair is emitted together
 * with the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<String, String, AccState<String>> accFactory = AccState::new;

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          .valueBy(pair -> pair.getFirst())
          .stateFactory(accFactory)
          .mergeStatesBy(AccState::combine)
          .windowBy(Session.of(Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the string payloads are reduced per key into {@code AccState} within
 * {@code Session.of(5 ms)} windows, and every reduced pair is emitted together
 * with the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<String, String, AccState<String>> accFactory = AccState::new;

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          .valueBy(pair -> pair.getFirst())
          .stateFactory(accFactory)
          .mergeStatesBy(AccState::combine)
          .windowBy(Session.of(Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * the string payloads are reduced per key into {@code AccState} within
 * {@code Time.of(5 ms)} windows, and every reduced pair is emitted together with
 * the window it was produced in.
 *
 * @param input pairs of (string payload, integer used as event time)
 * @return triples of (window, key, reduced value)
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, String>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  Dataset<Pair<Integer, String>> reduced =
      ReduceStateByKey.of(timed)
          // Key: offset of the first character from '0'.
          .keyBy(pair -> pair.getFirst().charAt(0) - '0')
          .valueBy(pair -> pair.getFirst())
          .stateFactory(AccState<String>::new)
          .mergeStatesBy(AccState::combine)
          .windowBy(Time.of(Duration.ofMillis(5)))
          .output();

  // Re-emit every reduced pair prefixed with the window reported by the context.
  UnaryFunctor<Pair<Integer, String>, Triple<TimeInterval, Integer, String>> attachWindow =
      (pair, collector) -> {
        TimeInterval window = (TimeInterval) collector.getWindow();
        collector.collect(Triple.of(window, pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindow).output();
}
/**
 * Builds the flow under test: integers are keyed by their remainder modulo 3,
 * reduced through {@code SortState} under {@code ReduceByKeyTest.TestWindowing},
 * and each reduced pair is emitted together with the value of the
 * {@code IntWindow} it was produced in.
 *
 * @param input integers to reduce
 * @return triples of (window value, key, reduced value)
 */
@Override
protected Dataset<Triple<Integer, Integer, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<Integer, Integer>> reduced =
      ReduceStateByKey.of(input)
          .keyBy(number -> number % 3)
          .valueBy(number -> number)
          .stateFactory(SortState::new)
          .mergeStatesBy(SortState::combine)
          .windowBy(new ReduceByKeyTest.TestWindowing())
          .output();

  // Prefix every reduced pair with the value carried by the context's window.
  UnaryFunctor<Pair<Integer, Integer>, Triple<Integer, Integer, Integer>> attachWindowValue =
      (pair, collector) -> {
        IntWindow window = (IntWindow) collector.getWindow();
        collector.collect(Triple.of(window.getValue(), pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindowValue).output();
}
/**
 * Verifies that windowing configured through {@code applyIf(true, ...)} ends up on
 * the built operator: the single operator in the flow must report a {@code Time}
 * windowing.
 */
@Test
public void testWindow_applyIf() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 2);

  // The builder result is intentionally discarded; only the registered operator matters.
  ReduceStateByKey.of(source)
      .keyBy(word -> word)
      .valueBy(word -> 1L)
      .stateFactory(WordCountState::new)
      .mergeStatesBy(WordCountState::combine)
      .applyIf(true, builder -> builder.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertTrue(operator.getWindowing() instanceof Time);
}
/**
 * Builds the flow under test: integers are keyed by their remainder modulo 3,
 * reduced through {@code SortState} under {@code ReduceByKeyTest.TestWindowing},
 * and each reduced pair is emitted together with the value of the
 * {@code IntWindow} it was produced in.
 *
 * @param input integers to reduce
 * @return triples of (window value, key, reduced value)
 */
@Override
protected Dataset<Triple<Integer, Integer, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<Integer, Integer>> reduced =
      ReduceStateByKey.of(input)
          .keyBy(number -> number % 3)
          .valueBy(number -> number)
          .stateFactory(SortState::new)
          .mergeStatesBy(SortState::combine)
          .windowBy(new ReduceByKeyTest.TestWindowing())
          .output();

  // Prefix every reduced pair with the value carried by the context's window.
  UnaryFunctor<Pair<Integer, Integer>, Triple<Integer, Integer, Integer>> attachWindowValue =
      (pair, collector) -> {
        IntWindow window = (IntWindow) collector.getWindow();
        collector.collect(Triple.of(window.getValue(), pair.getFirst(), pair.getSecond()));
      };

  return FlatMap.of(reduced).using(attachWindowValue).output();
}
/**
 * Verifies that a windowing passed to {@code windowBy} is stored on the built
 * operator: the single operator in the flow must report a {@code Time} windowing.
 */
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  // The output dataset is never inspected here, so it is not kept in a local;
  // building the operator registers it in the flow, which is what gets asserted.
  ReduceStateByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      .stateFactory(WordCountState::new)
      .mergeStatesBy(WordCountState::combine)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  ReduceStateByKey reduce = (ReduceStateByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Verifies that an operator built without an explicit name reports the name
 * {@code "ReduceStateByKey"}.
 */
@Test
public void testBuild_ImplicitName() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 2);

  // The builder result is intentionally discarded; only the registered operator matters.
  ReduceStateByKey.of(source)
      .keyBy(word -> word)
      .valueBy(word -> 1L)
      .stateFactory(WordCountState::new)
      .mergeStatesBy(WordCountState::combine)
      .output();

  ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertEquals("ReduceStateByKey", operator.getName());
}
/**
 * Builds the flow under test: every integer is mapped to the same empty-string
 * key and reduced through {@code CountingSortState}. The state merger passed to
 * the builder is a no-op.
 *
 * @param input integers to reduce
 * @return pairs of (empty-string key, reduced value)
 */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<String, Integer>> reduced =
      ReduceStateByKey.of(input)
          // A constant key puts all elements into one group.
          .keyBy(ignored -> "")
          .valueBy(number -> number)
          .stateFactory(CountingSortState::new)
          // Merging deliberately does nothing.
          .mergeStatesBy((target, others) -> {})
          .output();

  return reduced;
}
/**
 * Builds the flow under test: every integer is mapped to the same empty-string
 * key and reduced through {@code CountingSortState}. The state merger passed to
 * the builder is a no-op.
 *
 * @param input integers to reduce
 * @return pairs of (empty-string key, reduced value)
 */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<String, Integer>> reduced =
      ReduceStateByKey.of(input)
          // A constant key puts all elements into one group.
          .keyBy(ignored -> "")
          .valueBy(number -> number)
          .stateFactory(CountingSortState::new)
          // Merging deliberately does nothing.
          .mergeStatesBy((target, others) -> {})
          .output();

  return reduced;
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * elements are keyed by their first component, that same component is fed into
 * {@code CountState}, and the reduction is windowed by {@code Time.of(1 s)}.
 *
 * @param input pairs of (word, long used as event time)
 * @return pairs of (word, value produced by {@code CountState})
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<Word, Long>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<Word, Long, CountState<Word>> countFactory = CountState::new;

  return ReduceStateByKey.of(timed)
      // The word is both the grouping key and the value handed to the state.
      .keyBy(pair -> pair.getFirst())
      .valueBy(pair -> pair.getFirst())
      .stateFactory(countFactory)
      .mergeStatesBy(CountState::combine)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
}
/**
 * Builds the flow under test: every integer is mapped to the same empty-string
 * key and reduced through {@code SortState}, with states merged by
 * {@code SortState::combine}. No windowing is configured.
 *
 * @param input integers to reduce
 * @return pairs of (empty-string key, reduced value)
 */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<String, Integer>> reduced =
      ReduceStateByKey.of(input)
          // A constant key puts all elements into one group.
          .keyBy(ignored -> "")
          .valueBy(number -> number)
          .stateFactory(SortState::new)
          .mergeStatesBy(SortState::combine)
          .output();

  return reduced;
}
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Pair<String, Integer>> input) { return ReduceStateByKey.of(input) .keyBy(e -> e.getFirst().charAt(0) - '0') .valueBy(Pair::getFirst) .stateFactory((StateFactory<String, Long, CountState<String>>) CountState::new) .mergeStatesBy(CountState::combine) // FIXME .timedBy(Pair::getSecond) and make the assertion in the validation phase stronger .windowBy(Count.of(3)) .output(); }
/**
 * Builds the flow under test: every integer is mapped to the same empty-string
 * key and reduced through {@code SortState}, with states merged by
 * {@code SortState::combine}. No windowing is configured.
 *
 * @param input integers to reduce
 * @return pairs of (empty-string key, reduced value)
 */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<String, Integer>> reduced =
      ReduceStateByKey.of(input)
          // A constant key puts all elements into one group.
          .keyBy(ignored -> "")
          .valueBy(number -> number)
          .stateFactory(SortState::new)
          .mergeStatesBy(SortState::combine)
          .output();

  return reduced;
}
/**
 * Builds the flow under test: event time is taken from the second pair component,
 * elements are keyed by their first component, that same component is fed into
 * {@code CountState}, and the reduction is windowed by {@code Time.of(1 s)}.
 *
 * @param input pairs of (word, long used as event time)
 * @return pairs of (word, value produced by {@code CountState})
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  // The second component of each pair is used as the element's event time.
  Dataset<Pair<Word, Long>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  StateFactory<Word, Long, CountState<Word>> countFactory = CountState::new;

  return ReduceStateByKey.of(timed)
      // The word is both the grouping key and the value handed to the state.
      .keyBy(pair -> pair.getFirst())
      .valueBy(pair -> pair.getFirst())
      .stateFactory(countFactory)
      .mergeStatesBy(CountState::combine)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
}