/**
 * Builds a join of the two inputs keyed by {@code Integer}.
 *
 * <p>The left key is the element itself; the right key is the element modulo 10,
 * narrowed to {@code int}. For every pair handed to the join function a single
 * string of the form {@code <left>+<right>} is collected.
 *
 * @param left dataset of integers (left side of the join)
 * @param right dataset of longs (right side of the join)
 * @return the output dataset of the join operator
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return Join.of(left, right)
      .by(
          leftValue -> leftValue,
          rightValue -> (int) (rightValue % 10),
          Integer.class)
      .using((Integer leftValue, Long rightValue, Collector<String> out) ->
          out.collect(leftValue + "+" + rightValue))
      .output();
}
/**
 * Builds a join of the two inputs keyed by {@code Integer}.
 *
 * <p>The left key is the element itself; the right key is the element modulo 10,
 * narrowed to {@code int}. For every pair handed to the join function a single
 * string of the form {@code <left>+<right>} is collected.
 *
 * @param left dataset of integers (left side of the join)
 * @param right dataset of longs (right side of the join)
 * @return the output dataset of the join operator
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return Join.of(left, right)
      .by(
          leftValue -> leftValue,
          rightValue -> (int) (rightValue % 10),
          Integer.class)
      .using((Integer leftValue, Long rightValue, Collector<String> out) ->
          out.collect(leftValue + "+" + rightValue))
      .output();
}
@Test public void testBuild_ImplicitName() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 1); Dataset<String> right = Util.createMockDataset(flow, 1); Join.of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .output(); Join join = (Join) flow.operators().iterator().next(); assertEquals("Join", join.getName()); }
/**
 * Joins the two inputs inside {@code Session} windows and returns the joined
 * values without their key.
 *
 * <p>Both inputs first get their event time assigned from the second component
 * of each pair. All elements are then joined under the same empty-string key,
 * windowed by {@code Session.of(Duration.ofMillis(10))}. Each result is a triple
 * of (window, first component of the left pair, first component of the right pair).
 *
 * @param left left input of (name, timestamp) pairs
 * @param right right input of (name, timestamp) pairs
 * @return dataset of the joined triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {

  Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  Dataset<Pair<String, Triple<TimeInterval, String, String>>> keyedResult =
      Join.of(timedLeft, timedRight)
          .by(p -> "", p -> "", String.class)
          .using((Pair<String, Long> leftPair,
                  Pair<String, Long> rightPair,
                  Collector<Triple<TimeInterval, String, String>> out) -> {
            // The window is read from the collector and cast to TimeInterval.
            TimeInterval window = (TimeInterval) out.getWindow();
            out.collect(Triple.of(window, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  // Drop the (constant) join key and keep only the triple.
  return MapElements.of(keyedResult).using(Pair::getSecond).output();
}
/**
 * Joins the two inputs inside {@code Session} windows and returns the joined
 * values without their key.
 *
 * <p>Both inputs first get their event time assigned from the second component
 * of each pair. All elements are then joined under the same empty-string key,
 * windowed by {@code Session.of(Duration.ofMillis(10))}. Each result is a triple
 * of (window, first component of the left pair, first component of the right pair).
 *
 * @param left left input of (name, timestamp) pairs
 * @param right right input of (name, timestamp) pairs
 * @return dataset of the joined triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {

  Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  Dataset<Pair<String, Triple<TimeInterval, String, String>>> keyedResult =
      Join.of(timedLeft, timedRight)
          .by(p -> "", p -> "", String.class)
          .using((Pair<String, Long> leftPair,
                  Pair<String, Long> rightPair,
                  Collector<Triple<TimeInterval, String, String>> out) -> {
            // The window is read from the collector and cast to TimeInterval.
            TimeInterval window = (TimeInterval) out.getWindow();
            out.collect(Triple.of(window, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  // Drop the (constant) join key and keep only the triple.
  return MapElements.of(keyedResult).using(Pair::getSecond).output();
}
/**
 * Joins the two inputs inside {@code Time} windows, touching a counter and a
 * histogram for every joined pair, and returns the joined values without their key.
 *
 * <p>Both inputs first get their event time assigned from the second component
 * of each pair. All elements are joined under the same empty-string key, windowed
 * by {@code Time.of(Duration.ofMillis(3))}. For each pair the join function
 * increments the counter {@code "cntr"} by 10, calls {@code add(2345, 8)} on the
 * histogram named {@code "hist-"} plus the second character of the left pair's
 * first component, and collects a triple of (window, left first, right first).
 *
 * @param left left input of (name, timestamp) pairs
 * @param right right input of (name, timestamp) pairs
 * @return dataset of the joined triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {

  Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  Dataset<Pair<String, Triple<TimeInterval, String, String>>> keyedResult =
      Join.of(timedLeft, timedRight)
          .by(p -> "", p -> "", String.class)
          .using((Pair<String, Long> leftPair,
                  Pair<String, Long> rightPair,
                  Collector<Triple<TimeInterval, String, String>> out) -> {
            TimeInterval interval = (TimeInterval) out.getWindow();
            out.getCounter("cntr").increment(10);
            // Histogram name is derived from the character at index 1 of the left name.
            char histogramSuffix = leftPair.getFirst().charAt(1);
            out.getHistogram("hist-" + histogramSuffix).add(2345, 8);
            out.collect(Triple.of(interval, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Time.of(Duration.ofMillis(3)))
          .output();

  // Drop the (constant) join key and keep only the triple.
  return MapElements.of(keyedResult).using(Pair::getSecond).output();
}
/**
 * Joins the two inputs inside {@code Time} windows, touching a counter and a
 * histogram for every joined pair, and returns the joined values without their key.
 *
 * <p>Both inputs first get their event time assigned from the second component
 * of each pair. All elements are joined under the same empty-string key, windowed
 * by {@code Time.of(Duration.ofMillis(3))}. For each pair the join function
 * increments the counter {@code "cntr"} by 10, calls {@code add(2345, 8)} on the
 * histogram named {@code "hist-"} plus the second character of the left pair's
 * first component, and collects a triple of (window, left first, right first).
 *
 * @param left left input of (name, timestamp) pairs
 * @param right right input of (name, timestamp) pairs
 * @return dataset of the joined triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {

  Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  Dataset<Pair<String, Triple<TimeInterval, String, String>>> keyedResult =
      Join.of(timedLeft, timedRight)
          .by(p -> "", p -> "", String.class)
          .using((Pair<String, Long> leftPair,
                  Pair<String, Long> rightPair,
                  Collector<Triple<TimeInterval, String, String>> out) -> {
            TimeInterval interval = (TimeInterval) out.getWindow();
            out.getCounter("cntr").increment(10);
            // Histogram name is derived from the character at index 1 of the left name.
            char histogramSuffix = leftPair.getFirst().charAt(1);
            out.getHistogram("hist-" + histogramSuffix).add(2345, 8);
            out.collect(Triple.of(interval, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Time.of(Duration.ofMillis(3)))
          .output();

  // Drop the (constant) join key and keep only the triple.
  return MapElements.of(keyedResult).using(Pair::getSecond).output();
}
double rankThreshold = params.getRankThreshold(); Dataset<Pair<String, Double>> joined = Join.of(longStats, shortStats) .by(Pair::getFirst, Pair::getFirst) .using((Pair<String, Integer> left,
/**
 * Prepares the flow used by the tests: an identity map over a mock stream
 * source, a reduce-by-key over one-second {@code Time} windows, and a join of
 * the mapped dataset with the reduced one whose output is persisted to a
 * {@code StdoutSink}.
 *
 * @throws Exception declared for the test framework; nothing is thrown explicitly here
 */
@Before
public void before() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());

  // Identity mapping of the raw input.
  Dataset<Object> identity = MapElements.of(input).using(e -> e).output();

  // Each element is its own key; every group is reduced to the constant 1L.
  Dataset<Pair<Object, Long>> counted =
      ReduceByKey.of(identity)
          .keyBy(e -> e)
          .reduceBy(values -> 1L)
          .windowBy(Time.of(Duration.ofSeconds(1)))
          .output();

  // Join elements with their reduced value and emit the reduced value only.
  Dataset<Pair<Object, Long>> joined =
      Join.of(identity, counted)
          .by(e -> e, Pair::getFirst)
          .using((Object element, Pair<Object, Long> reducedPair, Collector<Long> out) -> {
            out.collect(reducedPair.getSecond());
          })
          .windowBy(Time.of(Duration.ofSeconds(1)))
          .output();

  joined.persist(new StdoutSink<>());
}
.persist(new VoidSink<>()); Join.of(mapped, reduced) .by(e -> e, Pair::getFirst) .using((Integer l, Pair<Integer, Long> r, Collector<Long> c) ->
/**
 * Builds a flow in which two different datasets are persisted into the very
 * same sink instance and then unfolds it; the test passes only if an
 * {@link IllegalArgumentException} is raised somewhere in that sequence.
 *
 * @throws Exception declared for the test framework
 */
@Test(expected = IllegalArgumentException.class)
public void testMultipleOutputsToSameSink() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());

  // Identity mapping of the raw input.
  Dataset<Object> identity = MapElements.of(input).using(e -> e).output();

  // Each element is its own key; every group is reduced to the constant 1L.
  Dataset<Pair<Object, Long>> counted =
      ReduceByKey.of(identity)
          .keyBy(e -> e)
          .reduceBy(values -> 1L)
          .windowBy(Time.of(Duration.ofSeconds(1)))
          .output();

  Dataset<Pair<Object, Long>> joined =
      Join.of(identity, counted)
          .by(e -> e, Pair::getFirst)
          .using((Object element, Pair<Object, Long> reducedPair, Collector<Long> out) ->
              out.collect(reducedPair.getSecond()))
          .windowBy(Time.of(Duration.ofSeconds(1)))
          .output();

  // One sink instance shared by two outputs - the situation under test.
  ListDataSink<Pair<Object, Long>> sharedSink = ListDataSink.get();
  joined.persist(sharedSink);
  counted.persist(sharedSink);

  FlowUnfolder.unfold(flow, Executor.getBasicOps());
}
.output(); } else { output = Join.of(firstPair, secondPair) .by(Pair::getFirst, Pair::getFirst) .using((l, r, c) -> c.collect((l.getSecond()) + (r.getSecond())))