public <S> Dataset<S> flatMap(UnaryFunctor<T, S> f) {
  return new Dataset<>(FlatMap.of(this.wrap).using(requireNonNull(f)).output());
}
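// A minimal usage sketch of the convenience method above; the input dataset
// `lines` and the whitespace-splitting functor are illustrative assumptions,
// not part of the surrounding code:
Dataset<String> words = lines.flatMap((String line, Collector<String> c) -> {
  for (String word : line.split("\\s+")) {
    c.collect(word);
  }
});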
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  Dataset<String> mapped = FlatMap.named("FlatMap1")
      .of(dataset)
      .using((String s, Collector<String> c) -> c.collect(s))
      .output();

  assertEquals(flow, mapped.getFlow());
  assertEquals(1, flow.size());

  FlatMap map = (FlatMap) flow.operators().iterator().next();
  assertEquals(flow, map.getFlow());
  assertEquals("FlatMap1", map.getName());
  assertNotNull(map.getFunctor());
  assertEquals(mapped, map.output());
  assertNull(map.getEventTimeExtractor());
}
@Override
@SuppressWarnings("unchecked")
public JavaRDD<?> translate(FlatMap operator, SparkExecutorContext context) {
  final JavaRDD<?> input = context.getSingleInput(operator);
  final UnaryFunctor<?, ?> mapper = operator.getFunctor();
  final ExtractEventTime<?> evtTimeFn = operator.getEventTimeExtractor();

  LazyAccumulatorProvider accumulators =
      new LazyAccumulatorProvider(context.getAccumulatorFactory(), context.getSettings());
  if (evtTimeFn != null) {
    return input
        .flatMap(new EventTimeAssigningUnaryFunctor(mapper, evtTimeFn, accumulators))
        .setName(operator.getName() + "::event-time-and-apply-udf");
  } else {
    return input
        .flatMap(new UnaryFunctorWrapper(mapper, accumulators))
        .setName(operator.getName() + "::apply-udf");
  }
}
}
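// What that translation amounts to at the Spark level, as a standalone sketch
// (assuming Spark 2.x, where FlatMapFunction#call returns an Iterator, and a
// JavaSparkContext named `sc`; the sample data and operator name are
// illustrative only):
JavaRDD<String> lines = sc.parallelize(Arrays.asList("first line", "second"));
JavaRDD<String> tokens = lines
    .flatMap(line -> Arrays.asList(line.split("\\s+")).iterator())
    .setName("TOKENIZER::apply-udf");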
// Kafka input variant:
    new KafkaSource(uri.getAuthority(), uri.getPath().substring(1), settings));
return FlatMap.of(input)
    .using(new UnaryFunctor<Pair<byte[], byte[]>, Pair<Long, String>>() {
      private final SearchEventsParser parser = new SearchEventsParser();
      // ...

// Hadoop text-file input variant:
DataSource<String> datasource = new SimpleHadoopTextFileSource(uri.toString());
Dataset<String> in = flow.createInput(datasource);
return FlatMap.named("PARSE-INPUT")
    .of(in)
    .using(new UnaryFunctor<String, Pair<Long, String>>() {
      // ...
    flatMap.getSingleParentOrNull().get(), flatMap.get());
InputProvider ret = new InputProvider();
final UnaryFunctor mapper = flatMap.get().getFunctor();
final ExtractEventTime eventTimeFn = flatMap.get().getEventTimeExtractor();
for (Supplier s : suppliers) {
  final BlockingQueue<Datum> out = new ArrayBlockingQueue<>(5000);
@Test
public void testBuild_ImplicitName() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  Dataset<String> mapped = FlatMap.of(dataset)
      .using((String s, Collector<String> c) -> c.collect(s))
      .output();

  FlatMap map = (FlatMap) flow.operators().iterator().next();
  assertEquals("FlatMap", map.getName());
}
/**
 * Collects Avro records as JSON strings.
 *
 * @param outSink sink receiving the JSON output
 * @param inSource source of Avro records to read
 * @throws Exception if the flow fails to execute
 */
public static void runFlow(
    DataSink<String> outSink,
    DataSource<Pair<AvroKey<GenericData.Record>, NullWritable>> inSource)
    throws Exception {
  Flow flow = Flow.create("simple read avro");
  Dataset<Pair<AvroKey<GenericData.Record>, NullWritable>> input = flow.createInput(inSource);
  final Dataset<String> output =
      FlatMap.named("avro2csv").of(input).using(AvroSourceTest::apply).output();
  output.persist(outSink);
  Executor executor = new LocalExecutor();
  executor.submit(flow).get();
}
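// A hedged sketch of the shape AvroSourceTest::apply must have to satisfy
// UnaryFunctor; it leans on Avro's GenericData.Record#toString(), which renders
// the record as JSON. The actual helper in the test suite may differ:
static void apply(
    Pair<AvroKey<GenericData.Record>, NullWritable> element,
    Collector<String> collector) {
  // AvroKey#datum() unwraps the underlying generic record.
  collector.collect(element.getFirst().datum().toString());
}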
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  // Identity functor: every element passes through unchanged.
  return DAG.of(new FlatMap<>(
      getName(), getFlow(), input, (i, c) -> c.collect(i), eventTimeFn, getHints()));
}
return FlatMap.named("FORMAT-OUTPUT") .of(aggregated) .using(((Pair<String, Long> elem, Collector<String> context) -> {
@Override
protected Dataset<Integer> getOutput(Dataset<Integer> input) {
  // For each input element e, emit the sequence 1..e
  // (e.g. the element 3 yields 1, 2, 3).
  return FlatMap.of(input)
      .using((Integer e, Collector<Integer> c) -> {
        for (int i = 1; i <= e; i++) {
          c.collect(i);
        }
      })
      .output();
}
@Test
public void testBuild_EventTimeExtractor() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  Dataset<BigDecimal> mapped = FlatMap.named("FlatMap2")
      .of(dataset)
      .using((String s, Collector<BigDecimal> c) -> c.collect(null))
      .eventTimeBy(Long::parseLong) // ~ consuming the original input elements
      .output();

  assertEquals(flow, mapped.getFlow());
  assertEquals(1, flow.size());

  FlatMap map = (FlatMap) flow.operators().iterator().next();
  assertEquals(flow, map.getFlow());
  assertEquals("FlatMap2", map.getName());
  assertNotNull(map.getFunctor());
  assertEquals(mapped, map.output());
  assertNotNull(map.getEventTimeExtractor());
}
@Override
@SuppressWarnings("unchecked")
public DataStream<?> translate(FlinkOperator<FlatMap> operator, StreamingExecutorContext context) {
  Settings settings = context.getSettings();
  FlinkAccumulatorFactory accumulatorFactory = context.getAccumulatorFactory();

  DataStream input = context.getSingleInputStream(operator);
  UnaryFunctor mapper = operator.getOriginalOperator().getFunctor();
  ExtractEventTime evtTimeFn = operator.getOriginalOperator().getEventTimeExtractor();

  if (evtTimeFn != null) {
    input = input.assignTimestampsAndWatermarks(
        new EventTimeAssigner(context.getAllowedLateness(), evtTimeFn))
        .returns((Class) StreamingElement.class);
  }
  return input
      .flatMap(new StreamingUnaryFunctorWrapper(mapper, accumulatorFactory, settings))
      .returns((Class) StreamingElement.class)
      .name(operator.getName())
      .setParallelism(operator.getParallelism());
}
}
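// Note that here the timestamps are assigned on the input stream before the UDF
// runs, whereas the Spark translation above fuses both steps into one flatMap.
// A hedged sketch of how an assigner like EventTimeAssigner can be built on
// Flink's BoundedOutOfOrdernessTimestampExtractor; the class name, generic
// parameter, and constructor shape are illustrative assumptions:
class SketchedEventTimeAssigner<T> extends BoundedOutOfOrdernessTimestampExtractor<T> {

  private final ExtractEventTime<T> eventTimeFn;

  SketchedEventTimeAssigner(Time maxOutOfOrderness, ExtractEventTime<T> eventTimeFn) {
    super(maxOutOfOrderness);
    this.eventTimeFn = eventTimeFn;
  }

  @Override
  public long extractTimestamp(T element) {
    // Delegate to the user-supplied extractor carried by the FlatMap operator.
    return eventTimeFn.extractTimestamp(element);
  }
}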
final Dataset<String> words = FlatMap.named("TOKENIZER")
    .of(lines)
    .using((String line, Collector<String> c) ->
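// The TOKENIZER fragment above is cut off mid-lambda. A self-contained sketch of
// the canonical completion, assuming plain whitespace splitting:
final Dataset<String> words = FlatMap.named("TOKENIZER")
    .of(lines)
    .using((String line, Collector<String> c) -> {
      for (String word : line.split("\\s+")) {
        c.collect(word);
      }
    })
    .output();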
/** This operator can be implemented using FlatMap. */
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  return DAG.of(new FlatMap<>(getName(), getFlow(), input,
      (elem, collector) -> {
        if (predicate.apply(elem)) {
          collector.collect(elem);
        }
      },
      null, getHints()));
}
}
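// Given that expansion, filtering and the equivalent hand-written FlatMap produce
// the same output; `numbers` is an assumed input dataset and the Filter builder
// shape follows Euphoria's usual of/by/output pattern:
Dataset<Integer> viaFilter = Filter.of(numbers)
    .by(n -> n % 2 == 0)
    .output();
Dataset<Integer> viaFlatMap = FlatMap.of(numbers)
    .using((Integer n, Collector<Integer> c) -> {
      if (n % 2 == 0) {
        c.collect(n);
      }
    })
    .output();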
@Test
public void testBuild_WithCounters() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  Dataset<String> mapped = FlatMap.named("FlatMap1")
      .of(dataset)
      .using((String s, Collector<String> c) -> {
        c.getCounter("my-counter").increment();
        c.collect(s);
      })
      .output();

  assertEquals(flow, mapped.getFlow());
  assertEquals(1, flow.size());

  FlatMap map = (FlatMap) flow.operators().iterator().next();
  assertEquals(flow, map.getFlow());
  assertEquals("FlatMap1", map.getName());
  assertNotNull(map.getFunctor());
  assertEquals(mapped, map.output());
}