/**
 * Builds a {@link PairStream} of key-value pairs from the supplied {@link IRichSpout},
 * using the given {@link PairValueMapper} to extract the key and value from each tuple.
 *
 * @param spout           the spout producing the tuples
 * @param pairValueMapper maps a tuple to its key and value
 * @param <K>             the key type
 * @param <V>             the value type
 * @return a new stream of key-value pairs
 */
public <K, V> PairStream<K, V> newStream(IRichSpout spout, PairValueMapper<K, V> pairValueMapper) {
    // Delegate to the single-arg overload, then lift the tuple stream to a pair stream.
    return newStream(spout).mapToPair(pairValueMapper);
}
/**
 * Builds a {@link PairStream} of key-value pairs from the supplied {@link IRichSpout}
 * with the requested parallelism, using the given {@link PairValueMapper} to extract
 * the key and value from each tuple.
 *
 * @param spout           the spout producing the tuples
 * @param pairValueMapper maps a tuple to its key and value
 * @param parallelism     the parallelism of the stream
 * @param <K>             the key type
 * @param <V>             the value type
 * @return a new stream of key-value pairs
 */
public <K, V> PairStream<K, V> newStream(IRichSpout spout, PairValueMapper<K, V> pairValueMapper, int parallelism) {
    // Delegate to the parallelism-aware overload, then lift the tuple stream to a pair stream.
    return newStream(spout, parallelism).mapToPair(pairValueMapper);
}
/**
 * Translates this INSERT relational node into the streams API plan: plans the single
 * input first, pops its stream from the planner, keys it by the table's primary key,
 * and wires it into the table's consumer bolt.
 *
 * @param planCreator the planner that accumulates streams while walking the rel tree
 * @throws Exception if planning the input rel node fails
 */
@Override
public void streamsPlan(StreamsPlanCreator planCreator) throws Exception {
    // SingleRel: recurse into the single input so its stream ends up on the planner's stack.
    RelNode input = getInput();
    StormRelUtils.getStormRelInput(input).streamsPlan(planCreator);
    Stream<Values> inputStream = planCreator.pop();
    // Only INSERT is handled; any other DML kind is rejected up front.
    Preconditions.checkArgument(isInsert(), "Only INSERT statement is supported.");
    // Calcite ensures that the value is structurized to the table definition
    // hence we can use PK index directly.
    // To elaborate, if table BAR is defined as ID INTEGER PK, NAME VARCHAR, DEPTID INTEGER
    // and query like INSERT INTO BAR SELECT NAME, ID FROM FOO is executed,
    // Calcite makes the projection ($1 <- ID, $0 <- NAME, null) to the value before INSERT.
    String tableName = Joiner.on('.').join(getTable().getQualifiedName());
    IRichBolt consumer = planCreator.getSources().get(tableName).getConsumer();
    // To make logic simple, it assumes that all the tables have one PK (which it should be
    // extended to support composed key), and provides PairStream (KeyedStream) to the consumer bolt.
    inputStream.mapToPair(new StreamInsertMapToPairFunction(primaryKeyIndex)).to(consumer);
    // NOTE(review): the pre-keyed inputStream (not the keyed/consumed stream) is what gets
    // registered — presumably intentional since .to(consumer) is terminal; confirm.
    planCreator.addStream(inputStream);
}
/**
 * Verifies that joining two pair streams records each side's node as the parent
 * of the join node for that side's stream.
 */
@Test
public void testJoin() throws Exception {
    Stream<Integer> source = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    Stream<Integer>[] branches = source.branch(v -> v % 2 == 0, v -> v % 3 == 0);
    PairStream<Integer, Integer> left = branches[0].mapToPair(v -> Pair.of(v, 1));
    PairStream<Integer, Integer> right = branches[1].mapToPair(v -> Pair.of(v, 1));

    PairStream<Integer, Pair<Integer, Integer>> joined = left.join(right);

    // Each input stream of the join must trace back to exactly its originating node.
    assertEquals(Collections.singleton(left.node), joined.node.getParents(left.stream));
    assertEquals(Collections.singleton(right.node), joined.node.getParents(right.stream));
}
/**
 * Verifies that branching a stream and joining the branches back produces a
 * {@code ProcessorNode} and that the built topology contains exactly two bolts.
 */
@Test
public void testBranchAndJoin() throws Exception {
    // Removed unused TopologyContext/OutputCollector mocks — nothing in this test consumed them.
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0), 2);
    // Split into evens and odds, then join the two branches on key.
    Stream<Integer>[] streams = stream.branch(x -> x % 2 == 0, x -> x % 2 == 1);
    PairStream<Integer, Pair<Integer, Integer>> joined =
        streams[0].mapToPair(x -> Pair.of(x, 1)).join(streams[1].mapToPair(x -> Pair.of(x, 1)));
    assertTrue(joined.getNode() instanceof ProcessorNode);
    StormTopology topology = streamBuilder.build();
    assertEquals(2, topology.get_bolts_size());
}
/**
 * Verifies that a mapToPair → reduceByKey → print pipeline with default parallelism
 * collapses into a single bolt (no repartitioning needed for one partition).
 */
@Test
public void testPartitionByKeySinglePartition() {
    // Removed unused TopologyContext/OutputCollector mocks — nothing in this test consumed them.
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    stream.mapToPair(x -> Pair.of(x, x))
          .reduceByKey((x, y) -> x + y)
          .print();
    StormTopology topology = streamBuilder.build();
    assertEquals(1, topology.get_bolts_size());
}
}
.mapToPair(w -> Pair.of(w, 1))
/**
 * Verifies that an explicit {@code repartition(10)} between two reduceByKey stages
 * introduces a third bolt, and that the downstream bolt receives both the keyed
 * fields grouping and the all-grouped punctuation stream from "bolt2".
 */
@Test
public void testMultiPartitionByKeyWithRepartition() {
    // Removed unused TopologyContext/OutputCollector mocks — nothing in this test consumed them.
    Map<GlobalStreamId, Grouping> expected = new HashMap<>();
    expected.put(new GlobalStreamId("bolt2", "s3"), Grouping.fields(Collections.singletonList("key")));
    expected.put(new GlobalStreamId("bolt2", "s3__punctuation"), Grouping.all(new NullStruct()));
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    stream.mapToPair(x -> Pair.of(x, x))
          .window(TumblingWindows.of(BaseWindowedBolt.Count.of(10)))
          .reduceByKey((x, y) -> x + y)
          .repartition(10)
          .reduceByKey((x, y) -> 0)
          .print();
    StormTopology topology = streamBuilder.build();
    assertEquals(3, topology.get_bolts_size());
    assertEquals(expected, topology.get_bolts().get("bolt3").get_common().get_inputs());
}
/**
 * Builds and submits a topology that maps spout tuples to typed triples, keys them,
 * and prints grouped values over tumbling count windows.
 */
public static void main(String[] args) throws Exception {
    StreamBuilder builder = new StreamBuilder();

    // The spout emits sequences of (Integer, Long, Long). A TupleValueMapper extracts fields
    // from the values and produces a stream of typed tuples (Tuple3<Integer, Long, Long> here).
    Stream<Tuple3<Integer, Long, Long>> stream =
        builder.newStream(new RandomIntegerSpout(), TupleValueMappers.of(0, 1, 2));

    // Key each triple by t._2 / 10000, keeping t._1 as the value.
    PairStream<Long, Integer> pairs = stream.mapToPair(t -> Pair.of(t._2 / 10000, t._1));

    // Group values per key over tumbling windows of 10 tuples and print the result.
    pairs.window(TumblingWindows.of(Count.of(10))).groupByKey().print();

    String topoName = args.length > 0 ? args[0] : "test";
    Config config = new Config();
    config.setNumWorkers(1);
    StormSubmitter.submitTopologyWithProgressBar(topoName, config, builder.build());
}
}
/**
 * Verifies that two consecutive reduceByKey stages (with a window in between) without
 * an explicit repartition produce exactly two bolts in the built topology.
 */
@Test
public void testMultiPartitionByKey() {
    // Removed unused TopologyContext/OutputCollector mocks — nothing in this test consumed them.
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    stream.mapToPair(x -> Pair.of(x, x))
          .window(TumblingWindows.of(BaseWindowedBolt.Count.of(10)))
          .reduceByKey((x, y) -> x + y)
          .reduceByKey((x, y) -> 0)
          .print();
    StormTopology topology = streamBuilder.build();
    assertEquals(2, topology.get_bolts_size());
}
.mapToPair(w -> Pair.of(w, 1))
.mapToPair(w -> Pair.of(w, 1))
.mapToPair(w -> Pair.of(w, 1))