protected TridentState addSourcedStateNode(List<Stream> sources, Node newNode) {
    registerSourcedNode(sources, newNode);
    return new TridentState(this, newNode);
}
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
        new Values("the cow jumped over the moon"),
        new Values("the man went to the store and bought some candy"),
        new Values("four score and seven years ago"),
        new Values("how many apples can you eat"),
        new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(16)
        .each(new Fields("sentence"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .parallelismHint(16);

    topology.newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
        .each(new Fields("count"), new FilterNull())
        .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
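// The Split function used above is defined elsewhere in storm-starter and not shown
// in this snippet. A minimal sketch of what it looks like: a BaseFunction that emits
// one "word" tuple per whitespace-separated token of the incoming sentence.
public static class Split extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String sentence = tuple.getString(0);
        for (String word : sentence.split(" ")) {
            collector.emit(new Values(word));
        }
    }
}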
public static StormTopology build(TridentKafkaConfig kafkaConf) {
    TridentTopology topology = new TridentTopology();
    topology
        // reading and parsing events
        .newStream("occupancy", new OpaqueTridentKafkaSpout(kafkaConf))
        .each(new Fields("bytes"), new BytesToString(), new Fields("rawOccupancyEvent"))
        .each(new Fields("rawOccupancyEvent"), new EventBuilder(), new Fields("occupancyEvent"))

        // gathering "enter" and "leave" events into "presence periods"
        .each(new Fields("occupancyEvent"), new ExtractCorrelationId(), new Fields("correlationId"))
        .groupBy(new Fields("correlationId"))
        .persistentAggregate(PeriodBackingMap.FACTORY, new Fields("occupancyEvent"),
            new PeriodBuilder(), new Fields("presencePeriod"))
        .newValuesStream()

        // building room timelines as a state maintained in Cassandra
        .each(new Fields("presencePeriod"), new IsPeriodComplete())
        .each(new Fields("presencePeriod"), new BuildHourlyUpdateInfo(), new Fields("roomId", "roundStartTime"))
        .groupBy(new Fields("roomId", "roundStartTime"))
        .persistentAggregate(TimelineBackingMap.FACTORY,
            new Fields("presencePeriod", "roomId", "roundStartTime"),
            new TimelineUpdater(), new Fields("hourlyTimeline"));

    return topology.build();
}
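// BytesToString and the other functions above are project-local and not shown here.
// As an assumption (not the original source), BytesToString likely decodes the raw
// Kafka payload carried in the "bytes" field into a UTF-8 string, along these lines:
public static class BytesToString extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        byte[] bytes = tuple.getBinary(0);
        collector.emit(new Values(new String(bytes, StandardCharsets.UTF_8)));
    }
}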
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("word"), 3,
        new Values("the cow jumped over the moon"),
        new Values("the man went to the store and bought some candy"),
        new Values("four score and seven years ago"),
        new Values("how many apples can you eat"),
        new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(16)
        .flatMap(split)
        .map(toUpper)
        .filter(theFilter)
        .peek(new Consumer() {
            @Override
            public void accept(TridentTuple input) {
                System.out.println(input.getString(0));
            }
        })
        .groupBy(new Fields("word"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .parallelismHint(16);

    topology.newDRPCStream("words", drpc)
        .flatMap(split)
        .groupBy(new Fields("args"))
        .stateQuery(wordCounts, new Fields("args"), new MapGet(), new Fields("count"))
        .filter(new FilterNull())
        .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
private static StormTopology advancedPrimitives(FeederBatchSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // What if we want more than one aggregation? For that, we can use "chained" aggregations.
    // Note how we calculate both count and sum. The aggregated values can then be
    // processed further, in this case into the mean.
    topology
        .newStream("aggregation", spout)
        .groupBy(new Fields("city"))
        .chainedAgg()
        .aggregate(new Count(), new Fields("count"))
        .aggregate(new Fields("age"), new Sum(), new Fields("age_sum"))
        .chainEnd()
        .each(new Fields("age_sum", "count"), new DivideAsDouble(), new Fields("mean_age"))
        .each(new Fields("city", "mean_age"), new Print());

    // What if we want to persist the results of an aggregation, but also process them
    // further? You can use "newValuesStream" for that.
    topology
        .newStream("further", spout)
        .groupBy(new Fields("city"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .newValuesStream()
        .each(new Fields("city", "count"), new Print());

    return topology.build();
}
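// DivideAsDouble is a project-local helper not shown in this snippet. A hedged
// sketch of what it presumably does: divide the first input field by the second
// and emit the quotient as a double, so integer sums and counts yield a fractional mean.
public static class DivideAsDouble extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        Number numerator = (Number) tuple.getValue(0);
        Number denominator = (Number) tuple.getValue(1);
        collector.emit(new Values(numerator.doubleValue() / denominator.doubleValue()));
    }
}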
.each(new Fields("sentence"), new Split(), new Fields("word")).parallelismHint(split_Parallelism_hint).groupBy(new Fields("word")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")) .parallelismHint(count_Parallelism_hint);
        new AnalyzeVisits(), new Fields("domain", "isBounce"))
    .newValuesStream()
    .each(new Fields("isBounce"), new BooleanToInt(),
public TridentState newStaticState(StateSpec spec) {
    String stateId = getUniqueStateId();
    Node n = new Node(getUniqueStreamId(), null, new Fields());
    n.stateInfo = new NodeStateInfo(stateId, spec);
    registerNode(n);
    return new TridentState(this, n);
}
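// A hedged usage sketch for newStaticState: it registers a read-only state
// (typically a view on an external store) that streams can then query with
// stateQuery. The locationsStateFactory variable and QueryLocation function
// are assumptions for illustration, not part of the method above.
TridentTopology topology = new TridentTopology();
TridentState locations = topology.newStaticState(new StateSpec(locationsStateFactory));
topology.newDRPCStream("locations")
    .stateQuery(locations, new Fields("args"), new QueryLocation(), new Fields("location"));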
public StormTopology buildTopology(LocalDRPC drpc) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(brokerHosts, "storm-sentence", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("kafka", kafkaSpout).shuffle()
        .each(new Fields("str"), new WordSplit(), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("aggregates_words"))
        .parallelismHint(2);

    topology.newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
        .each(new Fields("count"), new FilterNull())
        .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
        .newStream("tweets", spout)
        .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
        .project(new Fields("content", "user"))
        .each(new Fields("content"), new OnlyHashtags())
        .each(new Fields("user"), new OnlyEnglish())
        .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
            new Fields("followerClass", "contentName"))
        .parallelismHint(3)
        .groupBy(new Fields("followerClass", "contentName"))
        .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
        .parallelismHint(3);

    topology
        .newDRPCStream("hashtag_count")
        .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
        .stateQuery(count, new Fields("followerClass", "args"), new MapGet(), new Fields("count"));

    return topology.build();
}
public static StormTopology buildTopology(LocalDRPC drpc, StateFactory state) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
        new Values("the cow jumped over the moon"),
        new Values("the man went to the store and bought some candy"),
        new Values("four score and seven years ago"),
        new Values("how many apples can you eat"),
        new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout)
        .each(new Fields("sentence"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(state, new Count(), new Fields("count"))
        .parallelismHint(6);

    topology.newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
        .each(new Fields("count"), new FilterNull())
        .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
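// A minimal sketch of driving buildTopology above in local mode (the topology name
// and query string are illustrative; "words" matches the DRPC function registered above):
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("wordCounter", new Config(), buildTopology(drpc, new MemoryMapState.Factory()));
for (int i = 0; i < 10; i++) {
    System.out.println("DRPC RESULT: " + drpc.execute("words", "cat the dog jumped"));
    Thread.sleep(1000);
}
cluster.shutdown();
drpc.shutdown();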