/**
 * Runs a state query without naming any input fields.
 *
 * <p>This is a convenience overload: it forwards to the four-argument
 * {@code stateQuery}, supplying {@code null} in the input-fields position.
 *
 * @param state the state handed to the four-argument overload
 * @param function the query function handed to the four-argument overload
 * @param functionFields forwarded unchanged to the four-argument overload
 * @return the stream returned by the four-argument overload
 */
public Stream stateQuery(TridentState state, QueryFunction function, Fields functionFields) {
    // No explicit input selection: the delegate receives null for its inputFields argument.
    final Fields noInputFields = null;
    return stateQuery(state, noInputFields, function, functionFields);
}
/**
 * Queries {@code state} on a stream that has first been partitioned by
 * {@code _groupFields}.
 *
 * <p>The underlying {@code _stream} is partitioned with {@code partitionBy(_groupFields)}
 * and the query is then issued on the resulting stream with the arguments passed through
 * unchanged.
 *
 * @param state the state to query
 * @param inputFields forwarded unchanged to the partitioned stream's {@code stateQuery}
 * @param function the query function to apply
 * @param functionFields forwarded unchanged to the partitioned stream's {@code stateQuery}
 * @return the stream produced by the delegated {@code stateQuery} call
 */
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    final Stream partitionedByGroup = _stream.partitionBy(_groupFields);
    return partitionedByGroup.stateQuery(state, inputFields, function, functionFields);
}
/**
 * Builds the topology behind the {@code "reach"} DRPC function.
 *
 * <p>Two static states are created from {@code StaticSingleKeyMapState.Factory}, one over
 * {@code TWEETERS_DB} and one over {@code FOLLOWERS_DB}. The DRPC stream looks up the
 * request's {@code "args"} in the first state, expands the result into individual
 * {@code "tweeter"} tuples, shuffles, looks each tweeter up in the second state, expands
 * the result into {@code "follower"} tuples, groups by follower, and finally runs the
 * {@code One} and {@code Sum} aggregations to produce the {@code "reach"} field.
 *
 * @param drpc the local DRPC instance the {@code "reach"} stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Field descriptors used along the pipeline, named once up front.
    final Fields requestArgs = new Fields("args");
    final Fields tweeterList = new Fields("tweeters");
    final Fields singleTweeter = new Fields("tweeter");
    final Fields followerList = new Fields("followers");
    final Fields singleFollower = new Fields("follower");
    final Fields oneMarker = new Fields("one");
    final Fields reachTotal = new Fields("reach");

    final TridentTopology reachTopology = new TridentTopology();
    final TridentState tweetersByUrl =
        reachTopology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
    final TridentState followersByTweeter =
        reachTopology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

    reachTopology
        .newDRPCStream("reach", drpc)
        // args -> list of tweeters -> one tuple per tweeter
        .stateQuery(tweetersByUrl, requestArgs, new MapGet(), tweeterList)
        .each(tweeterList, new ExpandList(), singleTweeter)
        .shuffle()
        // tweeter -> list of followers -> one tuple per follower
        .stateQuery(followersByTweeter, singleTweeter, new MapGet(), followerList)
        .each(followerList, new ExpandList(), singleFollower)
        // per-follower aggregation into "one", then a sum over "one" into "reach"
        .groupBy(singleFollower)
        .aggregate(new One(), oneMarker)
        .aggregate(oneMarker, new Sum(), reachTotal);

    return reachTopology.build();
}
/**
 * Shorthand for a state query that specifies no input fields.
 *
 * <p>Delegates to the four-argument {@code stateQuery} with {@code null} as the
 * input-fields argument; the remaining arguments are passed through as given.
 *
 * @param state the state passed on to the four-argument overload
 * @param function the query function passed on to the four-argument overload
 * @param functionFields passed on unchanged to the four-argument overload
 * @return the result of the four-argument overload
 */
public Stream stateQuery(TridentState state, QueryFunction function, Fields functionFields) {
    return stateQuery(
        state,
        (Fields) null, // no input fields supplied by this overload
        function,
        functionFields);
}
/**
 * Three-argument form of {@code stateQuery} that omits the input fields.
 *
 * <p>Calls the four-argument overload, filling the input-fields slot with {@code null}.
 *
 * @param state the state given to the four-argument overload
 * @param function the query function given to the four-argument overload
 * @param functionFields given unchanged to the four-argument overload
 * @return whatever the four-argument overload returns
 */
public Stream stateQuery(TridentState state, QueryFunction function, Fields functionFields) {
    final Fields unspecifiedInputs = null;
    final Stream queried = stateQuery(state, unspecifiedInputs, function, functionFields);
    return queried;
}
/**
 * Issues a state query after partitioning {@code _stream} by {@code _groupFields}.
 *
 * @param state the state to query
 * @param inputFields passed through unchanged to the delegated {@code stateQuery}
 * @param function the query function to apply
 * @param functionFields passed through unchanged to the delegated {@code stateQuery}
 * @return the stream returned by {@code stateQuery} on the partitioned stream
 */
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    // Step 1: partition the underlying stream by the grouping fields.
    final Stream grouped = _stream.partitionBy(_groupFields);
    // Step 2: run the query on that partitioned stream.
    return grouped.stateQuery(state, inputFields, function, functionFields);
}
/**
 * Partitions {@code _stream} by {@code _groupFields} and then queries {@code state}
 * on the partitioned stream.
 *
 * @param state the state to query
 * @param inputFields handed unchanged to the partitioned stream's {@code stateQuery}
 * @param function the query function to apply
 * @param functionFields handed unchanged to the partitioned stream's {@code stateQuery}
 * @return the stream produced by the partitioned stream's {@code stateQuery}
 */
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    final Stream repartitioned = _stream.partitionBy(_groupFields);
    final Stream result = repartitioned.stateQuery(state, inputFields, function, functionFields);
    return result;
}
.stateQuery(bucketsDB, new Fields("tweet_obj", "uniqWordsIncrease"), new BucketsStateQuery(), .stateQuery(recentTweetsDB, new Fields("tweet_obj", "cosSimBckts"), new RecentTweetsStateQuery(),
public static void main(String[] args) throws Exception { Config conf = new Config(); // Submits the topology String topologyName = args[0]; conf.setNumWorkers(8); // Our Vagrant environment has 8 workers FakeTweetsBatchSpout fakeTweets = new FakeTweetsBatchSpout(10); TridentTopology topology = new TridentTopology(); TridentState countState = topology .newStream("spout", fakeTweets) .groupBy(new Fields("actor")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")); topology .newDRPCStream("count_per_actor") .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count")); StormSubmitter.submitTopology(topologyName, conf, topology.build()); }
/**
 * Builds a topology that counts tweets per {@code (followerClass, contentName)} pair and
 * serves the counts through the {@code "top_hashtags"} DRPC function.
 *
 * <p>Ingest side: tuples from {@code spout} are parsed by {@code ParseTweet}, projected to
 * {@code content} and {@code user}, passed through the {@code OnlyHashtags} and
 * {@code OnlyEnglish} filters, enriched by {@code ExtractFollowerClassAndContentName},
 * grouped, and counted into a {@code MemoryMapState}.
 *
 * <p>Query side: the DRPC stream reads the state's keys with {@code TupleCollectionGet},
 * fetches each key's count with {@code MapGet}, and reduces the result with
 * {@code FirstN.FirstNSortedAgg(5, "count", true)}.
 *
 * @param spout the Kafka spout supplying raw tweets in the {@code "str"} field
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    // Field descriptors, named once so each pipeline stage reads as a sentence.
    final Fields rawTweet = new Fields("str");
    final Fields parsedTweet = new Fields("text", "content", "user");
    final Fields contentAndUser = new Fields("content", "user");
    final Fields contentOnly = new Fields("content");
    final Fields userOnly = new Fields("user");
    final Fields groupingKey = new Fields("followerClass", "contentName");
    final Fields countField = new Fields("count");
    final Fields nameAndCount = new Fields("contentName", "count");

    final TridentTopology topology = new TridentTopology();

    // Ingest: parse -> filter -> classify -> group -> persistent count.
    final TridentState hashtagCounts = topology
        .newStream("tweets", spout)
        .each(rawTweet, new ParseTweet(), parsedTweet)
        .project(contentAndUser)
        .each(contentOnly, new OnlyHashtags())
        .each(userOnly, new OnlyEnglish())
        .each(contentAndUser, new ExtractFollowerClassAndContentName(), groupingKey)
        .groupBy(groupingKey)
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: enumerate the stored keys, fetch their counts, keep the first 5 by "count".
    // NOTE(review): the trailing `true` presumably selects descending order - confirm.
    topology
        .newDRPCStream("top_hashtags")
        .stateQuery(hashtagCounts, new TupleCollectionGet(), groupingKey)
        .stateQuery(hashtagCounts, groupingKey, new MapGet(), countField)
        .aggregate(nameAndCount, new FirstN.FirstNSortedAgg(5, "count", true), nameAndCount);

    return topology.build();
}
/**
 * Builds a topology that counts tweets per {@code (followerClass, contentName)} pair and
 * serves, through the {@code "hashtag_count"} DRPC function, one entry per
 * {@code followerClass}.
 *
 * <p>Ingest side: tuples from {@code spout} are parsed by {@code ParseTweet}, projected to
 * {@code content} and {@code user}, passed through the {@code OnlyHashtags} and
 * {@code OnlyEnglish} filters, enriched by {@code ExtractFollowerClassAndContentName},
 * grouped, and counted into a {@code MemoryMapState}.
 *
 * <p>Query side: the DRPC stream reads the state's keys with {@code TupleCollectionGet},
 * fetches each key's count with {@code MapGet}, groups by {@code followerClass}, and
 * applies {@code FirstN.FirstNSortedAgg(1, "count", true)} within each group.
 *
 * @param spout the Kafka spout supplying raw tweets in the {@code "str"} field
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    final Fields rawTweet = new Fields("str");
    final Fields parsedTweet = new Fields("text", "content", "user");
    final Fields contentAndUser = new Fields("content", "user");
    final Fields contentOnly = new Fields("content");
    final Fields userOnly = new Fields("user");
    final Fields followerClassOnly = new Fields("followerClass");
    final Fields groupingKey = new Fields("followerClass", "contentName");
    final Fields countField = new Fields("count");
    final Fields nameAndCount = new Fields("contentName", "count");

    final TridentTopology topology = new TridentTopology();

    // Ingest: parse -> filter -> classify -> group -> persistent count.
    final TridentState hashtagCounts = topology
        .newStream("tweets", spout)
        .each(rawTweet, new ParseTweet(), parsedTweet)
        .project(contentAndUser)
        .each(contentOnly, new OnlyHashtags())
        .each(userOnly, new OnlyEnglish())
        .each(contentAndUser, new ExtractFollowerClassAndContentName(), groupingKey)
        .groupBy(groupingKey)
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: enumerate keys, fetch counts, then keep the first entry by "count" per follower class.
    // NOTE(review): the trailing `true` presumably selects descending order - confirm.
    topology
        .newDRPCStream("hashtag_count")
        .stateQuery(hashtagCounts, new TupleCollectionGet(), groupingKey)
        .stateQuery(hashtagCounts, groupingKey, new MapGet(), countField)
        .groupBy(followerClassOnly)
        .aggregate(nameAndCount, new FirstN.FirstNSortedAgg(1, "count", true), nameAndCount);

    return topology.build();
}
/**
 * Builds a topology that counts tweets per {@code (country, contentName)} pair and
 * serves, through the {@code "location_hashtag_count"} DRPC function, up to three entries
 * per {@code country}.
 *
 * <p>Ingest side: tuples from {@code spout} are parsed by {@code ParseTweet} into
 * {@code status}, {@code content} and {@code user}, filtered by {@code OnlyHashtags} and
 * {@code OnlyGeo}, enriched by {@code ExtractLocation}, grouped, and counted into a
 * {@code MemoryMapState}.
 *
 * <p>Query side: the DRPC stream reads the state's keys with {@code TupleCollectionGet},
 * fetches each key's count with {@code MapGet}, groups by {@code country}, and applies
 * {@code FirstN.FirstNSortedAgg(3, "count", true)} within each group.
 *
 * @param spout the Kafka spout supplying raw tweets in the {@code "str"} field
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    final Fields rawTweet = new Fields("str");
    final Fields parsedTweet = new Fields("status", "content", "user");
    final Fields projected = new Fields("content", "user", "status");
    final Fields contentOnly = new Fields("content");
    final Fields statusOnly = new Fields("status");
    final Fields statusAndContent = new Fields("status", "content");
    final Fields countryOnly = new Fields("country");
    final Fields groupingKey = new Fields("country", "contentName");
    final Fields countField = new Fields("count");
    final Fields nameAndCount = new Fields("contentName", "count");

    final TridentTopology topology = new TridentTopology();

    // Ingest: parse -> filter -> locate -> group -> persistent count.
    final TridentState locationCounts = topology
        .newStream("tweets", spout)
        .each(rawTweet, new ParseTweet(), parsedTweet)
        .project(projected)
        .each(contentOnly, new OnlyHashtags())
        .each(statusOnly, new OnlyGeo())
        .each(statusAndContent, new ExtractLocation(), groupingKey)
        .groupBy(groupingKey)
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: enumerate keys, fetch counts, then keep the first 3 by "count" per country.
    // NOTE(review): the trailing `true` presumably selects descending order - confirm.
    topology
        .newDRPCStream("location_hashtag_count")
        .stateQuery(locationCounts, new TupleCollectionGet(), groupingKey)
        .stateQuery(locationCounts, groupingKey, new MapGet(), countField)
        .groupBy(countryOnly)
        .aggregate(nameAndCount, new FirstN.FirstNSortedAgg(3, "count", true), nameAndCount);

    return topology.build();
}
.stateQuery(countState, new TupleCollectionGet(), new Fields("actor", "location")) .stateQuery(nameToAge, new Fields("actor"), new MapGet(), new Fields("age")) .each(new Fields("actor","location","age"), new Print()) .groupBy(new Fields("location"))
.groupBy(new Fields("hashtag")) .stateQuery(hashTagCounts, new Fields("hashtag"), new MapGet(), new Fields("resultrt")) .stateQuery(sploutState, new Fields("hashtag", "resultrt"), new HashTagsSploutQuery(), new Fields("resultbatch")) .each(new Fields("hashtag", "resultrt", "resultbatch"), new LambdaMerge(), new Fields("result"))
/**
 * Builds a topology that counts tweets per {@code (followerClass, contentName)} pair into
 * a state created by {@code HazelCastStateFactory}, and serves the counts through the
 * {@code "hashtag_count"} DRPC function.
 *
 * <p>Ingest side: tuples from {@code spout} are parsed by {@code ParseTweet}, projected to
 * {@code content} and {@code user}, passed through the {@code OnlyHashtags} and
 * {@code OnlyEnglish} filters, and enriched by {@code ExtractFollowerClassAndContentName}.
 * A parallelism hint of 3 is set both before the grouping and on the persistent
 * aggregation.
 *
 * <p>Query side: the DRPC stream adds a {@code followerClass} field via a
 * {@code Constants<String>} function built from four literals, then looks up
 * {@code (followerClass, args)} in the state with {@code MapGet}.
 *
 * @param spout the Kafka spout supplying raw tweets in the {@code "str"} field
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    final Fields rawTweet = new Fields("str");
    final Fields parsedTweet = new Fields("text", "content", "user");
    final Fields contentAndUser = new Fields("content", "user");
    final Fields contentOnly = new Fields("content");
    final Fields userOnly = new Fields("user");
    final Fields followerClassOnly = new Fields("followerClass");
    final Fields groupingKey = new Fields("followerClass", "contentName");
    final Fields lookupKey = new Fields("followerClass", "args");
    final Fields countField = new Fields("count");

    final TridentTopology topology = new TridentTopology();

    // Ingest: parse -> filter -> classify (x3) -> group -> persistent count (x3).
    final TridentState hashtagCounts = topology
        .newStream("tweets", spout)
        .each(rawTweet, new ParseTweet(), parsedTweet)
        .project(contentAndUser)
        .each(contentOnly, new OnlyHashtags())
        .each(userOnly, new OnlyEnglish())
        .each(contentAndUser, new ExtractFollowerClassAndContentName(), groupingKey)
        .parallelismHint(3)
        .groupBy(groupingKey)
        .persistentAggregate(new HazelCastStateFactory(), new Count(), countField)
        .parallelismHint(3);

    // Query: pair the request argument with the follower-class literals and look each pair up.
    topology
        .newDRPCStream("hashtag_count")
        .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), followerClassOnly)
        .stateQuery(hashtagCounts, lookupKey, new MapGet(), countField);

    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException { TridentTopology topology = new TridentTopology(); /** * As a first thing, we need a stream of tweets which we can parse and extract * only the text and its id. As you will notice, we're going to store the stream * using the {@link ElasticSearchState} implementation using its {@link StateUpdater}. * Check their implementations for details. */ topology .newStream("tweets", spout) .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user")) .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId")) .project(new Fields("tweetId", "text")) .each(new Fields("tweetId", "text"), new Print()) .partitionPersist(new ElasticSearchStateFactory(), new Fields("tweetId", "text"), new ElasticSearchStateUpdater()); /** * Now we need a DRPC stream to query the state where the tweets are stored. * To do that, as shown below, we need an implementation of {@link QueryFunction} to * access our {@link ElasticSearchState}. */ TridentState elasticSearchState = topology.newStaticState(new ElasticSearchStateFactory()); topology .newDRPCStream("search") .each(new Fields("args"), new Split(" "), new Fields("keywords")) // let's split the arguments .stateQuery(elasticSearchState, new Fields("keywords"), new TweetQuery(), new Fields("ids")) // and pass them as query parameters .project(new Fields("ids")); return topology.build(); }
.stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count"));