Stream vehiclesStream = topology.newStream("spout1", spout)
        .each(allFields, new Debug("##### vehicles"));

Stream slowVehiclesStream = vehiclesStream
        .min(new SpeedComparator())
        .each(vehicleField, new Debug("#### slowest vehicle"));

Stream slowDriversStream = slowVehiclesStream
        .project(driverField)
        .each(driverField, new Debug("##### slowest driver"));

vehiclesStream
        .max(new SpeedComparator())
        .each(vehicleField, new Debug("#### fastest vehicle"))
        .project(driverField)
        .each(driverField, new Debug("##### fastest driver"));

vehiclesStream
        .minBy(Vehicle.FIELD_NAME, new EfficiencyComparator())
        .each(vehicleField, new Debug("#### least efficient vehicle"));

vehiclesStream
        .maxBy(Vehicle.FIELD_NAME, new EfficiencyComparator())
        .each(vehicleField, new Debug("#### most efficient vehicle"));
private static List<GroupedStream> groupedStreams(List<Stream> streams, List<Fields> joinFields) {
    List<GroupedStream> ret = new ArrayList<>();
    for (int i = 0; i < streams.size(); i++) {
        ret.add(streams.get(i).groupBy(joinFields.get(i)));
    }
    return ret;
}
public Stream aggregate(CombinerAggregator agg, Fields functionFields) {
    return aggregate(null, agg, functionFields);
}
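// For context: a CombinerAggregator that can be passed to aggregate() above.
// This mirrors Trident's built-in Count, shown here as a reference sketch.
public static class Count implements CombinerAggregator<Long> {
    @Override
    public Long init(TridentTuple tuple) {
        return 1L; // each tuple contributes 1
    }

    @Override
    public Long combine(Long val1, Long val2) {
        return val1 + val2; // partial counts add up
    }

    @Override
    public Long zero() {
        return 0L; // identity value for empty partitions
    }
}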
@Override
public Stream apply(Stream input) {
    Fields outputFields = input.getOutputFields();
    // Aggregate each partition locally, then repartition everything into a
    // single partition and combine the partial results.
    return input.partitionAggregate(outputFields, _agg, outputFields)
                .global()
                .partitionAggregate(outputFields, _agg, outputFields);
}
/**
 * Returns a stream consisting of the elements of this stream that match the given filter.
 *
 * @param filter the filter to apply to each trident tuple to determine if it should be included
 * @return the new stream
 */
public Stream filter(Filter filter) {
    return each(getOutputFields(), filter);
}
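// Minimal usage sketch (the PositiveCount filter below is hypothetical,
// assuming the stream carries a "count" field): filter() runs the filter
// over the full output fields of the stream.
class PositiveCount extends BaseFilter {
    @Override
    public boolean isKeep(TridentTuple tuple) {
        return tuple.getLongByField("count") > 0; // drop non-positive counts
    }
}

// stream.filter(new PositiveCount()) is shorthand for
// stream.each(stream.getOutputFields(), new PositiveCount()).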
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    // Partition by the group fields first, so all tuples of a group issue
    // their state query from the same partition.
    return _stream.partitionBy(_groupFields)
                  .stateQuery(state, inputFields, function, functionFields);
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(), new Fields("followerClass", "contentName"))
            .parallelismHint(3)
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
            .parallelismHint(3);

    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
            .stateQuery(count, new Fields("followerClass", "args"), new MapGet(), new Fields("count"));

    return topology.build();
}
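// Constants is not shown in this example; a plausible sketch is a function
// that emits one tuple per constant, fanning a single DRPC request out into
// one state query per follower class (the implementation details are assumed):
public class Constants<T> extends BaseFunction {
    private final T[] values;

    @SafeVarargs
    public Constants(T... values) {
        this.values = values;
    }

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        for (T value : values) {
            collector.emit(new Values(value)); // one "followerClass" tuple per constant
        }
    }
}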
// Reconstructed dispatch logic: the null guards and the final reducer branch
// are inferred from the symmetry of the surviving fragment.
if (function != null) {
    if (inputFields.isEmpty()) {
        stream = stream.each(function, new Fields(outputFields));
    } else {
        stream = stream.each(new Fields(inputFields), function, new Fields(outputFields));
    }
}

if (filter != null) {
    stream = stream.each(new Fields(inputFields), filter);
}

if (combinerAggregator != null) {
    if (inputFields.isEmpty()) {
        if (partitionAggregate) {
            stream = stream.partitionAggregate(combinerAggregator, new Fields(outputFields));
        } else {
            stream = stream.aggregate(combinerAggregator, new Fields(outputFields));
        }
    } else {
        if (partitionAggregate) {
            stream = stream.partitionAggregate(new Fields(inputFields), combinerAggregator, new Fields(outputFields));
        } else {
            stream = stream.aggregate(new Fields(inputFields), combinerAggregator, new Fields(outputFields));
        }
    }
}

if (reducerAggregator != null) {
    if (inputFields.isEmpty()) {
        if (partitionAggregate) {
            stream = stream.partitionAggregate(reducerAggregator, new Fields(outputFields));
        } else {
            stream = stream.aggregate(reducerAggregator, new Fields(outputFields));
        }
    } else {
        if (partitionAggregate) {
            stream = stream.partitionAggregate(new Fields(inputFields), reducerAggregator, new Fields(outputFields));
        } else {
            stream = stream.aggregate(new Fields(inputFields), reducerAggregator, new Fields(outputFields));
        }
    }
}
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();
    TridentState urlToTweeters = topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
    TridentState tweetersToFollowers = topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

    topology.newDRPCStream("reach", drpc)
            .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
            .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
            .shuffle()
            .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return topology.build();
}
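// The One aggregator used above (as defined in storm-starter's TridentReach)
// collapses each follower group to a single 1, so summing the "one" field
// counts distinct followers:
public static class One implements CombinerAggregator<Integer> {
    @Override
    public Integer init(TridentTuple tuple) {
        return 1;
    }

    @Override
    public Integer combine(Integer val1, Integer val2) {
        return 1; // any number of tuples in the group collapses to 1
    }

    @Override
    public Integer zero() {
        return 1;
    }
}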
topology.newStream("filter", spout).each(new Fields("text", "actor"), new PereTweetsFilter()) .each(new Fields("text", "actor"), new Utils.PrintFilter()); .each(new Fields("text", "actor"), new UppercaseFunction(), new Fields("uppercased_text")) .each(new Fields("text", "uppercased_text"), new Utils.PrintFilter()); topology.newStream("parallel", spout).each(new Fields("text", "actor"), new PereTweetsFilter()) .parallelismHint(5).each(new Fields("text", "actor"), new Utils.PrintFilter()); topology.newStream("parallel_and_partitioned", spout).partitionBy(new Fields("actor")) .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5) .each(new Fields("text", "actor"), new Utils.PrintFilter()); topology.newStream("parallel_and_partitioned", spout).parallelismHint(1).shuffle() .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5) .each(new Fields("text", "actor"), new Utils.PrintFilter()); topology.newStream("aggregation", spout).parallelismHint(1) .aggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result")) .parallelismHint(5).each(new Fields("aggregated_result"), new Utils.PrintFilter()); .parallelismHint(1) .shuffle() .partitionAggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result")).parallelismHint(6) .each(new Fields("aggregated_result"), new Utils.PrintFilter()); topology.newStream("aggregation", spout).parallelismHint(1).groupBy(new Fields("location")) .aggregate(new Fields("location"), new Count(), new Fields("count")).parallelismHint(5)
.each(new Fields("args"), new TextProcessor(), new Fields("textProcessed")) .each(new Fields("textProcessed"), new VectorBuilder(), new Fields("tweet_obj", "uniqWordsIncrease")) .broadcast() .stateQuery(bucketsDB, new Fields("tweet_obj", "uniqWordsIncrease"), new BucketsStateQuery(), new Fields("tw_id", "collidingTweetsList")) .parallelismHint(bucketsParallelism) .each(new Fields("collidingTweetsList"), new ExpandList(), new Fields("coltweet_obj", "coltweetId")) .groupBy(new Fields("tw_id", "coltweetId")) .aggregate( new Fields("coltweetId", "tweet_obj", "coltweet_obj"), .groupBy(new Fields("tw_id")) .aggregate( new Fields("count", "coltweetId", "tweet_obj", new Fields("countAfter", "coltweetId", "tweet_obj", "coltweet_obj")) .each(new Fields("tw_id", "coltweetId", "tweet_obj", "coltweet_obj"), new ComputeDistance(), new Fields("cosSim")) .parallelismHint(computeDistance) .shuffle()
.aggregate(new Fields("location"), new StringCounter(), new Fields("aggregated_result")) .parallelismHint(3) .partitionBy(new Fields("location")) .partitionAggregate(new Fields("location"), new StringCounter(), new Fields("aggregated_result")) .parallelismHint(3) .partitionBy(new Fields("location")) .partitionAggregate(new Fields("location"), new StringCounter(), new Fields("count_map")) .each(new Fields("count_map"), new HasSpain()) .each(new Fields("count_map"), new Print("AFTER-HAS-SPAIN")) .parallelismHint(3) .shuffle() .each(new Fields("count_map"), new TimesTen(), new Fields("count_map_times_ten")) .each(new Fields("count_map_times_ten"), new Print("AFTER-TIMES-TEN")) .parallelismHint(3) .global() .each(new Fields("actor"), new Print()) .parallelismHint(3)
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
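// One way to exercise this topology locally (assumes storm's LocalCluster and
// LocalDRPC; the topology name and query string are arbitrary):
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("wordCounter", new Config(), buildTopology(drpc));

// Each call returns the summed counts for the given words.
System.out.println(drpc.execute("words", "cat dog the man"));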
"bounceRate", new TransactionalTridentKafkaSpout(kafkaConfig)) .each(new Fields("url"), new NormalizeURL(), new Fields("normurl")) .each(new Fields("normurl"), new ExtractDomain(), new Fields("domain")) .partitionBy(new Fields("domain", "user")) .partitionPersist( new MemoryMapState.Factory(), new Fields("domain", "user", "timestamp"), new Fields("domain", "isBounce")) .newValuesStream() .each(new Fields("isBounce"), new BooleanToInt(), new Fields("bint")) .groupBy(new Fields("domain")) .persistentAggregate( CassandraState.transactional(
public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
            .groupBy(new Fields("location"))
            .aggregate(new Fields("location"), new Count(), new Fields("count"))
            .each(new Fields("location", "count"), new Utils.PrintFilter());

    return topology.build();
}
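// Utils.PrintFilter is not shown in these snippets; a reasonable sketch is a
// pass-through filter that logs every tuple it sees (hypothetical implementation):
public static class PrintFilter extends BaseFilter {
    @Override
    public boolean isKeep(TridentTuple tuple) {
        System.out.println(tuple); // log the tuple...
        return true;               // ...and keep it
    }
}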
// The chain head is cut off in the original; the stream id "hourlyUrlCounts"
// and the Count() aggregator at the end are assumptions.
Stream stream = topology.newStream("hourlyUrlCounts",
        new TransactionalTridentKafkaSpout(kafkaConfig))
        .each(new Fields("url"), new NormalizeURL(), new Fields("normurl"))
        .each(new Fields("timestamp"), new ToHourBucket(), new Fields("bucket"))
        .project(new Fields("normurl", "bucket"));

stream.groupBy(new Fields("normurl", "bucket"))
      .persistentAggregate(state, new Count(), new Fields("count"));
public static StormTopology buildTopology(LocalDRPC drpc, StateFactory state) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(state, new Count(), new Fields("count"))
            .parallelismHint(6);

    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("word"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(new Consumer() {
                @Override
                public void accept(TridentTuple input) {
                    System.out.println(input.getString(0));
                }
            })
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology.newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(new Fields("args"))
            .stateQuery(wordCounts, new Fields("args"), new MapGet(), new Fields("count"))
            .filter(new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
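// The split, toUpper, and theFilter operations referenced above are defined
// elsewhere; sketches in the spirit of storm-starter's TridentMapExample follow
// (the exact predicates are assumptions):
private static final FlatMapFunction split = new FlatMapFunction() {
    @Override
    public Iterable<Values> execute(TridentTuple input) {
        List<Values> words = new ArrayList<>();
        for (String word : input.getString(0).split(" ")) {
            words.add(new Values(word)); // one tuple per word
        }
        return words;
    }
};

private static final MapFunction toUpper = new MapFunction() {
    @Override
    public Values execute(TridentTuple input) {
        return new Values(input.getString(0).toUpperCase());
    }
};

// Keep only words matching some predicate, e.g. a fixed word.
private static final Filter theFilter = new BaseFilter() {
    @Override
    public boolean isKeep(TridentTuple tuple) {
        return tuple.getString(0).equals("THE");
    }
};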
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    /**
     * First, we need a stream of tweets from which we parse and extract
     * only the text and its id. As you will notice, we store the stream
     * using the {@link ElasticSearchState} implementation through its
     * {@link StateUpdater}. Check their implementations for details.
     */
    topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId"))
            .project(new Fields("tweetId", "text"))
            .each(new Fields("tweetId", "text"), new Print())
            .partitionPersist(new ElasticSearchStateFactory(), new Fields("tweetId", "text"), new ElasticSearchStateUpdater());

    /**
     * Now we need a DRPC stream to query the state where the tweets are stored.
     * To do that, as shown below, we need an implementation of {@link QueryFunction}
     * to access our {@link ElasticSearchState}.
     */
    TridentState elasticSearchState = topology.newStaticState(new ElasticSearchStateFactory());
    topology
            .newDRPCStream("search")
            .each(new Fields("args"), new Split(" "), new Fields("keywords")) // let's split the arguments
            .stateQuery(elasticSearchState, new Fields("keywords"), new TweetQuery(), new Fields("ids")) // and pass them as query parameters
            .project(new Fields("ids"));

    return topology.build();
}
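// The "search" function is registered without a LocalDRPC handle, so it is
// meant to be queried through a DRPC server once deployed. With a classic
// (0.9.x-era) Storm client this looks roughly like the following; the client
// constructor arguments vary across Storm versions:
DRPCClient client = new DRPCClient("drpc.server.location", 3772);
// Returns the ids of stored tweets matching the space-separated keywords.
System.out.println(client.execute("search", "storm trident"));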
TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(spout_Parallelism_hint) .each(new Fields("sentence"), new Split(), new Fields("word")).parallelismHint(split_Parallelism_hint).groupBy(new Fields("word")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")) .parallelismHint(count_Parallelism_hint);