/**
 * Creates a {@code ProcessorTransform} that performs a flat-mapping stage
 * backed by a context object obtained from {@code contextFactory}.
 *
 * @param upstream       the upstream transform this stage consumes
 * @param contextFactory factory that supplies the per-processor context object
 * @param flatMapFn      function mapping (context, item) to a traverser of results
 */
public static <C, T, R> ProcessorTransform flatMapUsingContextTransform(
        @Nonnull Transform upstream,
        @Nonnull ContextFactory<C> contextFactory,
        @Nonnull DistributedBiFunction<? super C, ? super T, ? extends Traverser<? extends R>> flatMapFn
) {
    ProcessorTransform transform = new ProcessorTransform(
            "flatMapUsingContext", upstream, flatMapUsingContextP(contextFactory, flatMapFn));
    return transform;
}
/**
 * Returns a processor supplier that formats each incoming
 * {@code TimestampedEntry<String, Long>} as a human-readable line:
 * the entry's timestamp rendered as {@code HH:mm:ss.SSS} in the system
 * default zone, followed by the key and the value.
 */
private static DistributedSupplier<Processor> formatOutput() {
    return () -> {
        // If DateTimeFormatter was serializable, it could be created in
        // buildDag() and simply captured by the serializable lambda below. Since
        // it isn't, we need this long-hand approach that explicitly creates the
        // formatter at the use site instead of having it implicitly deserialized.
        DateTimeFormatter timeFormat = DateTimeFormatter.ofPattern("HH:mm:ss.SSS");
        return Processors.mapP((TimestampedEntry<String, Long> f) -> String.format("%s %5s %4d",
                // Epoch-millis timestamp -> zoned time string in the local zone.
                timeFormat.format(Instant.ofEpochMilli(f.getTimestamp()).atZone(ZoneId.systemDefault())),
                f.getKey(),
                f.getValue())).get();
    };
}
}
/**
 * Creates a {@code ProcessorTransform} that performs a mapping stage backed
 * by a context object obtained from {@code contextFactory}.
 *
 * @param upstream       the upstream transform this stage consumes
 * @param contextFactory factory that supplies the per-processor context object
 * @param mapFn          function mapping (context, item) to the output item
 */
public static <C, T, R> ProcessorTransform mapUsingContextTransform(
        @Nonnull Transform upstream,
        @Nonnull ContextFactory<C> contextFactory,
        @Nonnull DistributedBiFunction<? super C, ? super T, ? extends R> mapFn
) {
    ProcessorTransform transform = new ProcessorTransform(
            "mapUsingContext", upstream, mapUsingContextP(contextFactory, mapFn));
    return transform;
}
flatMapUsingContextP( ContextFactory.withCreateFn(jet -> null).nonCooperative(), (Object ctx, Entry<?, String> e) -> traverseStream(docLines(e.getValue()))) flatMapP((String line) -> traverseArray(delimiter.split(line.toLowerCase())) .filter(word -> !word.isEmpty())) ); Vertex accumulate = dag.newVertex("accumulate", accumulateByKeyP(singletonList(wholeItem()), counting())); Vertex combine = dag.newVertex("combine", combineByKeyP(counting(), Util::entry));
Vertex docCount = dag.newVertex("doc-count", Processors.aggregateP(counting())); flatMapUsingContextP( ContextFactory.withCreateFn(jet -> null).nonCooperative(), (Object ctx, Entry<Long, String> e) -> Vertex tf = dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
))); Vertex slidingStage1 = dag.newVertex("sliding-stage-1", Processors.accumulateByFrameP( singletonList(keyFn), singletonList(timestampFn), TimestampKind.EVENT, winPolicy, counting() )); Vertex slidingStage2 = dag.newVertex("sliding-stage-2", Processors.combineToSlidingWindowP(winPolicy, counting(), TimestampedEntry::fromWindowResult)); Vertex formatOutput = dag.newVertex("format-output", mapUsingContextP( ContextFactory.withCreateFn(x -> DateTimeFormatter.ofPattern("HH:mm:ss.SSS")), (DateTimeFormatter timeFormat, TimestampedEntry<String, Long> tse) ->
/**
 * Builds the word-count DAG: source -> tokenize -> aggregate -> sink.
 * The source reads document lines, tokenize splits them into lower-cased
 * words, aggregate counts occurrences per word, and the sink writes the
 * counts into the supplied map.
 *
 * @param counts the map the sink vertex fills with word -> count entries
 */
@Nonnull
private static DAG buildDag(Map<String, Long> counts) {
    final Pattern wordSeparator = Pattern.compile("\\W+");
    DAG wordCountDag = new DAG();

    Vertex sourceVertex = wordCountDag.newVertex("source", DocLinesP::new);
    Vertex tokenizeVertex = wordCountDag.newVertex("tokenize",
            flatMapP((String line) -> traverseArray(wordSeparator.split(line.toLowerCase()))
                    .filter(word -> !word.isEmpty()))
    );
    Vertex aggregateVertex = wordCountDag.newVertex("aggregate",
            aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    Vertex sinkVertex = wordCountDag.newVertex("sink", () -> new MapSinkP(counts));

    // Source is non-parallelizable; words are routed to counters by hash of the word itself.
    return wordCountDag
            .edge(between(sourceVertex.localParallelism(1), tokenizeVertex))
            .edge(between(tokenizeVertex, aggregateVertex).partitioned(wholeItem(), HASH_CODE))
            .edge(between(aggregateVertex, sinkVertex));
}
/**
 * Adds the keyed sliding-window aggregation as two stages: a local
 * accumulate-by-frame vertex followed by a combine vertex that receives
 * partial frames over a distributed, key-partitioned edge.
 */
private void addSlidingWindowTwoStage(Planner p, SlidingWindowDef wDef) {
    String vertexName = p.uniqueVertexName(name());
    SlidingWindowPolicy windowPolicy = wDef.toSlidingWindowPolicy();
    // Stage 1: accumulate frames locally, partitioned by the grouping keys.
    Vertex accumulateVertex = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX,
            accumulateByFrameP(
                    keyFns,
                    nCopies(keyFns.size(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
                    TimestampKind.EVENT,
                    windowPolicy,
                    aggrOp));
    accumulateVertex.localParallelism(localParallelism());
    // Stage 2: combine partial frames into full sliding-window results.
    PlannerVertex combinePv = p.addVertex(this, vertexName, localParallelism(),
            combineToSlidingWindowP(windowPolicy, aggrOp, mapToOutputFn));
    p.addEdges(this, accumulateVertex, (e, ord) -> e.partitioned(keyFns.get(ord), HASH_CODE));
    p.dag.edge(between(accumulateVertex, combinePv.v).distributed().partitioned(entryKey()));
}
/**
 * Adds the grouped aggregation as two stages: a local accumulate-by-key
 * vertex followed by a combine-by-key vertex fed over a distributed,
 * entry-key-partitioned edge.
 */
private void addToDagTwoStage(Planner p) {
    List<DistributedFunction<?, ? extends K>> keyFns = this.groupKeyFns;
    String vertexName = p.uniqueVertexName(this.name());
    // Stage 1: accumulate partial results locally, partitioned by group key.
    Vertex accumulateVertex = p.dag
            .newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX, accumulateByKeyP(keyFns, aggrOp))
            .localParallelism(localParallelism());
    // Stage 2: combine partial accumulators into the final per-key results.
    PlannerVertex combinePv = p.addVertex(this, vertexName, localParallelism(),
            combineByKeyP(aggrOp, mapToOutputFn));
    p.addEdges(this, accumulateVertex, (e, ord) -> e.partitioned(keyFns.get(ord), HASH_CODE));
    p.dag.edge(between(accumulateVertex, combinePv.v).distributed().partitioned(entryKey()));
}
}
/**
 * Adds the global aggregation as a single vertex with local parallelism 1;
 * every upstream item reaches it over a distributed all-to-one edge.
 */
private void addToDagSingleStage(Planner p) {
    PlannerVertex aggregatePv = p.addVertex(this, p.uniqueVertexName(name()), 1, aggregateP(aggrOp));
    p.addEdges(this, aggregatePv.v, e -> e.distributed().allToOne());
}
/**
 * Adds the grouped aggregation as a single vertex that both accumulates
 * and combines, fed over a distributed edge partitioned on the group keys.
 */
private void addToDagSingleStage(Planner p) {
    PlannerVertex aggregatePv = p.addVertex(this, p.uniqueVertexName(name()), localParallelism(),
            aggregateByKeyP(groupKeyFns, aggrOp, mapToOutputFn));
    p.addEdges(this, aggregatePv.v,
            (edge, ordinal) -> edge.distributed().partitioned(groupKeyFns.get(ordinal)));
}
/**
 * Adds the keyed sliding-window aggregation as a single vertex, fed over
 * a distributed edge partitioned on the grouping keys.
 */
private void addSlidingWindowSingleStage(Planner p, SlidingWindowDef wDef) {
    SlidingWindowPolicy windowPolicy = wDef.toSlidingWindowPolicy();
    PlannerVertex windowPv = p.addVertex(this, p.uniqueVertexName(name()), localParallelism(),
            aggregateToSlidingWindowP(
                    keyFns,
                    nCopies(keyFns.size(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
                    TimestampKind.EVENT,
                    windowPolicy,
                    aggrOp,
                    mapToOutputFn
            ));
    p.addEdges(this, windowPv.v, (e, ord) -> e.distributed().partitioned(keyFns.get(ord)));
}
/**
 * Adds the global (non-keyed) sliding-window aggregation as two stages:
 * local accumulate-by-frame vertices keyed on a constant, then a single
 * combine vertex reached over a distributed all-to-one edge.
 */
private void addSlidingWindowTwoStage(Planner p, SlidingWindowDef wDef) {
    String vertexName = p.uniqueVertexName(name());
    SlidingWindowPolicy windowPolicy = wDef.toSlidingWindowPolicy();
    // Stage 1: every input contributes under the same constant key so the
    // final combine can run on a single member.
    Vertex accumulateVertex = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX,
            accumulateByFrameP(
                    nCopies(aggrOp.arity(), constantKey()),
                    nCopies(aggrOp.arity(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
                    TimestampKind.EVENT,
                    windowPolicy,
                    aggrOp
            ));
    accumulateVertex.localParallelism(localParallelism());
    // Stage 2: single combiner (parallelism 1) produces the window results.
    PlannerVertex combinePv = p.addVertex(this, vertexName, 1,
            combineToSlidingWindowP(windowPolicy, aggrOp, mapToOutputFn.toKeyedWindowResultFn()));
    p.addEdges(this, accumulateVertex);
    p.dag.edge(between(accumulateVertex, combinePv.v).distributed().allToOne());
}
Vertex readTickerInfoMap = dag.newVertex("readTickerInfoMap", readMapP(TICKER_INFO_MAP_NAME)); Vertex collectToMap = dag.newVertex("collectToMap", Processors.aggregateP(AggregateOperations.toMap(entryKey(), entryValue()))); Vertex hashJoin = dag.newVertex("hashJoin", () -> new HashJoinP<>(Trade::getTicker)); Vertex sink = dag.newVertex("sink", writeLoggerP(o -> Arrays.toString((Object[]) o)));
/**
 * Adds the global (non-keyed) sliding-window aggregation as a single
 * vertex with parallelism 1; all items reach it over a distributed
 * all-to-one edge, keyed on a constant.
 */
private void addSlidingWindowSingleStage(Planner p, SlidingWindowDef wDef) {
    SlidingWindowPolicy windowPolicy = wDef.toSlidingWindowPolicy();
    PlannerVertex windowPv = p.addVertex(this, p.uniqueVertexName(name()), 1,
            aggregateToSlidingWindowP(
                    nCopies(aggrOp.arity(), constantKey()),
                    nCopies(aggrOp.arity(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
                    TimestampKind.EVENT,
                    windowPolicy,
                    aggrOp,
                    mapToOutputFn.toKeyedWindowResultFn()
            ));
    p.addEdges(this, windowPv.v, edge -> edge.distributed().allToOne());
}
/**
 * Adds this transform to the DAG as an identity-mapping vertex.
 */
@Override
public void addToDag(Planner p) {
    PlannerVertex identityPv = p.addVertex(
            this, p.uniqueVertexName(name()), localParallelism(), mapP(identity()));
    p.addEdges(this, identityPv.v);
}
}
/**
 * Creates a {@code PartitionedProcessorTransform} that performs a mapping
 * stage backed by a context object, with input partitioned by
 * {@code partitionKeyFn}.
 *
 * @param upstream       the upstream transform this stage consumes
 * @param contextFactory factory that supplies the per-processor context object
 * @param mapFn          function mapping (context, item) to the output item
 * @param partitionKeyFn extracts the partitioning key from each input item
 */
public static <C, T, K, R> PartitionedProcessorTransform<T, K> mapUsingContextPartitionedTransform(
        @Nonnull Transform upstream,
        @Nonnull ContextFactory<C> contextFactory,
        @Nonnull DistributedBiFunction<? super C, ? super T, ? extends R> mapFn,
        @Nonnull DistributedFunction<? super T, ? extends K> partitionKeyFn
) {
    PartitionedProcessorTransform<T, K> transform = new PartitionedProcessorTransform<>(
            "mapUsingPartitionedContext", upstream,
            mapUsingContextP(contextFactory, mapFn), partitionKeyFn);
    return transform;
}
/**
 * Creates a {@code PartitionedProcessorTransform} that performs a
 * flat-mapping stage backed by a context object, with input partitioned by
 * {@code partitionKeyFn}.
 *
 * @param upstream       the upstream transform this stage consumes
 * @param contextFactory factory that supplies the per-processor context object
 * @param flatMapFn      function mapping (context, item) to a traverser of results
 * @param partitionKeyFn extracts the partitioning key from each input item
 */
public static <C, T, K, R> PartitionedProcessorTransform<T, K> flatMapUsingPartitionedContextTransform(
        @Nonnull Transform upstream,
        @Nonnull ContextFactory<C> contextFactory,
        @Nonnull DistributedBiFunction<? super C, ? super T, ? extends Traverser<? extends R>> flatMapFn,
        @Nonnull DistributedFunction<? super T, ? extends K> partitionKeyFn
) {
    PartitionedProcessorTransform<T, K> transform = new PartitionedProcessorTransform<>(
            "flatMapUsingPartitionedContext", upstream,
            flatMapUsingContextP(contextFactory, flatMapFn), partitionKeyFn);
    return transform;
}
))); Vertex slidingWindow = dag.newVertex("aggregate-to-sliding-win", aggregateToSlidingWindowP( singletonList((DistributedFunction<Trade, String>) Trade::getTicker), singletonList((DistributedToLongFunction<Trade>) Trade::getTime),
/**
 * Adds this transform to the DAG as a vertex applying {@code mapFn()} to
 * each item.
 */
@Override
public void addToDag(Planner p) {
    PlannerVertex mapPv = p.addVertex(
            this, p.uniqueVertexName(name()), localParallelism(), mapP(mapFn()));
    p.addEdges(this, mapPv.v);
}
}