@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) { return input .apply( "LeaderboardUserGlobalWindow", Window.<GameActionInfo>into(new GlobalWindows()) // Get periodic results every ten minutes. .triggering( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES))) .accumulatingFiredPanes() .withAllowedLateness(allowedLateness)) // Extract and sum username/score pairs from the event data. .apply("ExtractUserScore", new ExtractAndSumScore("user")); } }
/**
 * Sums integer values per key within fixed windows and formats the sums as strings.
 *
 * <p>Windows are {@code TWO_MINUTES} long. The trigger fires at the end of the window, with
 * early firings {@code ONE_MINUTE} of processing time after the first element in a pane and a
 * late firing for every late element. Fired panes are discarded and allowed lateness is 1000
 * days.
 *
 * @param input keyed integers to aggregate
 * @return the output of {@code FormatAsStrings} applied to the per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  Window<KV<String, Integer>> twoMinuteWindows =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                  .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.standardDays(1000))
          .discardingFiredPanes();

  PCollection<KV<String, Integer>> perKeySums =
      input.apply(twoMinuteWindows).apply(Sum.integersPerKey());

  return perKeySums.apply(ParDo.of(new FormatAsStrings()));
}
private <SignalT> PCollectionView<?> expandTyped(PCollection<SignalT> input) { return input .apply(Window.<SignalT>configure().triggering(Never.ever()).discardingFiredPanes()) // Perform a per-window pre-combine so that our performance does not critically depend // on combiner lifting. .apply(ParDo.of(new CollectWindowsFn<>())) .apply(Sample.any(1)) .apply(View.asList()); } }
@Override public PCollection<Void> expand(PCollection<KV<K, V>> input) { int numShards = spec.getNumShards(); if (numShards <= 0) { try (Consumer<?, ?> consumer = openConsumer(spec)) { numShards = consumer.partitionsFor(spec.getTopic()).size(); LOG.info( "Using {} shards for exactly-once writer, matching number of partitions " + "for topic '{}'", numShards, spec.getTopic()); } } checkState(numShards > 0, "Could not set number of shards"); return input .apply( Window.<KV<K, V>>into(new GlobalWindows()) // Everything into global window. .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .discardingFiredPanes()) .apply( String.format("Shuffle across %d shards", numShards), ParDo.of(new Reshard<>(numShards))) .apply("Persist sharding", GroupByKey.create()) .apply("Assign sequential ids", ParDo.of(new Sequencer<>())) .apply("Persist ids", GroupByKey.create()) .apply( String.format("Write to Kafka topic '%s'", spec.getTopic()), ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder()))); }
/**
 * Returns a {@code Window.into} transform built from {@code windowFn}.
 *
 * @param <T> element type of the collection being windowed
 */
@Override
public <T> PTransform<PCollection<T>, PCollection<T>> windowDummy() {
  Window<T> dummyWindowing = Window.into(windowFn);
  return dummyWindowing;
}
/**
 * Windows the input with {@code windowFn}, counts each distinct element, and formats the counts.
 *
 * <p>The windowing uses {@code TimestampCombiner.EARLIEST}; the formatted output is given a
 * {@code StringUtf8Coder}.
 *
 * @param in the strings to count
 * @return the output of {@code FormatCountsDoFn} applied to the per-element counts
 */
@Override
public PCollection<String> expand(PCollection<String> in) {
  Window<String> earliestTimestampWindowing =
      Window.<String>into(windowFn).withTimestampCombiner(TimestampCombiner.EARLIEST);

  PCollection<String> formattedCounts =
      in.apply("Window", earliestTimestampWindowing)
          .apply(Count.perElement())
          .apply("FormatCounts", ParDo.of(new FormatCountsDoFn()));

  return formattedCounts.setCoder(StringUtf8Coder.of());
}
}
// Two windowing steps excerpted from longer call chains (their receivers are not visible here).
// Each uses fixed windows of options.getWindowSize() seconds, zero allowed lateness, and
// discarding fired panes; the first is typed for KV<String, String>, the second for
// KV<String, Integer>.
.apply(Window.<KV<String, String>>into( FixedWindows.of(Duration.standardSeconds(options.getWindowSize()))) .withAllowedLateness(Duration.ZERO).discardingFiredPanes()); .apply(Window.<KV<String, Integer>>into( FixedWindows.of(Duration.standardSeconds(options.getWindowSize()))) .withAllowedLateness(Duration.ZERO).discardingFiredPanes());
/**
 * Checks that display data has no "trigger" or "allowedLateness" item for a global-window
 * transform configured with {@code DefaultTrigger.of()} and an allowed lateness of
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} milliseconds.
 */
@Test
public void testDisplayDataExcludesDefaults() {
  Duration maxLateness = Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
  Window<?> defaultsOnly =
      Window.into(new GlobalWindows())
          .triggering(DefaultTrigger.of())
          .withAllowedLateness(maxLateness);

  DisplayData displayData = DisplayData.from(defaultsOnly);

  assertThat(displayData, not(hasDisplayItem("trigger")));
  assertThat(displayData, not(hasDisplayItem("allowedLateness")));
}
// Two excerpts that build a Window.configure() transform with a Never.ever() trigger, discarding
// fired panes, and the input windowing strategy's allowed lateness; the second applies it under
// the step name "NeverTrigger" and then groups by key as "GroupDummyAndContents".
// NOTE(review): the first declaration has unbalanced angle brackets and assigns a Window
// transform to a PCollection-typed variable, and the second excerpt has no visible receiver;
// this looks like an incomplete excerpt. Confirm against the original file.
PCollection<KV<Integer, Iterable<ValueInSingleWindow<T>>>>> removeTriggering = Window.<KV<Integer, Iterable<ValueInSingleWindow<T>>>>configure() .triggering(Never.ever()) .discardingFiredPanes() .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()); .apply( "NeverTrigger", Window.<KV<Integer, Iterable<ValueInSingleWindow<T>>>>configure() .triggering(Never.ever()) .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()) .discardingFiredPanes()) .apply("GroupDummyAndContents", GroupByKey.create());
/**
 * Checks that display data omits properties that were never set: a transform that only sets the
 * accumulation mode shows none of the other windowing keys, and a transform that only sets the
 * window function shows no "accumulationMode" key.
 */
@Test
public void testDisplayDataExcludesUnspecifiedProperties() {
  Window<?> accumulationModeOnly = Window.configure().discardingFiredPanes();
  DisplayData accumulationModeOnlyData = DisplayData.from(accumulationModeOnly);
  assertThat(
      accumulationModeOnlyData,
      not(
          hasDisplayItem(
              hasKey(
                  isOneOf(
                      "windowFn",
                      "trigger",
                      "timestampCombiner",
                      "allowedLateness",
                      "closingBehavior")))));

  Window<?> withoutAccumulationMode = Window.into(new GlobalWindows());
  DisplayData withoutAccumulationModeData = DisplayData.from(withoutAccumulationMode);
  assertThat(withoutAccumulationModeData, not(hasDisplayItem(hasKey("accumulationMode"))));
}
/**
 * Expects an {@code IllegalArgumentException} mentioning "allowed lateness" and
 * "accumulation mode be specified" when allowed lateness is configured, without an accumulation
 * mode, on a collection that was windowed with only a window function.
 */
@Test
public void testMissingModeViaLateness() {
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10));
  PCollection<String> words =
      pipeline.apply(Create.of("hello", "world").withCoder(StringUtf8Coder.of()));
  PCollection<String> input = words.apply("Window", Window.<String>into(tenMinuteWindows));

  // Expectations must be registered before the apply below, which is the call under test.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("allowed lateness");
  thrown.expectMessage("accumulation mode be specified");

  input.apply(
      "Lateness", Window.<String>configure().withAllowedLateness(Duration.standardDays(1)));
}
/**
 * Expects an {@code UnsupportedOperationException} when a GROUP BY query is applied to
 * {@code unboundedInput1} re-windowed into the global window with {@code DefaultTrigger.of()}.
 */
@Test
public void testUnsupportedGlobalWindowWithDefaultTrigger() {
  exceptions.expect(UnsupportedOperationException.class);
  pipeline.enableAbandonedNodeEnforcement(false);

  Window<Row> globalWithDefaultTrigger =
      Window.<Row>into(new GlobalWindows()).triggering(DefaultTrigger.of());
  PCollection<Row> input =
      unboundedInput1.apply("unboundedInput1.globalWindow", globalWithDefaultTrigger);

  String sql = "SELECT f_int2, COUNT(*) AS `size` FROM PCOLLECTION GROUP BY f_int2";
  input.apply("testUnsupportedGlobalWindows", SqlTransform.query(sql));
}
/**
 * Checks the windowing strategy produced by a transform that sets a window function, a one-day
 * allowed lateness, and accumulating mode but no trigger: the trigger is reported as
 * unspecified, while mode and allowed lateness are specified with the configured values.
 */
@Test
public void testWindowIntoAccumulatingLatenessNoTrigger() {
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10));
  Window<String> accumulatingWithLateness =
      Window.<String>into(tenMinuteWindows)
          .withAllowedLateness(Duration.standardDays(1))
          .accumulatingFiredPanes();

  WindowingStrategy<?, ?> strategy =
      pipeline
          .apply(Create.of("hello", "world").withCoder(StringUtf8Coder.of()))
          .apply("Lateness", accumulatingWithLateness)
          .getWindowingStrategy();

  // Which properties were explicitly specified.
  assertThat(strategy.isTriggerSpecified(), is(false));
  assertThat(strategy.isModeSpecified(), is(true));
  assertThat(strategy.isAllowedLatenessSpecified(), is(true));

  // The values of the specified properties.
  assertThat(strategy.getMode(), equalTo(AccumulationMode.ACCUMULATING_FIRED_PANES));
  assertThat(strategy.getAllowedLateness(), equalTo(Duration.standardDays(1)));
}
@Override public PCollection<Void> expand(PCollection<KV<K, V>> input) { int numShards = spec.getNumShards(); if (numShards <= 0) { try (Consumer<?, ?> consumer = openConsumer(spec)) { numShards = consumer.partitionsFor(spec.getTopic()).size(); LOG.info( "Using {} shards for exactly-once writer, matching number of partitions " + "for topic '{}'", numShards, spec.getTopic()); } } checkState(numShards > 0, "Could not set number of shards"); return input .apply( Window.<KV<K, V>>into(new GlobalWindows()) // Everything into global window. .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .discardingFiredPanes()) .apply( String.format("Shuffle across %d shards", numShards), ParDo.of(new Reshard<>(numShards))) .apply("Persist sharding", GroupByKey.create()) .apply("Assign sequential ids", ParDo.of(new Sequencer<>())) .apply("Persist ids", GroupByKey.create()) .apply( String.format("Write to Kafka topic '%s'", spec.getTopic()), ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder()))); }
/**
 * Returns a {@code Window.into} transform built from {@code windowFn.intoOnlyExisting()}.
 *
 * @param <T> element type of the collection being windowed
 */
@Override
public <T> PTransform<PCollection<T>, PCollection<T>> windowActuals() {
  Window<T> actualsWindowing = Window.into(windowFn.intoOnlyExisting());
  return actualsWindowing;
}
}
// Two windowing steps excerpted from longer call chains (their receivers are not visible here),
// named "WindowClicks" and "WindowPurchases". Both use FixedWindows.of(new Duration(4)) with
// TimestampCombiner.EARLIEST.
.apply( "WindowClicks", Window.<KV<Integer, String>>into(FixedWindows.of(new Duration(4))) .withTimestampCombiner(TimestampCombiner.EARLIEST)); .apply( "WindowPurchases", Window.<KV<Integer, String>>into(FixedWindows.of(new Duration(4))) .withTimestampCombiner(TimestampCombiner.EARLIEST));
// Applies `stream` to the pipeline `p`, then windows the result into one-minute fixed windows
// with one minute of allowed lateness and discarding fired panes. The resulting collection is
// not kept.
p.apply(stream) .apply( Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))) .withAllowedLateness(Duration.standardMinutes(1)) .discardingFiredPanes());
// Excerpt of a Window.configure() argument (the enclosing call is not visible here): fires a
// pane for every element, discards fired panes, carries over inputWindowingStrategy's allowed
// lateness, and uses TimestampCombiner.EARLIEST.
Window.<KV<K, WindowedValue<KV<K, InputT>>>>configure() .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .discardingFiredPanes() .withAllowedLateness(inputWindowingStrategy.getAllowedLateness()) .withTimestampCombiner(TimestampCombiner.EARLIEST))
/**
 * Runs the multiple-outputs-per-block splittable DoFn over a single element and asserts that the
 * global count of its outputs equals {@code SDFWithMultipleOutputsPerBlockBase.MAX_INDEX}.
 *
 * @param bounded boundedness passed to {@code sdfWithMultipleOutputsPerBlock}
 */
private void testOutputAfterCheckpoint(IsBounded bounded) {
  Window<Integer> neverFiring =
      Window.<Integer>configure().triggering(Never.ever()).discardingFiredPanes();

  PCollection<Integer> outputs =
      p.apply(Create.of("foo"))
          .apply(ParDo.of(sdfWithMultipleOutputsPerBlock(bounded, 3)))
          .apply(neverFiring);

  long expectedCount = (long) SDFWithMultipleOutputsPerBlockBase.MAX_INDEX;
  PAssert.thatSingleton(outputs.apply(Count.globally())).isEqualTo(expectedCount);

  p.run();
}