sourceName = ((Read.Bounded<?>) transform).getSource().getClass().getName(); } else if (transform instanceof Read.Unbounded) { sourceName = ((Read.Unbounded<?>) transform).getSource().getClass().getName();
unbounded.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords());
@Test
public void testDisplayData() {
  // Bounded source that contributes its own "foo" display item.
  SerializableBoundedSource srcBounded =
      new SerializableBoundedSource() {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  // Unbounded source contributing the same display item.
  SerializableUnboundedSource srcUnbounded =
      new SerializableUnboundedSource() {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  Duration readTimeLimit = Duration.standardMinutes(2345);

  Read.Bounded<String> boundedRead = Read.from(srcBounded);
  BoundedReadFromUnboundedSource<String> boundedFromUnbounded =
      Read.from(srcUnbounded).withMaxNumRecords(1234).withMaxReadTime(readTimeLimit);

  // The bounded read must surface the source class and the source's own display data.
  DisplayData boundedData = DisplayData.from(boundedRead);
  assertThat(boundedData, hasDisplayItem("source", srcBounded.getClass()));
  assertThat(boundedData, includesDisplayDataFor("source", srcBounded));

  // The bounded-from-unbounded read must additionally surface its record/time limits.
  DisplayData unboundedData = DisplayData.from(boundedFromUnbounded);
  assertThat(unboundedData, hasDisplayItem("source", srcUnbounded.getClass()));
  assertThat(unboundedData, includesDisplayDataFor("source", srcUnbounded));
  assertThat(unboundedData, hasDisplayItem("maxRecords", 1234));
  assertThat(unboundedData, hasDisplayItem("maxReadTime", readTimeLimit));
}
unbounded.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords());
context.getCurrentTransform().getFullName(), context.getPipelineOptions(), transform.getSource(), context.getExecutionEnvironment().getParallelism()); nonDedupSource = context .getExecutionEnvironment() .addSource(sourceWrapper).name(transform.getName()).returns(withIdTypeInfo); if (transform.getSource().requiresDeduping()) { source = nonDedupSource.keyBy( new ValueWithRecordIdKeySelector<T>()) "Error while translating UnboundedSource: " + transform.getSource(), e);
return input.apply(readUnbounded); } else { return input.apply(readUnbounded.withMaxNumRecords(getTo() - getFrom())); readUnbounded.withMaxReadTime(getMaxReadTime()); if (isRangeUnbounded) { return input.apply(withMaxReadTime);
// Verifies that a rate-limited unbounded counting source splits evenly and that the
// split readers make progress concurrently: total wall time must be at least the
// rate-limited minimum for the whole element count.
@Test
@Category(NeedsRunner.class)
public void testUnboundedSourceRateSplits() throws Exception {
  int elementsPerPeriod = 10;
  Duration period = Duration.millis(5);
  long numElements = 1000;
  int numSplits = 10;
  UnboundedCountingSource initial =
      CountingSource.createUnboundedFrom(0).withRate(elementsPerPeriod, period);
  List<? extends UnboundedSource<Long, ?>> splits = initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());

  // Each split is capped so the splits together produce exactly numElements.
  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);
  PCollectionList<Long> pcollections = PCollectionList.empty(p);
  for (int i = 0; i < splits.size(); ++i) {
    pcollections =
        pcollections.and(
            p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit)));
  }
  PCollection<Long> input = pcollections.apply(Flatten.pCollections());

  addCountingAsserts(input, numElements);
  Instant startTime = Instant.now();
  p.run();
  Instant endTime = Instant.now();
  // 500 ms if the readers are all initialized in parallel; 5000 ms if they are evaluated serially
  long expectedMinimumMillis = (numElements * period.getMillis()) / elementsPerPeriod;
  // NOTE(review): arguments are in (actual, matcher) order, so this asserts
  // expectedMinimumMillis < elapsed, i.e. the run honored the configured rate.
  assertThat(expectedMinimumMillis, lessThan(endTime.getMillis() - startTime.getMillis()));
}
@Test public void testUnboundedSourceSplits() throws Exception { int numElements = 1000; int numSplits = 10; // Coders must be specified explicitly here due to the way the transform // is used in the test. UnboundedSource<KafkaRecord<Integer, Long>, ?> initial = mkKafkaReadTransform(numElements, null) .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of()) .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of()) .makeSource(); List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits = initial.split(numSplits, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); long elementsPerSplit = numElements / numSplits; assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { pcollections = pcollections.and( p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit)) .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>())) .apply("collection " + i, Values.create())); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); p.run(); }
// Reads a fixed number of records from a rate-limited unbounded counting source and checks
// (a) element timestamps track element values and (b) the run takes at least as long as the
// configured rate implies.
@Test
@Category(NeedsRunner.class)
public void testUnboundedSourceWithRate() {
  Duration period = Duration.millis(5);
  long numElements = 1000L;

  PCollection<Long> input =
      p.apply(
          Read.from(
                  CountingSource.createUnboundedFrom(0)
                      .withTimestampFn(new ValueAsTimestampFn())
                      .withRate(1, period))
              .withMaxNumRecords(numElements));
  addCountingAsserts(input, numElements);

  // Timestamp minus value should be a constant (0) for every element.
  PCollection<Long> diffs =
      input
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());
  // This assert also confirms that diffs only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);

  Instant started = Instant.now();
  p.run();
  Instant finished = Instant.now();
  // One element per period, so reading numElements must take at least numElements periods.
  Duration expectedDuration = period.multipliedBy((int) numElements);
  assertThat(started.plus(expectedDuration).isBefore(finished), is(true));
}
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testTranslate() {
  GearpumpPipelineOptions options =
      PipelineOptionsFactory.create().as(GearpumpPipelineOptions.class);

  // A Read.Unbounded transform backed by a mocked source.
  Read.Unbounded transform = mock(Read.Unbounded.class);
  UnboundedSource source = mock(UnboundedSource.class);
  when(transform.getSource()).thenReturn(source);

  // Stub the translation context plumbing.
  TranslationContext translationContext = mock(TranslationContext.class);
  when(translationContext.getPipelineOptions()).thenReturn(options);
  JavaStream stream = mock(JavaStream.class);
  PValue mockOutput = mock(PValue.class);
  when(translationContext.getOutput()).thenReturn(mockOutput);
  when(translationContext.getSourceStream(any(DataSource.class))).thenReturn(stream);

  ReadUnboundedTranslator translator = new ReadUnboundedTranslator();
  translator.translate(transform, translationContext);

  // The source must be wrapped in an unbounded-source wrapper and its stream
  // registered as the transform's output.
  verify(translationContext).getSourceStream(argThat(new UnboundedSourceWrapperMatcher()));
  verify(translationContext).setOutputStream(mockOutput, stream);
}
}
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testUnboundedSourceSplits() throws Exception {
  long totalElements = 1000;
  int desiredSplits = 10;

  UnboundedSource<Long, ?> root = CountingSource.unbounded();
  List<? extends UnboundedSource<Long, ?>> shards = root.split(desiredSplits, p.getOptions());
  assertEquals("Expected exact splitting", desiredSplits, shards.size());

  // Cap each split so the splits together yield exactly totalElements records.
  long perShard = totalElements / desiredSplits;
  assertEquals("Expected even splits", totalElements, perShard * desiredSplits);

  PCollectionList<Long> parts = PCollectionList.empty(p);
  for (int idx = 0; idx < shards.size(); ++idx) {
    parts =
        parts.and(p.apply("split" + idx, Read.from(shards.get(idx)).withMaxNumRecords(perShard)));
  }
  PCollection<Long> merged = parts.apply(Flatten.pCollections());

  addCountingAsserts(merged, totalElements);
  p.run();
}
@Override public PCollection<T> expand(PBegin input) { checkArgument(getConnectionFactory() != null, "withConnectionFactory() is required"); checkArgument( getQueue() != null || getTopic() != null, "Either withQueue() or withTopic() is required"); checkArgument( getQueue() == null || getTopic() == null, "withQueue() and withTopic() are exclusive"); checkArgument(getMessageMapper() != null, "withMessageMapper() is required"); checkArgument(getCoder() != null, "withCoder() is required"); // handles unbounded source to bounded conversion if maxNumRecords is set. Unbounded<T> unbounded = org.apache.beam.sdk.io.Read.from(createSource()); PTransform<PBegin, PCollection<T>> transform = unbounded; if (getMaxNumRecords() < Long.MAX_VALUE || getMaxReadTime() != null) { transform = unbounded.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords()); } return input.getPipeline().apply(transform); }
@Test @Category(NeedsRunner.class) public void testBoundedToUnboundedSourceAdapter() throws Exception { long numElements = 100; BoundedSource<Long> boundedSource = CountingSource.upTo(numElements); UnboundedSource<Long, Checkpoint<Long>> unboundedSource = new BoundedToUnboundedSourceAdapter<>(boundedSource); PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements)); // Count == numElements PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements); // Unique count == numElements PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally())) .isEqualTo(numElements); // Min == 0 PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L); // Max == numElements-1 PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1); p.run(); }
/** Builds the unbounded Kinesis read, bounded by record count or read time when configured. */
@Override
public PCollection<KinesisRecord> expand(PBegin input) {
  Unbounded<KinesisRecord> unboundedRead =
      org.apache.beam.sdk.io.Read.from(
          new KinesisSource(
              getAWSClientsProvider(),
              getStreamName(),
              getInitialPosition(),
              getUpToDateThreshold(),
              getRequestRecordsLimit()));

  // Leave the read unbounded unless a record-count or time bound was configured.
  if (getMaxNumRecords() == Long.MAX_VALUE && getMaxReadTime() == null) {
    return input.apply(unboundedRead);
  }
  return input.apply(
      unboundedRead.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords()));
}
}
@Test @Category(NeedsRunner.class) public void testUnboundedSourceTimestamps() { long numElements = 1000; PCollection<Long> input = p.apply( Read.from(CountingSource.unboundedWithTimestampFn(new ValueAsTimestampFn())) .withMaxNumRecords(numElements)); addCountingAsserts(input, numElements); PCollection<Long> diffs = input .apply("TimestampDiff", ParDo.of(new ElementValueDiff())) .apply("DistinctTimestamps", Distinct.create()); // This assert also confirms that diffs only has one unique value. PAssert.thatSingleton(diffs).isEqualTo(0L); p.run(); }
/** Captures the {@link UnboundedSource} carried by the given {@link Read.Unbounded} transform. */
public StreamingUnboundedRead(Read.Unbounded<T> transform) {
  this.source = transform.getSource();
}
/**
 * Translates a {@link Read.Unbounded} primitive into a {@link BeamUnboundedSourceVertex}
 * and wires it into the IR DAG.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Read.Unbounded.class)
private static void unboundedReadTranslator(final PipelineTranslationContext ctx,
                                            final TransformHierarchy.Node beamNode,
                                            final Read.Unbounded<?> transform) {
  // The transform's source (plus its display data) becomes a dedicated source vertex.
  final IRVertex vertex = new BeamUnboundedSourceVertex<>(transform.getSource(), DisplayData.from(transform));
  ctx.addVertex(vertex);
  // Connect all incoming edges, then register each output as produced by this vertex.
  beamNode.getInputs().values().forEach(input -> ctx.addEdgeTo(vertex, input));
  beamNode.getOutputs().values().forEach(output -> ctx.registerMainOutputFrom(beamNode, vertex, output));
}
/**
 * Runs the counting-source pipeline in one of four mode combinations (dedup on/off crossed
 * with time-bounded vs record-bounded reads) and validates the output with {@link Checker}.
 *
 * @param dedup whether the source should mark its records for deduplication
 * @param timeBound bound the read by wall time (200 ms) instead of by record count
 */
private void test(boolean dedup, boolean timeBound) throws Exception {
  TestCountingSource source = new TestCountingSource(Integer.MAX_VALUE).withoutSplitting();
  if (dedup) {
    source = source.withDedup();
  }
  PCollection<KV<Integer, Integer>> output =
      timeBound
          ? p.apply(Read.from(source).withMaxReadTime(Duration.millis(200)))
          : p.apply(Read.from(source).withMaxNumRecords(NUM_RECORDS));

  // Because some of the NUM_RECORDS elements read are dupes, the final output
  // will only have output from 0 to n where n < NUM_RECORDS.
  PAssert.that(output).satisfies(new Checker(dedup, timeBound));

  p.run();
}
/** Builds the unbounded SQS read, bounded by record count or read time when configured. */
@Override
public PCollection<Message> expand(PBegin input) {
  org.apache.beam.sdk.io.Read.Unbounded<Message> unboundedRead =
      org.apache.beam.sdk.io.Read.from(
          new SqsUnboundedSource(
              this,
              new SqsConfiguration(input.getPipeline().getOptions().as(AwsOptions.class))));

  // Leave the read unbounded unless a record-count or time bound was configured.
  if (maxNumRecords() == Long.MAX_VALUE && maxReadTime() == null) {
    return input.getPipeline().apply(unboundedRead);
  }
  return input
      .getPipeline()
      .apply(unboundedRead.withMaxReadTime(maxReadTime()).withMaxNumRecords(maxNumRecords()));
}
}
/** Validates and builds the unbounded AMQP read, bounded by count or time when configured. */
@Override
public PCollection<Message> expand(PBegin input) {
  checkArgument(addresses() != null, "withAddresses() is required");

  org.apache.beam.sdk.io.Read.Unbounded<Message> unboundedRead =
      org.apache.beam.sdk.io.Read.from(new UnboundedAmqpSource(this));

  // Leave the read unbounded unless a record-count or time bound was configured.
  if (maxNumRecords() == Long.MAX_VALUE && maxReadTime() == null) {
    return input.getPipeline().apply(unboundedRead);
  }
  return input
      .getPipeline()
      .apply(unboundedRead.withMaxReadTime(maxReadTime()).withMaxNumRecords(maxNumRecords()));
}
}