/**
 * Like {@link #apply(String, PTransform)} but the transform node in the {@link Pipeline} graph
 * will be named according to {@link PTransform#getName}.
 *
 * @see #apply(String, PTransform)
 */
public <OutputT extends POutput> OutputT apply(PTransform<? super PBegin, OutputT> root) {
  // Root transforms are applied to the pipeline's begin node.
  PBegin start = begin();
  return start.apply(root);
}
/**
 * Adds a root {@link PTransform}, such as {@link Read} or {@link Create}, to this {@link
 * Pipeline}, labeling its node in the graph with the provided {@code name}.
 *
 * <p>The name is surfaced in several places, including the monitoring UI, logging, and stable
 * identification of this node in the {@link Pipeline} graph upon update.
 *
 * <p>Alias for {@code begin().apply(name, root)}.
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<? super PBegin, OutputT> root) {
  // Delegate to the begin node so the root transform is attached there under `name`.
  PBegin start = begin();
  return start.apply(name, root);
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform) {
  // Replacement is re-rooted at the pipeline's begin node.
  PBegin begin = transform.getPipeline().begin();
  try {
    return PTransformReplacement.of(
        begin, new DirectTestStream<>(runner, TestStreamTranslation.getTestStream(transform)));
  } catch (IOException cause) {
    // Translation failure is not recoverable here; surface it with the original cause attached.
    throw new RuntimeException(
        String.format("Transform could not be converted to %s", TestStream.class.getSimpleName()),
        cause);
  }
}
@Override
public PCollection<Row> expand(PCollectionList<Row> input) {
  // An IO source is a leaf of the plan: it must be expanded with no upstream input.
  checkArgument(
      input.size() == 0,
      "Should not have received input for %s: %s",
      BeamIOSourceRel.class.getSimpleName(),
      input);
  // Delegate the read to the table implementation, rooted at the pipeline's begin node.
  return sqlTable.buildIOReader(input.getPipeline().begin());
}
}
private static PCollection<Read> getReadsFromAPI() {
  // Seed the pipeline with the single configured read-group-set id, then stream its
  // reads with strict shard boundaries, including the X/Y chromosomes.
  return p.begin()
      .apply(Create.of(Collections.singletonList(pipelineOptions.getReadGroupSetId())))
      .apply(
          new ReadGroupStreamer(
              auth, ShardBoundary.Requirement.STRICT, READ_FIELDS, SexChromosomeFilter.INCLUDE_XY));
}
private static PCollection<Read> getReadsFromAPI() {
  // Start from the configured read-group-set id and stream the corresponding reads
  // (strict shard boundaries; X/Y chromosomes included).
  return p.begin()
      .apply(Create.of(Collections.singletonList(pipelineOptions.getReadGroupSetId())))
      .apply(
          new ReadGroupStreamer(
              auth, ShardBoundary.Requirement.STRICT, READ_FIELDS, SexChromosomeFilter.INCLUDE_XY));
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Read.Unbounded<T>> transform) {
  // Swap the generic unbounded read for its streaming-specific implementation,
  // re-rooted at the pipeline's begin node.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(begin, new StreamingUnboundedRead<>(transform.getTransform()));
}
@Override
public PTransformReplacement<PBegin, PCollection<PubsubMessage>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<PubsubMessage>, PubsubUnboundedSource> transform) {
  // Replace the generic Pub/Sub source with the streaming read, rooted at pipeline begin.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(begin, new StreamingPubsubIORead(transform.getTransform()));
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // A Values rel is a leaf of the plan: it must be expanded with no upstream input.
  checkArgument(
      pinput.size() == 0,
      "Should not have received input for %s: %s",
      BeamValuesRel.class.getSimpleName(),
      pinput);
  if (tuples.isEmpty()) {
    throw new IllegalStateException("Values with empty tuples!");
  }
  Schema schema = CalciteUtils.toSchema(getRowType());
  List<Row> rows = tuples.stream().map(t -> tupleToRow(schema, t)).collect(toList());
  // Materialize the literal rows via Create, then attach the row schema.
  PCollection<Row> values = pinput.getPipeline().begin().apply(Create.of(rows));
  return values.setRowSchema(schema);
}
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Read.Bounded<T>> transform) {
  // Swap the bounded read for its streaming-specific implementation,
  // re-rooted at the pipeline's begin node.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(begin, new StreamingBoundedRead<>(transform.getTransform()));
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Create.Values<T>> transform) {
  Create.Values<T> original = transform.getTransform();
  // Parameterized (not raw) cast, matching the sibling override factories; the cast is
  // unchecked by nature, so the suppression is scoped to this single declaration.
  @SuppressWarnings("unchecked")
  PCollection<T> output =
      (PCollection<T>) Iterables.getOnlyElement(transform.getOutputs().values());
  return PTransformReplacement.of(
      transform.getPipeline().begin(), new StreamingFnApiCreate<>(original, output));
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Read.Bounded<T>> transform) {
  // Replace the bounded read with the impulse-based expansion of its source,
  // rooted at the pipeline's begin node.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(
      begin, JavaReadViaImpulse.bounded(transform.getTransform().getSource()));
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Values<T>> transform) {
  // The sole output of the matched Create carries the coder the primitive needs.
  PCollection<T> output =
      (PCollection<T>) Iterables.getOnlyElement(transform.getOutputs().values());
  return PTransformReplacement.of(
      transform.getPipeline().begin(),
      new PrimitiveCreate<>(transform.getTransform(), output.getCoder()));
}
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Create.Values<T>> transform) {
  // Replace the matched Create with an EmptyFlatten, rooted at the pipeline's begin node.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(begin, new EmptyFlatten<T>());
}
@Override
public PCollection<PubsubMessage> expand(PBegin input) {
  // `input` is already the PBegin of this pipeline, so apply directly to it;
  // the former input.getPipeline().begin() round-trip produced an equivalent PBegin.
  return input
      .apply(Read.from(new PubsubSource(this)))
      .apply(
          "PubsubUnboundedSource.Stats",
          ParDo.of(
              new StatsFn(pubsubFactory, subscription, topic, timestampAttribute, idAttribute)));
}
@Override
public PTransformReplacement<PBegin, PCollection<Long>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<Long>, GenerateSequence> transform) {
  // Replace the matched GenerateSequence with a fixed single-element Create of 0L.
  PBegin begin = transform.getPipeline().begin();
  return PTransformReplacement.of(begin, Create.of(0L));
}
private static AppliedPTransform<?, ?, ?> generateSequence(Pipeline pipeline) {
  // Build an applied node named "Count" from a sequence starting at 0.
  GenerateSequence source = GenerateSequence.from(0);
  PCollection<Long> output = pipeline.apply(source);
  return AppliedPTransform.of(
      "Count", pipeline.begin().expand(), output.expand(), source, pipeline);
}
private static AppliedPTransform<?, ?, ?> read(Pipeline pipeline) {
  // Build an applied node named "ReadTheCount" from an unbounded counting read.
  Read.Unbounded<Long> readTransform = Read.from(CountingSource.unbounded());
  PCollection<Long> counts = pipeline.apply(readTransform);
  return AppliedPTransform.of(
      "ReadTheCount", pipeline.begin().expand(), counts.expand(), readTransform, pipeline);
}
@Test
public void testPartiallyBoundFailure() throws IOException {
  Pipeline pipeline = Pipeline.create(buildPipelineOptions());
  PCollection<Integer> numbers = pipeline.begin().apply(Create.of(1, 2, 3));
  // The apply below must throw; if it returns normally the fail() is reached.
  thrown.expect(IllegalArgumentException.class);
  numbers.apply(new PartiallyBoundOutputCreator());
  Assert.fail("Failure expected from use of partially bound output");
}
@Test public void testMultiGraphPipelineSerialization() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = Pipeline.create(options); PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3)); input.apply(new UnrelatedOutputCreator()); input.apply(new UnboundOutputCreator()); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions( PipelineOptionsFactory.as(DataflowPipelineOptions.class)); // Check that translation doesn't fail. JobSpecification jobSpecification = t.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()); assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob()); }