/**
 * Pairs the single original output with the single expanded value of {@code newOutput}.
 *
 * <p>Both {@code outputs} and {@code newOutput.expand()} are read through
 * {@code Iterables.getOnlyElement}, so each is expected to hold exactly one entry.
 *
 * @param outputs the tagged outputs of the original transform
 * @param newOutput the output produced by the replacement
 * @return a single-entry map keyed by {@code newOutput}
 */
@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, PCollection<Long> newOutput) {
  Map.Entry<TupleTag<?>, PValue> originalEntry = Iterables.getOnlyElement(outputs.entrySet());
  Map.Entry<TupleTag<?>, PValue> replacementEntry =
      Iterables.getOnlyElement(newOutput.expand().entrySet());

  TaggedPValue taggedOriginal =
      TaggedPValue.of(originalEntry.getKey(), originalEntry.getValue());
  TaggedPValue taggedReplacement =
      TaggedPValue.of(replacementEntry.getKey(), replacementEntry.getValue());

  ReplacementOutput mapped = ReplacementOutput.of(taggedOriginal, taggedReplacement);
  return Collections.singletonMap(newOutput, mapped);
}
}
/**
 * Builds the output mapping for a replacement that yields a single {@code PCollection<T>}.
 *
 * <p>The only entry of {@code outputs} becomes the "original" side and the only entry of
 * {@code newOutput.expand()} becomes the "replacement" side of the returned
 * {@code ReplacementOutput}.
 *
 * @param outputs the tagged outputs of the original transform
 * @param newOutput the output produced by the replacement
 * @return a single-entry map from {@code newOutput} to its {@code ReplacementOutput}
 */
@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, PCollection<T> newOutput) {
  Map.Entry<TupleTag<?>, PValue> onlyOriginal = Iterables.getOnlyElement(outputs.entrySet());
  Map.Entry<TupleTag<?>, PValue> onlyReplacement =
      Iterables.getOnlyElement(newOutput.expand().entrySet());

  TaggedPValue before = TaggedPValue.of(onlyOriginal.getKey(), onlyOriginal.getValue());
  TaggedPValue after = TaggedPValue.of(onlyReplacement.getKey(), onlyReplacement.getValue());

  return Collections.singletonMap(newOutput, ReplacementOutput.of(before, after));
}
}
/**
 * Checks that {@code ReplacementOutputs.singleton} keys its result by {@code replacementInts}
 * and records the tag and value of {@code ints} as the original.
 */
@Test
public void singletonSucceeds() {
  Map<PValue, ReplacementOutput> mapping =
      ReplacementOutputs.singleton(ints.expand(), replacementInts);
  assertThat(mapping, Matchers.hasKey(replacementInts));

  ReplacementOutput mapped = mapping.get(replacementInts);
  Map.Entry<TupleTag<?>, PValue> expectedOriginal =
      Iterables.getOnlyElement(ints.expand().entrySet());
  TupleTag<?> expectedTag = expectedOriginal.getKey();
  PValue expectedValue = expectedOriginal.getValue();

  assertThat(mapped.getOriginal().getTag(), equalTo(expectedTag));
  assertThat(mapped.getOriginal().getValue(), equalTo(expectedValue));
  assertThat(mapped.getReplacement().getValue(), equalTo(replacementInts));
}
/**
 * Registers a transform with an empty child list and checks that the returned id equals the
 * transform's full name and is what {@code getExistingPTransformId} reports afterwards.
 */
@Test
public void registerTransformNoChildren() throws IOException {
  Create.Values<Integer> values = Create.of(1, 2, 3);
  PCollection<Integer> created = pipeline.apply(values);

  String fullName = "my_transform/my_nesting";
  AppliedPTransform<?, ?, ?> applied =
      AppliedPTransform.of(
          fullName, pipeline.begin().expand(), created.expand(), values, pipeline);

  String registeredId = components.registerPTransform(applied, Collections.emptyList());

  assertThat(registeredId, equalTo(fullName));
  assertThat(components.getExistingPTransformId(applied), equalTo(registeredId));
}
/**
 * Applies {@code GenerateSequence.from(0)} to {@code pipeline} and wraps the application in an
 * {@code AppliedPTransform} named {@code "Count"}.
 *
 * @param pipeline the pipeline the sequence is applied to
 * @return the applied transform, with the pipeline's begin expansion as its inputs
 */
private static AppliedPTransform<?, ?, ?> generateSequence(Pipeline pipeline) {
  GenerateSequence fromZero = GenerateSequence.from(0);
  PCollection<Long> generated = pipeline.apply(fromZero);
  return AppliedPTransform.of(
      "Count", pipeline.begin().expand(), generated.expand(), fromZero, pipeline);
}
/**
 * Verifies that registering a parent transform whose child has not been registered is rejected
 * with an {@code IllegalArgumentException} whose message contains the child transform.
 */
@Test
public void registerTransformWithUnregisteredChildren() throws IOException {
  Create.Values<Long> parentCreate = Create.of(1L, 2L, 3L);
  GenerateSequence childSequence = GenerateSequence.from(0);
  PCollection<Long> created = pipeline.apply(parentCreate);

  String parentName = "my_transform";
  String childName = "my_transform/my_nesting";
  AppliedPTransform<?, ?, ?> parent =
      AppliedPTransform.of(
          parentName, pipeline.begin().expand(), created.expand(), parentCreate, pipeline);
  AppliedPTransform<?, ?, ?> unregisteredChild =
      AppliedPTransform.of(
          childName, pipeline.begin().expand(), created.expand(), childSequence, pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(unregisteredChild.toString());

  // The child is passed as a component of the parent without having been registered itself.
  components.registerPTransform(parent, Collections.singletonList(unregisteredChild));
}
/**
 * Applies an unbounded read from {@code CountingSource.unbounded()} to {@code pipeline} and wraps
 * the application in an {@code AppliedPTransform} named {@code "ReadTheCount"}.
 *
 * @param pipeline the pipeline the read is applied to
 * @return the applied transform, with the pipeline's begin expansion as its inputs
 */
private static AppliedPTransform<?, ?, ?> read(Pipeline pipeline) {
  Read.Unbounded<Long> unboundedRead = Read.from(CountingSource.unbounded());
  PCollection<Long> counted = pipeline.apply(unboundedRead);
  return AppliedPTransform.of(
      "ReadTheCount", pipeline.begin().expand(), counted.expand(), unboundedRead, pipeline);
}
/**
 * Expects {@code getExistingPTransformId} to fail with an {@code IllegalArgumentException}
 * mentioning the transform when the applied transform's full name is the empty string.
 */
@Test
public void registerTransformEmptyFullName() throws IOException {
  Create.Values<Integer> values = Create.of(1, 2, 3);
  PCollection<Integer> created = pipeline.apply(values);

  AppliedPTransform<?, ?, ?> unnamed =
      AppliedPTransform.of("", pipeline.begin().expand(), created.expand(), values, pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(unnamed.toString());

  components.getExistingPTransformId(unnamed);
}
/**
 * Registers a child transform first and then its parent, and checks that the resulting
 * components list the child as the parent's first subtransform and give the child none.
 */
@Test
public void registerTransformAfterChildren() throws IOException {
  Create.Values<Long> parentCreate = Create.of(1L, 2L, 3L);
  GenerateSequence childSequence = GenerateSequence.from(0);
  PCollection<Long> created = pipeline.apply(parentCreate);

  String parentName = "my_transform";
  String childName = "my_transform/my_nesting";
  AppliedPTransform<?, ?, ?> parent =
      AppliedPTransform.of(
          parentName, pipeline.begin().expand(), created.expand(), parentCreate, pipeline);
  AppliedPTransform<?, ?, ?> child =
      AppliedPTransform.of(
          childName, pipeline.begin().expand(), created.expand(), childSequence, pipeline);

  // Child goes in first so that it is already known when the parent refers to it.
  String childId = this.components.registerPTransform(child, Collections.emptyList());
  String parentId =
      this.components.registerPTransform(parent, Collections.singletonList(child));

  Components proto = this.components.toComponents();
  assertThat(proto.getTransformsOrThrow(parentId).getSubtransforms(0), equalTo(childId));
  assertThat(proto.getTransformsOrThrow(childId).getSubtransformsCount(), equalTo(0));
}
/**
 * Wraps {@code pardo} in an {@link AppliedPTransform} named {@code "pardo"} whose input is a
 * freshly created {@code PCollection<KV<String, Integer>>} and whose output is a freshly created
 * {@code PCollection<Integer>}.
 *
 * <p>Both collections are bounded, use the global default windowing strategy, and are created
 * directly rather than by applying a transform.
 */
private AppliedPTransform<?, ?, ?> getAppliedTransform(PTransform pardo) {
  PCollection<KV<String, Integer>> kvInput =
      PCollection.createPrimitiveOutputInternal(
          p,
          WindowingStrategy.globalDefault(),
          IsBounded.BOUNDED,
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
  kvInput.setName("dummy input");

  PCollection<Integer> intOutput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());
  intOutput.setName("dummy output");

  return AppliedPTransform.of("pardo", kvInput.expand(), intOutput.expand(), pardo, p);
}
/**
 * Builds an applied multi-output {@code ParDo} that takes a side input and declares one
 * additional output tag, named {@code "MultiParDoInAndOut"}.
 *
 * <p>The applied transform's inputs are the ParDo's additional inputs followed by the expansion
 * of the main input, merged into a single map.
 *
 * @param pipeline the pipeline the transforms are applied to
 * @return the applied multi-output ParDo
 */
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> sideInput =
      pipeline.apply(Create.of("foo")).apply(View.asSingleton());
  PCollection<Long> mainInput = pipeline.apply(GenerateSequence.from(0));

  ParDo.MultiOutput<Long, KV<Long, String>> multiParDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(sideInput)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple outputs = mainInput.apply(multiParDo);

  // Additional inputs are added first, then the main input's expansion.
  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  allInputs.putAll(multiParDo.getAdditionalInputs());
  allInputs.putAll(mainInput.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", allInputs, outputs.expand(), multiParDo, pipeline);
}
}
/**
 * Checks that {@code WriteFilesTranslation} reads runner-determined sharding, windowed writes
 * and the sink from an applied {@code WriteFiles} consistently with the transform's own getters.
 */
@Test
public void testExtractionDirectFromTransform() throws Exception {
  PCollection<String> lines = p.apply(Create.of("hello"));
  WriteFilesResult<Void> written = lines.apply(writeFiles);

  AppliedPTransform<PCollection<String>, WriteFilesResult<Void>, WriteFiles<String, Void, String>>
      applied = AppliedPTransform.of("foo", lines.expand(), written.expand(), writeFiles, p);

  // Sharding is runner-determined exactly when neither shard-count source is set.
  assertThat(
      WriteFilesTranslation.isRunnerDeterminedSharding(applied),
      equalTo(
          writeFiles.getNumShardsProvider() == null
              && writeFiles.getComputeNumShards() == null));

  assertThat(
      WriteFilesTranslation.isWindowedWrites(applied),
      equalTo(writeFiles.getWindowedWrites()));

  assertThat(
      WriteFilesTranslation.<String, Void, String>getSink(applied),
      equalTo(writeFiles.getSink()));
}
/**
 * Checks that a {@code Flatten} applied to an empty {@code PCollectionList} is the graph's only
 * root transform, with no inputs and with the flattened collection as its output.
 */
@Test
public void getRootTransformsContainsEmptyFlatten() {
  PCollections<String> flattenTransform = Flatten.pCollections();
  PCollectionList<String> noInputs = PCollectionList.empty(p);
  PCollection<String> flattened = noInputs.apply(flattenTransform);
  flattened.setCoder(StringUtf8Coder.of());

  p.traverseTopologically(visitor);
  DirectGraph directGraph = visitor.getGraph();

  assertThat(
      directGraph.getRootTransforms(),
      Matchers.containsInAnyOrder(new Object[] {directGraph.getProducer(flattened)}));

  AppliedPTransform<?, ?, ?> soleRoot =
      Iterables.getOnlyElement(directGraph.getRootTransforms());
  assertThat((Object) soleRoot.getTransform(), equalTo(flattenTransform));
  assertThat(soleRoot.getInputs().entrySet(), emptyIterable());
  assertThat(soleRoot.getOutputs(), equalTo(flattened.expand()));
}
/**
 * Expects {@code registerPTransform} to fail with a {@code NullPointerException} carrying the
 * message {@code "child nodes may not be null"} when the child list is {@code null}.
 */
@Test
public void registerTransformNullComponents() throws IOException {
  Create.Values<Integer> values = Create.of(1, 2, 3);
  PCollection<Integer> created = pipeline.apply(values);

  String fullName = "my_transform/my_nesting";
  AppliedPTransform<?, ?, ?> applied =
      AppliedPTransform.of(
          fullName, pipeline.begin().expand(), created.expand(), values, pipeline);

  thrown.expect(NullPointerException.class);
  thrown.expectMessage("child nodes may not be null");

  components.registerPTransform(applied, null);
}
/**
 * Applies {@code fn} to {@code input} as a {@code SplittableParDo} and returns its main output.
 *
 * <p>A multi-output {@code ParDo} with only {@code MAIN_OUTPUT_TAG} is expanded directly (not
 * applied) to build the {@code AppliedPTransform} that {@code SplittableParDo.forAppliedParDo}
 * consumes; the resulting transform is then applied to {@code input} under {@code name}.
 *
 * @param name the name under which the splittable ParDo is applied
 * @param input the collection to process
 * @param fn the function to run
 * @return the collection tagged {@code MAIN_OUTPUT_TAG} in the splittable ParDo's result
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());

  PCollectionTuple expanded = parDo.expand(input);
  expanded.get(MAIN_OUTPUT_TAG).setName("main");

  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> appliedParDo =
      AppliedPTransform.of("ParDo", input.expand(), expanded.expand(), parDo, pipeline);

  PCollectionTuple splittableResult =
      input.apply(name, SplittableParDo.forAppliedParDo(appliedParDo));
  return splittableResult.get(MAIN_OUTPUT_TAG);
}
/**
 * Checks that {@code factory.mapOutputs} maps the replacement collection to a
 * {@code ReplacementOutput} pairing the original's expanded value with the replacement's.
 */
@Test
public void mapOutputsSucceeds() {
  PCollection<Long> before = pipeline.apply("Original", GenerateSequence.from(0));
  PCollection<Long> after = pipeline.apply("Replacement", GenerateSequence.from(0));

  Map<PValue, ReplacementOutput> outputMapping = factory.mapOutputs(before.expand(), after);

  ReplacementOutput expected =
      ReplacementOutput.of(
          TaggedPValue.ofExpandedValue(before), TaggedPValue.ofExpandedValue(after));
  assertThat(outputMapping, Matchers.hasEntry(after, expected));
}
/**
 * Translates an applied {@code TestStream} to its proto form and checks that the function spec
 * carries {@code TEST_STREAM_TRANSFORM_URN} and a payload that matches the original stream.
 */
@Test
public void testRegistrarEncodedProto() throws Exception {
  PCollection<String> streamed = p.apply(testStream);

  AppliedPTransform<PBegin, PCollection<String>, TestStream<String>> applied =
      AppliedPTransform.of("fakeName", PBegin.in(p).expand(), streamed.expand(), testStream, p);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec functionSpec =
      PTransformTranslation.toProto(applied, sdkComponents).getSpec();
  assertThat(functionSpec.getUrn(), equalTo(TEST_STREAM_TRANSFORM_URN));

  RunnerApi.TestStreamPayload decoded = TestStreamPayload.parseFrom(functionSpec.getPayload());

  verifyTestStreamEncoding(
      testStream,
      decoded,
      RehydratedComponents.forComponents(sdkComponents.toComponents()));
}
/**
 * Applies the same {@code MapElements} twice to one input and checks that
 * {@code factory.mapOutputs} maps the second output to a {@code ReplacementOutput} pairing the
 * first output's expanded value with the second's.
 */
@Test
public void testMapOutputs() {
  PCollection<Integer> source = pipeline.apply(Create.of(1, 2, 3));
  PCollection<Integer> firstMapped = source.apply("Map", MapElements.via(fn));
  PCollection<Integer> secondMapped = source.apply("ReMap", MapElements.via(fn));

  assertThat(
      factory.mapOutputs(firstMapped.expand(), secondMapped),
      Matchers.hasEntry(
          secondMapped,
          ReplacementOutput.of(
              TaggedPValue.ofExpandedValue(firstMapped),
              TaggedPValue.ofExpandedValue(secondMapped))));
}
/**
 * Flattens a list that repeats {@code first} alongside {@code second}, once with
 * {@code Flatten.pCollections()} and once with {@code FlattenWithoutDuplicateInputs}, and checks
 * that {@code factory.mapOutputs} pairs the two results.
 */
@Test
public void outputMapping() {
  PCollectionList<String> duplicatedInputs =
      PCollectionList.of(first).and(second).and(first).and(first);
  PCollection<String> flattened = duplicatedInputs.apply(Flatten.pCollections());
  PCollection<String> deduplicated =
      duplicatedInputs.apply(new FlattenWithoutDuplicateInputs<>());

  Map<PValue, ReplacementOutput> mapping =
      factory.mapOutputs(flattened.expand(), deduplicated);

  assertThat(
      mapping,
      Matchers.hasEntry(
          deduplicated,
          ReplacementOutput.of(
              TaggedPValue.ofExpandedValue(flattened),
              TaggedPValue.ofExpandedValue(deduplicated))));
}
}