/**
 * Selects the collection stored under {@code tag} in the incoming tuple.
 *
 * @param input tuple to read from
 * @return whatever {@code input.get(tag)} yields
 */
@Override
public PCollection<T> expand(PCollectionTuple input) {
  final PCollection<T> selected = input.get(tag);
  return selected;
}
}
@Override public PCollection<FeatureRowExtended> expand(PCollectionTuple tuple) { List<PCollection<FeatureRowExtended>> outputList = Lists.newArrayList(); for (TupleTag<FeatureRowExtended> tag : transforms.keySet()) { Write write = transforms.get(tag); Preconditions.checkNotNull(write, String.format("Null transform for tag=%s", tag.getId())); PCollection<FeatureRowExtended> input = tuple.get(tag); input.apply(String.format("Write to %s", tag.getId()), write); outputList.add(input); } // FeatureRows with no matching write transform end up in `input.get(mainTag)` and considered // discardible, we return them in the main output so they are considered written, but don't // actually write them to any store. outputList.add(tuple.get(mainTag)); return PCollectionList.of(outputList).apply("Flatten main", Flatten.pCollections()); } }
/**
 * Applies {@code replacementParDo} under the step name "Contained" and returns the output it
 * produced for the {@code longs} tag.
 *
 * @param input collection the replacement is applied to
 * @return the {@code longs} output of the applied transform
 */
@Override
public PCollection<Long> expand(PCollection<Long> input) {
  final PCollection<Long> containedLongs = input.apply("Contained", replacementParDo).get(longs);
  return containedLongs;
}
};
/**
 * Runs {@code replacementParDo} as the step "Contained" and hands back only the collection
 * tagged {@code longs} from its result.
 *
 * @param input collection fed into the replacement transform
 * @return the collection found under {@code longs} in the transform's output
 */
@Override
public PCollection<Long> expand(PCollection<Long> input) {
  final PCollection<Long> taggedLongs = input.apply("Contained", replacementParDo).get(longs);
  return taggedLongs;
}
};
/**
 * Builds the reading side of this table: applies {@code readMessagesWithAttributes()} followed
 * by {@code createParserParDo()}, stamps the table schema on the main output and, when
 * {@code useDlq()} is true, routes the {@code DLQ_TAG} output into {@code writeMessagesToDlq()}.
 *
 * @param begin pipeline start the read is attached to
 * @return the {@code MAIN_TAG} output with its row schema set to {@code getSchema()}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  PCollectionTuple parsed =
      begin
          .apply("readFromPubsub", readMessagesWithAttributes())
          .apply("parseMessageToRow", createParserParDo());

  // Fetch the main output once; the same collection is configured and then returned.
  PCollection<Row> mainRows = parsed.get(MAIN_TAG);
  mainRows.setRowSchema(getSchema());

  if (useDlq()) {
    parsed.get(DLQ_TAG).apply(writeMessagesToDlq());
  }

  return mainRows;
}
/**
 * Applies {@code original} as a multi-output transform that has a fresh main tag and no
 * additional outputs, then unwraps the main output.
 *
 * @param input collection the wrapped transform is applied to
 * @return the collection produced for the freshly created main tag
 */
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  TupleTag<OutputT> soleOutputTag = new TupleTag<>();
  final PCollection<OutputT> soleOutput =
      input
          .apply(original.withOutputTags(soleOutputTag, TupleTagList.empty()))
          .get(soleOutputTag);
  return soleOutput;
}
}
/**
 * Writes the collection tagged {@code errorTag()} to {@code errorWritePath()} with
 * {@code TextIO}, using a single shard.
 *
 * @param pCollectionTuple tuple holding the error output
 * @return the {@code PDone} produced by the text write
 */
@Override
public PDone expand(PCollectionTuple pCollectionTuple) {
  // Configure the sink first, then attach it to the error output.
  TextIO.Write errorSink = TextIO.write().to(errorWritePath()).withNumShards(1);
  return pCollectionTuple.get(errorTag()).apply(errorSink);
}
@Test public void testTypedOutputTupleTag() { TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {}; // typedOutputTag was constructed with compile-time type information. TupleTag<Integer> typedOutputTag = new TupleTag<Integer>() {}; PCollectionTuple tuple = buildPCollectionTupleWithTags(mainOutputTag, typedOutputTag); assertThat(tuple.get(typedOutputTag).getCoder(), instanceOf(VarIntCoder.class)); }
@Test public void testUntypedMainOutputTagTypedOutputTupleTag() { // mainOutputTag is allowed to be untyped because Coder can be inferred other ways. TupleTag<Integer> mainOutputTag = new TupleTag<>(); TupleTag<Integer> typedOutputTag = new TupleTag<Integer>() {}; PCollectionTuple tuple = buildPCollectionTupleWithTags(mainOutputTag, typedOutputTag); assertThat(tuple.get(typedOutputTag).getCoder(), instanceOf(VarIntCoder.class)); }
/**
 * Runs the DoFn from {@code sdfWithAdditionalOutput} over the inputs 0, 1, 2 and checks the
 * contents of both its main and its additional output.
 *
 * @param bounded boundedness passed through to {@code sdfWithAdditionalOutput}
 */
private void testAdditionalOutput(IsBounded bounded) {
  TupleTag<String> mainTag = new TupleTag<String>("main") {};
  TupleTag<String> additionalTag = new TupleTag<String>("additional") {};

  PCollectionTuple outputs =
      p.apply("input", Create.of(0, 1, 2))
          .apply(
              ParDo.of(sdfWithAdditionalOutput(bounded, additionalTag))
                  .withOutputTags(mainTag, TupleTagList.of(additionalTag)));

  // One element per input is expected on each output, prefixed with the output's name.
  PAssert.that(outputs.get(mainTag))
      .containsInAnyOrder(Arrays.asList("main:0", "main:1", "main:2"));
  PAssert.that(outputs.get(additionalTag))
      .containsInAnyOrder(Arrays.asList("additional:0", "additional:1", "additional:2"));

  p.run();
}
@Test public void testTaggedOutputUnregisteredExplicitCoder() throws Exception { pipeline.enableAbandonedNodeEnforcement(false); PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3))); final TupleTag<Integer> mainOutputTag = new TupleTag<>("main"); final TupleTag<TestDummy> additionalOutputTag = new TupleTag<>("unregisteredSide"); ParDo.MultiOutput<Integer, Integer> pardo = ParDo.of(new TaggedOutputDummyFn(mainOutputTag, additionalOutputTag)) .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)); PCollectionTuple outputTuple = input.apply(pardo); outputTuple.get(additionalOutputTag).setCoder(new TestDummyCoder()); outputTuple.get(additionalOutputTag).apply(View.asSingleton()); assertEquals(new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder()); outputTuple .get(additionalOutputTag) .finishSpecifyingOutput("ParDo", input, pardo); // Check for crashes assertEquals( new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder()); // Check for corruption }
/**
 * Wraps {@code fn} in a multi-output ParDo that has only {@code MAIN_OUTPUT_TAG}, expands it by
 * hand to build an {@code AppliedPTransform}, and applies the {@code SplittableParDo} derived
 * from that applied transform.
 *
 * @param name step name used when applying the splittable ParDo
 * @param input collection the ParDo consumes
 * @param fn DoFn to run
 * @return the {@code MAIN_OUTPUT_TAG} output of the applied splittable ParDo
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());

  // Expanded directly (not via input.apply) to obtain outputs for the AppliedPTransform below.
  PCollectionTuple expanded = parDo.expand(input);
  expanded.get(MAIN_OUTPUT_TAG).setName("main");

  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> applied =
      AppliedPTransform.of("ParDo", input.expand(), expanded.expand(), parDo, pipeline);

  return input.apply(name, SplittableParDo.forAppliedParDo(applied)).get(MAIN_OUTPUT_TAG);
}
@Override public PCollectionList<T> expand(PCollection<T> in) { final TupleTagList outputTags = partitionDoFn.getOutputTags(); PCollectionTuple outputs = in.apply(ParDo.of(partitionDoFn).withOutputTags(new TupleTag<Void>() {}, outputTags)); PCollectionList<T> pcs = PCollectionList.empty(in.getPipeline()); Coder<T> coder = in.getCoder(); for (TupleTag<?> outputTag : outputTags.getAll()) { // All the tuple tags are actually TupleTag<T> // And all the collections are actually PCollection<T> @SuppressWarnings("unchecked") TupleTag<T> typedOutputTag = (TupleTag<T>) outputTag; pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder)); } return pcs; }
@Test public void testUntypedOutputTupleTagGivesActionableMessage() { TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {}; // untypedOutputTag did not use anonymous subclass. TupleTag<Integer> untypedOutputTag = new TupleTag<>(); PCollectionTuple tuple = buildPCollectionTupleWithTags(mainOutputTag, untypedOutputTag); thrown.expect(IllegalStateException.class); thrown.expectMessage("No Coder has been manually specified"); thrown.expectMessage("Building a Coder using a registered CoderProvider failed"); Coder<?> coder = tuple.get(untypedOutputTag).getCoder(); System.out.println(coder); }
@Test public void testStaticFactoryOutputTupleTagGivesActionableMessage() { TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {}; // untypedOutputTag constructed from a static factory method. TupleTag<Integer> untypedOutputTag = makeTagStatically(); PCollectionTuple tuple = buildPCollectionTupleWithTags(mainOutputTag, untypedOutputTag); thrown.expect(IllegalStateException.class); thrown.expectMessage("No Coder has been manually specified"); thrown.expectMessage("Building a Coder using a registered CoderProvider failed"); tuple.get(untypedOutputTag).getCoder(); }
/**
 * Applying {@code TestNoOutputDoFn} to an empty input must leave the main output and both
 * additional outputs empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
  TupleTag<String> mainTag = new TupleTag<String>("main") {};
  TupleTag<String> firstAdditionalTag = new TupleTag<String>("additional1") {};
  TupleTag<String> secondAdditionalTag = new TupleTag<String>("additional2") {};
  TupleTagList additionalTags = TupleTagList.of(firstAdditionalTag).and(secondAdditionalTag);

  PCollectionTuple outputs =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(ParDo.of(new TestNoOutputDoFn()).withOutputTags(mainTag, additionalTags));

  PAssert.that(outputs.get(mainTag)).empty();
  PAssert.that(outputs.get(firstAdditionalTag)).empty();
  PAssert.that(outputs.get(secondAdditionalTag)).empty();

  pipeline.run();
}
/**
 * Converts the incoming JSON strings with {@code JsonToEntity}, passes them through
 * {@code CheckSameKey}, and writes the output tagged as good to Datastore for
 * {@code projectId()}.
 *
 * @param entityJson JSON strings to convert
 * @return the full {@code CheckSameKey} result, including the {@code errorTag()} output
 */
@Override
public PCollectionTuple expand(PCollection<String> entityJson) {
  TupleTag<Entity> acceptedTag = new TupleTag<>();

  PCollectionTuple checked =
      entityJson
          .apply("StringToEntity", ParDo.of(new JsonToEntity()))
          .apply(
              "CheckSameKey",
              CheckSameKey.newBuilder()
                  .setErrorTag(errorTag())
                  .setGoodTag(acceptedTag)
                  .build());

  // Only the good output is written here; the whole tuple is returned to the caller.
  checked
      .get(acceptedTag)
      .apply("WriteToDatastore", DatastoreIO.v1().write().withProjectId(projectId()));

  return checked;
}
}
@Test @Category(NeedsRunner.class) public void testMultiOutputChaining() { PCollectionTuple filters = pipeline.apply(Create.of(Arrays.asList(3, 4, 5, 6))).apply(new MultiFilter()); PCollection<Integer> by2 = filters.get(MultiFilter.BY2); PCollection<Integer> by3 = filters.get(MultiFilter.BY3); // Apply additional filters to each operation. PCollection<Integer> by2then3 = by2.apply("Filter3sAgain", ParDo.of(new MultiFilter.FilterFn(3))); PCollection<Integer> by3then2 = by3.apply("Filter2sAgain", ParDo.of(new MultiFilter.FilterFn(2))); PAssert.that(by2then3).containsInAnyOrder(6); PAssert.that(by3then2).containsInAnyOrder(6); pipeline.run(); }
/**
 * Runs a pipeline whose main output tag carries no type information after a
 * {@code TestDummyCoder} has been set on that output explicitly.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<TestDummy> dummyMainTag = new TupleTag<>("unregisteredMain");
  final TupleTag<Integer> integerSideTag = new TupleTag<Integer>("additionalOutput") {};

  PCollectionTuple outputs =
      numbers.apply(
          ParDo.of(new MainOutputDummyFn(dummyMainTag, integerSideTag))
              .withOutputTags(dummyMainTag, TupleTagList.of(integerSideTag)));

  // The coder for the main output is supplied by hand before the pipeline runs.
  outputs.get(dummyMainTag).setCoder(new TestDummyCoder());

  pipeline.run();
}
/**
 * Tests that Pipeline supports putting an element into a tuple as a transform: the collection
 * read back under the tag must hold exactly the original elements.
 */
@Test
@Category(ValidatesRunner.class)
public void testTupleInjectionTransform() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));
  TupleTag<Integer> injectionTag = new TupleTag<>();

  PCollectionTuple injected =
      numbers.apply("ProjectTag", new TupleInjectionTransform<>(injectionTag));
  PCollection<Integer> readBack = injected.get(injectionTag);

  PAssert.that(readBack).containsInAnyOrder(1, 2, 3, 4);

  pipeline.run();
}