/**
 * Creates a {@link PCollectionTuple} holding exactly one entry: the supplied {@link PCollection},
 * registered under the supplied {@link TupleTag}.
 *
 * <p>The result is built on an empty tuple bound to the pipeline of {@code pc}. Further entries
 * can be attached by invoking {@link #and} on the returned tuple.
 */
public static <T> PCollectionTuple of(TupleTag<T> tag, PCollection<T> pc) {
  PCollectionTuple emptyTuple = empty(pc.getPipeline());
  return emptyTuple.and(tag, pc);
}
/**
 * Builds a {@link PCollectionTuple} from three string-tagged collections.
 *
 * <p>The first two pairs are handed to the two-pair overload of this method; the third collection
 * is then appended under a {@link TupleTag} created from {@code tag3}.
 */
public static <T, V, W> PCollectionTuple tuple(
    String tag1,
    PCollection<T> pCollection1,
    String tag2,
    PCollection<V> pCollection2,
    String tag3,
    PCollection<W> pCollection3) {
  PCollectionTuple firstTwo = tuple(tag1, pCollection1, tag2, pCollection2);
  TupleTag<W> thirdTag = new TupleTag<>(tag3);
  return firstTwo.and(thirdTag, pCollection3);
}
}
/**
 * Runs {@link WriteBAMTransform} over the given sharded reads and header.
 *
 * <p>Bundles {@code shardedReads} (under {@code SHARDED_READS_TAG}) together with a collection
 * created from {@code headerInfo} (under {@code HEADER_TAG}, encoded with
 * {@code HEADER_INFO_CODER}) into a {@link PCollectionTuple}, then applies the transform to it.
 *
 * <p>The transform is attached through {@code apply} instead of invoking {@code expand} directly:
 * Beam documents {@code expand} as a hook that should not be called directly, since it is
 * {@code apply} that registers the transform in the pipeline graph.
 *
 * @param shardedReads the reads handed to the transform under {@code SHARDED_READS_TAG}
 * @param headerInfo the header value wrapped into its own collection
 * @param output forwarded to the {@link WriteBAMTransform} constructor (presumably the
 *     destination location; confirm against that class)
 * @param pipeline used to create the header collection and forwarded to the transform
 * @return the collection produced by {@link WriteBAMTransform}
 */
public static PCollection<String> write(PCollection<Read> shardedReads, HeaderInfo headerInfo, String output, Pipeline pipeline) {
  final PCollectionTuple tuple = PCollectionTuple
      .of(SHARDED_READS_TAG, shardedReads)
      .and(HEADER_TAG, pipeline.apply(Create.of(headerInfo).withCoder(HEADER_INFO_CODER)));
  // Go through apply(): expand() is an implementation hook and must not be invoked directly.
  return tuple.apply(new WriteBAMTransform(output, pipeline));
}
/**
 * Runs {@link WriteBAMTransform} over the given sharded reads and header.
 *
 * <p>Bundles {@code shardedReads} (under {@code SHARDED_READS_TAG}) together with a collection
 * created from {@code headerInfo} (under {@code HEADER_TAG}, encoded with
 * {@code HEADER_INFO_CODER}) into a {@link PCollectionTuple}, then applies the transform to it.
 *
 * <p>The transform is attached through {@code apply} instead of invoking {@code expand} directly:
 * Beam documents {@code expand} as a hook that should not be called directly, since it is
 * {@code apply} that registers the transform in the pipeline graph.
 *
 * @param shardedReads the reads handed to the transform under {@code SHARDED_READS_TAG}
 * @param headerInfo the header value wrapped into its own collection
 * @param output forwarded to the {@link WriteBAMTransform} constructor (presumably the
 *     destination location; confirm against that class)
 * @param pipeline used to create the header collection and forwarded to the transform
 * @return the collection produced by {@link WriteBAMTransform}
 */
public static PCollection<String> write(PCollection<Read> shardedReads, HeaderInfo headerInfo, String output, Pipeline pipeline) {
  final PCollectionTuple tuple = PCollectionTuple
      .of(SHARDED_READS_TAG, shardedReads)
      .and(HEADER_TAG, pipeline.apply(Create.of(headerInfo).withCoder(HEADER_INFO_CODER)));
  // Go through apply(): expand() is an implementation hook and must not be invoked directly.
  return tuple.apply(new WriteBAMTransform(output, pipeline));
}
/**
 * A replacement tuple that lacks one of the original tags must be rejected with an
 * {@link IllegalArgumentException} whose message names the missing tag and its collection.
 */
@Test
public void taggedMissingReplacementThrows() {
  PCollectionTuple original =
      PCollectionTuple.of(intsTag, ints).and(strsTag, strs).and(moreIntsTag, moreInts);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Missing replacement");
  thrown.expectMessage(intsTag.toString());
  thrown.expectMessage(ints.toString());

  // The replacement deliberately leaves out the intsTag entry.
  PCollectionTuple replacementWithoutInts =
      PCollectionTuple.of(strsTag, replacementStrs).and(moreIntsTag, moreReplacementInts);
  ReplacementOutputs.tagged(original.expand(), replacementWithoutInts);
}
/**
 * A replacement tuple carrying a tag the original never had must be rejected with an
 * {@link IllegalArgumentException} whose message names the extra tag and its collection.
 */
@Test
public void taggedExtraReplacementThrows() {
  PCollectionTuple original = PCollectionTuple.of(intsTag, ints).and(strsTag, strs);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Missing original output");
  thrown.expectMessage(moreIntsTag.toString());
  thrown.expectMessage(moreReplacementInts.toString());

  // moreIntsTag has no counterpart in the original tuple.
  PCollectionTuple replacementWithExtra =
      PCollectionTuple.of(strsTag, replacementStrs)
          .and(moreIntsTag, moreReplacementInts)
          .and(intsTag, replacementInts);
  ReplacementOutputs.tagged(original.expand(), replacementWithExtra);
}
}
@Test public void taggedSucceeds() { PCollectionTuple original = PCollectionTuple.of(intsTag, ints).and(strsTag, strs).and(moreIntsTag, moreInts); original.expand(), PCollectionTuple.of(strsTag, replacementStrs) .and(moreIntsTag, moreReplacementInts) .and(intsTag, replacementInts)); assertThat( replacements.keySet(),
/**
 * Runs the input through two independent {@code FilterFn} ParDos (constructed with 2 and 3) and
 * returns both results in one tuple, keyed by {@code BY2} and {@code BY3} respectively.
 */
@Override
public PCollectionTuple expand(PCollection<Integer> input) {
  PCollection<Integer> filteredBy2 = input.apply("Filter2s", ParDo.of(new FilterFn(2)));
  PCollection<Integer> filteredBy3 = input.apply("Filter3s", ParDo.of(new FilterFn(3)));

  PCollectionTuple result = PCollectionTuple.of(BY2, filteredBy2);
  return result.and(BY3, filteredBy3);
}
.build(); PCollectionTuple tuple = PCollectionTuple.of(intTag, ints).and(longTag, longs).and(strTag, strs); assertThat(tuple.getAll(), equalTo(pcsByTag)); PCollectionTuple reconstructed = PCollectionTuple.empty(p); assertThat("The tag should map back to the value", tuple.get(tag), equalTo(value)); assertThat(value, equalTo(pcsByTag.get(tag))); reconstructed = reconstructed.and(tag, (PCollection) value);
@Override public PCollectionTuple expand(PCollection<Integer> input) { PCollection<Integer> sum = input.apply(Sum.integersGlobally()); // Fails here when attempting to construct a tuple with an unbound object. return PCollectionTuple.of(sumTag, sum) .and( doneTag, PCollection.createPrimitiveOutputInternal( input.getPipeline(), WindowingStrategy.globalDefault(), input.isBounded(), VoidCoder.of())); } }
// Tags identifying the two collections. Both are declared as anonymous subclasses (trailing {}),
// presumably so the element type remains available at runtime -- confirm against TupleTag docs.
// oneAndTwo bundles `one` under oneTag and `two` under twoTag into a single PCollectionTuple.
final TupleTag<String> oneTag = new TupleTag<String>() {}; final TupleTag<Integer> twoTag = new TupleTag<Integer>() {}; final PCollectionTuple oneAndTwo = PCollectionTuple.of(oneTag, one).and(twoTag, two);
@Test public void testEquals() { TestPipeline p = TestPipeline.create(); TupleTag<Long> longTag = new TupleTag<>(); PCollection<Long> longs = p.apply(GenerateSequence.from(0)); TupleTag<String> strTag = new TupleTag<>(); PCollection<String> strs = p.apply(Create.of("foo", "bar")); EqualsTester tester = new EqualsTester(); // Empty tuples in the same pipeline are equal tester.addEqualityGroup(PCollectionTuple.empty(p), PCollectionTuple.empty(p)); tester.addEqualityGroup( PCollectionTuple.of(longTag, longs).and(strTag, strs), PCollectionTuple.of(longTag, longs).and(strTag, strs)); tester.addEqualityGroup(PCollectionTuple.of(longTag, longs)); tester.addEqualityGroup(PCollectionTuple.of(strTag, strs)); TestPipeline otherPipeline = TestPipeline.create(); // Empty tuples in different pipelines are not equal tester.addEqualityGroup(PCollectionTuple.empty(otherPipeline)); tester.testEquals(); }
.and(new TupleTag<>("orders"), orders) .apply( SqlTransform.query(
// Tags identifying the two collections. Both are declared as anonymous subclasses (trailing {}),
// presumably so the element type remains available at runtime -- confirm against TupleTag docs.
// oneAndTwo bundles `one` under oneTag and `two` under twoTag into a single PCollectionTuple.
final TupleTag<String> oneTag = new TupleTag<String>() {}; final TupleTag<Integer> twoTag = new TupleTag<Integer>() {}; final PCollectionTuple oneAndTwo = PCollectionTuple.of(oneTag, one).and(twoTag, two);
/**
 * Extends the multi-output result of a ParDo with one more tagged collection via
 * {@code PCollectionTuple.and} and verifies the contents reachable through each tag.
 *
 * <p>The DoFn writes every element only to the additional output, so the ParDo's main output
 * (tagged {@code emptyOutputTag}) is expected to be empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testComposePCollectionTuple() {
  pipeline.enableAbandonedNodeEnforcement(true);

  List<Integer> inputs = Arrays.asList(3, -42, 666);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main") {};
  TupleTag<Integer> emptyOutputTag = new TupleTag<Integer>("empty") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("extra") {};

  PCollection<Integer> mainInput = pipeline.apply(Create.of(inputs));

  // Forwards each element solely to the additional output tag.
  DoFn<Integer, Integer> routeToExtra =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(additionalOutputTag, c.element());
        }
      };

  PCollectionTuple outputs =
      mainInput.apply(
          ParDo.of(routeToExtra)
              .withOutputTags(emptyOutputTag, TupleTagList.of(additionalOutputTag)));
  assertNotNull("outputs.getPipeline()", outputs.getPipeline());

  // Attach the original input as an extra entry of the tuple.
  outputs = outputs.and(mainOutputTag, mainInput);

  PAssert.that(outputs.get(mainOutputTag)).containsInAnyOrder(inputs);
  PAssert.that(outputs.get(additionalOutputTag)).containsInAnyOrder(inputs);
  PAssert.that(outputs.get(emptyOutputTag)).empty();

  pipeline.run();
}
/**
 * Joining two row collections on different fields ({@code "user"} for one side,
 * {@code "count"} for the other) is expected to raise an {@link IllegalStateException}.
 */
@Test
@Category(NeedsRunner.class)
public void testMismatchingKeys() {
  Row firstRow = Row.withSchema(CG_SCHEMA_1).addValues("user1", 1, "us").build();
  PCollection<Row> pc1 = pipeline.apply("Create1", Create.of(firstRow)).setRowSchema(CG_SCHEMA_1);

  Row secondRow = Row.withSchema(CG_SCHEMA_1).addValues("user1", 9, "us").build();
  PCollection<Row> pc2 = pipeline.apply("Create2", Create.of(secondRow)).setRowSchema(CG_SCHEMA_1);

  TupleTag<Row> pc1Tag = new TupleTag<>("pc1");
  TupleTag<Row> pc2Tag = new TupleTag<>("pc2");

  thrown.expect(IllegalStateException.class);

  PCollectionTuple bothSides = PCollectionTuple.of(pc1Tag, pc1).and(pc2Tag, pc2);
  PCollection<KV<Row, Row>> joined =
      bothSides.apply(
          "CoGroup", CoGroup.byFieldNames(pc1Tag, "user").byFieldNames(pc2Tag, "count"));

  pipeline.run();
}