/** Extracts the {@link PCollection} registered under {@code tag} from the input tuple. */
@Override
public PCollection<T> expand(PCollectionTuple input) {
  PCollection<T> selected = input.get(tag);
  return selected;
}
}
/**
 * Runs a query with a composite WHERE clause (a comparison AND-ed with an OR group) against
 * TABLE_A and asserts that exactly rows 1 and 2 of {@code rowsInTableA} survive the filter.
 */
private void runCompositeFilter(PCollection<Row> input) throws Exception {
  String query =
      "SELECT * FROM TABLE_A"
          + " WHERE f_int > 1 AND (f_long < 3000 OR f_string = 'string_row3')";

  PCollection<Row> filtered =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testCompositeFilter", SqlTransform.query(query));

  PAssert.that(filtered).containsInAnyOrder(rowsInTableA.get(1), rowsInTableA.get(2));
  pipeline.run().waitUntilFinish();
}
/**
 * Returns a singleton {@link PCollectionTuple} containing the given {@link PCollection} keyed by
 * the given {@link TupleTag}.
 *
 * <p>A {@link PCollectionTuple} containing additional elements can be created by calling {@link
 * #and} on the result.
 */
public static <T> PCollectionTuple of(TupleTag<T> tag, PCollection<T> pc) {
  PCollectionTuple emptyTuple = empty(pc.getPipeline());
  return emptyTuple.and(tag, pc);
}
/**
 * Writes the sharded reads together with the BAM header to {@code output}.
 *
 * <p>Wraps {@code headerInfo} in a singleton collection, packs both inputs into a
 * {@link PCollectionTuple}, and hands the tuple to a {@link WriteBAMTransform}.
 */
public static PCollection<String> write(PCollection<Read> shardedReads, HeaderInfo headerInfo,
    String output, Pipeline pipeline) {
  final PCollection<HeaderInfo> header =
      pipeline.apply(Create.of(headerInfo).withCoder(HEADER_INFO_CODER));
  final PCollectionTuple inputs =
      PCollectionTuple.of(SHARDED_READS_TAG, shardedReads).and(HEADER_TAG, header);
  return new WriteBAMTransform(output, pipeline).expand(inputs);
}
// NOTE(review): fragment — begins mid-expression (".build();") and ends inside the for-loop
// over tuple.expand(); the enclosing test method's opening and closing lines are outside this
// view, so it is kept byte-identical. It verifies that getAll()/expand() round-trip back to the
// original tagged PCollections and rebuilds an equivalent tuple via and().
.build(); PCollectionTuple tuple = PCollectionTuple.of(intTag, ints).and(longTag, longs).and(strTag, strs); assertThat(tuple.getAll(), equalTo(pcsByTag)); PCollectionTuple reconstructed = PCollectionTuple.empty(p); for (Entry<TupleTag<?>, PValue> taggedValue : tuple.expand().entrySet()) { TupleTag<?> tag = taggedValue.getKey(); PValue value = taggedValue.getValue(); assertThat("The tag should map back to the value", tuple.get(tag), equalTo(value)); assertThat(value, equalTo(pcsByTag.get(tag))); reconstructed = reconstructed.and(tag, (PCollection) value);
/**
 * Verifies that {@link ReplacementOutputs#tagged} rejects a replacement map that omits one of
 * the original outputs, and that the error message names the missing tag and its value.
 */
@Test
public void taggedMissingReplacementThrows() {
  PCollectionTuple original =
      PCollectionTuple.of(intsTag, ints).and(strsTag, strs).and(moreIntsTag, moreInts);
  // Replacement tuple deliberately lacks an entry for intsTag.
  PCollectionTuple incomplete =
      PCollectionTuple.of(strsTag, replacementStrs).and(moreIntsTag, moreReplacementInts);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Missing replacement");
  thrown.expectMessage(intsTag.toString());
  thrown.expectMessage(ints.toString());

  ReplacementOutputs.tagged(original.expand(), incomplete);
}
/**
 * Exercises composing a {@link PCollectionTuple}: a multi-output {@link ParDo} whose declared
 * main output receives nothing, plus an extra collection attached afterwards via {@link
 * PCollectionTuple#and}.
 */
@Test
@Category(ValidatesRunner.class)
public void testComposePCollectionTuple() {
  pipeline.enableAbandonedNodeEnforcement(true);

  List<Integer> elements = Arrays.asList(3, -42, 666);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main") {};
  TupleTag<Integer> emptyOutputTag = new TupleTag<Integer>("empty") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("extra") {};

  PCollection<Integer> mainInput = pipeline.apply(Create.of(elements));

  // Every element goes to the additional output; the declared main output stays empty.
  PCollectionTuple outputs =
      mainInput.apply(
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(additionalOutputTag, c.element());
                    }
                  })
              .withOutputTags(emptyOutputTag, TupleTagList.of(additionalOutputTag)));
  assertNotNull("outputs.getPipeline()", outputs.getPipeline());
  outputs = outputs.and(mainOutputTag, mainInput);

  PAssert.that(outputs.get(mainOutputTag)).containsInAnyOrder(elements);
  PAssert.that(outputs.get(additionalOutputTag)).containsInAnyOrder(elements);
  PAssert.that(outputs.get(emptyOutputTag)).empty();

  pipeline.run();
}
// NOTE(review): garbled fragment — appears to splice together pieces of a ParDo expansion
// (an ofPrimitiveOutputsInternal call, a per-output coder-setting loop, and the main-output
// type-descriptor assignment); several closing parentheses/braces and the surrounding method
// lie outside this view, so the text is kept byte-identical.
PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()), @SuppressWarnings("unchecked") Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder(); for (PCollection<?> out : outputs.getAll().values()) { try { out.setCoder( outputs.get(mainOutputTag).setTypeDescriptor(getFn().getOutputTypeDescriptor());
public static <OutputT> PCollectionTuple createPrimitiveOutputFor( PCollection<?> input, DoFn<?, OutputT> fn, TupleTag<OutputT> mainOutputTag, TupleTagList additionalOutputTags, Map<TupleTag<?>, Coder<?>> outputTagsToCoders, WindowingStrategy<?, ?> windowingStrategy) { DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()), outputTagsToCoders, windowingStrategy, input.isBounded().and(signature.isBoundedPerElement())); // Set output type descriptor similarly to how ParDo.MultiOutput does it. outputs.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor()); return outputs; }
@Test public void testEquals() { TestPipeline p = TestPipeline.create(); TupleTag<Long> longTag = new TupleTag<>(); PCollection<Long> longs = p.apply(GenerateSequence.from(0)); TupleTag<String> strTag = new TupleTag<>(); PCollection<String> strs = p.apply(Create.of("foo", "bar")); EqualsTester tester = new EqualsTester(); // Empty tuples in the same pipeline are equal tester.addEqualityGroup(PCollectionTuple.empty(p), PCollectionTuple.empty(p)); tester.addEqualityGroup( PCollectionTuple.of(longTag, longs).and(strTag, strs), PCollectionTuple.of(longTag, longs).and(strTag, strs)); tester.addEqualityGroup(PCollectionTuple.of(longTag, longs)); tester.addEqualityGroup(PCollectionTuple.of(strTag, strs)); TestPipeline otherPipeline = TestPipeline.create(); // Empty tuples in different pipelines are not equal tester.addEqualityGroup(PCollectionTuple.empty(otherPipeline)); tester.testEquals(); }
/**
 * Applies {@code fn} as a splittable ParDo: expands a regular multi-output ParDo, wraps the
 * expansion in an {@link AppliedPTransform}, then re-applies it via {@link
 * SplittableParDo#forAppliedParDo} and returns the main output.
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());
  PCollectionTuple expanded = parDo.expand(input);
  expanded.get(MAIN_OUTPUT_TAG).setName("main");
  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> appliedParDo =
      AppliedPTransform.of("ParDo", input.expand(), expanded.expand(), parDo, pipeline);
  return input.apply(name, SplittableParDo.forAppliedParDo(appliedParDo)).get(MAIN_OUTPUT_TAG);
}
/**
 * Splits the main input by feature-store key and writes each partition with its configured
 * {@link Write} transform, passing the incoming error collection through untouched.
 *
 * @param input feature rows plus any previously accumulated errors
 * @return the written rows alongside the unchanged input errors
 */
@Override
public PFeatureRows expand(PFeatureRows input) {
  Map<String, Write> transforms = getFeatureStoreTransforms();
  Set<String> keys = transforms.keySet();
  log.info(String.format("Splitting on keys = [%s]", String.join(",", keys)));
  MultiOutputSplit<String> splitter = new MultiOutputSplit<>(selector, keys, specs);
  PCollectionTuple splits = input.getMain().apply(splitter);

  // Map each split's output tag to the Write transform for that key. Iterate the entry set
  // once instead of re-querying the map per key (the original called keySet() again and then
  // transforms.get(key) inside the loop).
  Map<TupleTag<FeatureRowExtended>, Write> taggedTransforms = new HashMap<>();
  for (Map.Entry<String, Write> entry : transforms.entrySet()) {
    TupleTag<FeatureRowExtended> tag = splitter.getStrategy().getTag(entry.getKey());
    taggedTransforms.put(tag, entry.getValue());
  }

  PCollection<FeatureRowExtended> written =
      splits.apply(new WriteTags(taggedTransforms, MultiOutputSplit.MAIN_TAG));
  return new PFeatureRows(written, input.getErrors());
}
/** Wraps the input {@link PCollection} in a singleton tuple keyed by {@code tag}. */
@Override
public PCollectionTuple expand(PCollection<T> input) {
  PCollectionTuple singleton = PCollectionTuple.of(tag, input);
  return singleton;
}
}
/** A tuple created via {@link PCollectionTuple#of} must report {@code has(tag)} as true. */
@Test
public void testOfThenHas() {
  TupleTag<Integer> tag = new TupleTag<>();
  PCollection<Integer> pCollection =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());
  assertTrue(PCollectionTuple.of(tag, pCollection).has(tag));
}
/**
 * Builds a three-entry {@link PCollectionTuple} by delegating the first two entries to the
 * two-collection overload and appending the third under a new {@link TupleTag}.
 */
public static <T, V, W> PCollectionTuple tuple(
    String tag1,
    PCollection<T> pCollection1,
    String tag2,
    PCollection<V> pCollection2,
    String tag3,
    PCollection<W> pCollection3) {
  PCollectionTuple firstTwo = tuple(tag1, pCollection1, tag2, pCollection2);
  return firstTwo.and(new TupleTag<>(tag3), pCollection3);
}
}
@Override public PCollectionTuple expand(PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>> input) { PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(getMainOutputTag()).and(getAdditionalOutputTags().getAll()), // TODO Collections.emptyMap(), input.getWindowingStrategy(), input.isBounded()); return outputs; } }
/**
 * Joining two inputs grouped on differently named fields ("user" vs "count") must fail with
 * {@link IllegalStateException}.
 */
@Test
@Category(NeedsRunner.class)
public void testMismatchingKeys() {
  Row row1 = Row.withSchema(CG_SCHEMA_1).addValues("user1", 1, "us").build();
  Row row2 = Row.withSchema(CG_SCHEMA_1).addValues("user1", 9, "us").build();
  PCollection<Row> pc1 =
      pipeline.apply("Create1", Create.of(row1)).setRowSchema(CG_SCHEMA_1);
  PCollection<Row> pc2 =
      pipeline.apply("Create2", Create.of(row2)).setRowSchema(CG_SCHEMA_1);
  TupleTag<Row> pc1Tag = new TupleTag<>("pc1");
  TupleTag<Row> pc2Tag = new TupleTag<>("pc2");

  thrown.expect(IllegalStateException.class);
  PCollection<KV<Row, Row>> joined =
      PCollectionTuple.of(pc1Tag, pc1)
          .and(pc2Tag, pc2)
          .apply("CoGroup", CoGroup.byFieldNames(pc1Tag, "user").byFieldNames(pc2Tag, "count"));
  pipeline.run();
}