/**
 * Produces the {@link MultiStepCombine} that stands in for a matched {@code Combine.PerKey}.
 *
 * <p>The combine function of the original transform must be a {@link CombineFn}; anything else
 * fails the state check. The replacement is built from that function and the coder of the
 * original application's only output, and is paired with the application's only input.
 *
 * @param transform the applied per-key combine being replaced
 * @return the original input together with the multi-step replacement
 */
@Override
public PTransformReplacement<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>>
    getReplacementTransform(
        AppliedPTransform<
                PCollection<KV<K, InputT>>,
                PCollection<KV<K, OutputT>>,
                PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>>>
            transform) {
  GlobalCombineFn<?, ?, ?> rawFn = ((Combine.PerKey) transform.getTransform()).getFn();
  checkState(
      rawFn instanceof CombineFn,
      "%s.matcher() should only match %s instances using %s, got %s",
      MultiStepCombine.class.getSimpleName(),
      PerKey.class.getSimpleName(),
      CombineFn.class.getSimpleName(),
      rawFn.getClass().getName());

  // The instanceof check above guards this narrowing; the type arguments are unchecked.
  @SuppressWarnings("unchecked")
  CombineFn<InputT, AccumT, OutputT> combineFn = (CombineFn<InputT, AccumT, OutputT>) rawFn;

  @SuppressWarnings("unchecked")
  PCollection<KV<K, InputT>> onlyInput =
      (PCollection<KV<K, InputT>>) Iterables.getOnlyElement(transform.getInputs().values());

  @SuppressWarnings("unchecked")
  PCollection<KV<K, OutputT>> onlyOutput =
      (PCollection<KV<K, OutputT>>) Iterables.getOnlyElement(transform.getOutputs().values());

  return PTransformReplacement.of(
      onlyInput, new MultiStepCombine<>(combineFn, onlyOutput.getCoder()));
}
}
/**
 * Shared body for tests of the streaming {@code WriteFiles} override.
 *
 * <p>Builds a pipeline from {@code options}, asks a {@link StreamingShardedWriteFactory} for the
 * replacement of a plain {@code WriteFiles} application and verifies that:
 *
 * <ul>
 *   <li>the replacement is not equal to the original transform,
 *   <li>its num-shards provider yields {@code expectedNumShards}, and
 *   <li>{@code mapOutputs} produces exactly one entry, keyed by the replacement's per-destination
 *       output filenames, whose original value is the original result's per-destination output
 *       filenames.
 * </ul>
 *
 * @param options pipeline options used to create the test pipeline and the factory
 * @param expectedNumShards shard count the replacement is expected to report
 */
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
  TestPipeline p = TestPipeline.fromOptions(options);

  StreamingShardedWriteFactory<Object, Void, Object> factory =
      new StreamingShardedWriteFactory<>(p.getOptions());

  WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));

  // The empty collection is created with a Void coder and viewed as PCollection<Object> through
  // a raw cast; the warning is suppressed on this declaration only.
  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));

  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of("writefiles", objs.expand(), Collections.emptyMap(), original, p);

  WriteFiles<Object, Void, Object> replacement =
      (WriteFiles<Object, Void, Object>)
          factory.getReplacementTransform(originalApplication).getTransform();

  assertThat(replacement, not(equalTo((Object) original)));
  assertThat(replacement.getNumShardsProvider().get(), equalTo(expectedNumShards));

  WriteFilesResult<Void> originalResult = objs.apply(original);
  WriteFilesResult<Void> replacementResult = objs.apply(replacement);

  Map<PValue, ReplacementOutput> res =
      factory.mapOutputs(originalResult.expand(), replacementResult);
  assertEquals(1, res.size());
  assertEquals(
      originalResult.getPerDestinationOutputFilenames(),
      res.get(replacementResult.getPerDestinationOutputFilenames()).getOriginal().getValue());
}
/**
 * The factory accepts a {@code Flatten.pCollections()} application that has no inputs, and the
 * replacement it returns carries an empty input list.
 */
@Test
public void getInputEmptySucceeds() {
  PTransformReplacement<PCollectionList<Long>, PCollection<Long>> emptyFlattenReplacement =
      factory.getReplacementTransform(
          AppliedPTransform.of(
              "nonEmptyInput",
              Collections.emptyMap(),
              Collections.emptyMap(),
              Flatten.pCollections(),
              pipeline));

  assertThat(emptyFlattenReplacement.getInput().getAll(), emptyIterable());
}
replacement = replacement.withWindowedWrites(); return PTransformReplacement.of( PTransformReplacements.getSingletonMainInput(transform), replacement.withNumShards(numShards));
replacement = replacement.withWindowedWrites(); return PTransformReplacement.of( PTransformReplacements.getSingletonMainInput(transform), replacement.withNumShards(numShards));
/**
 * The {@code ParDoSingle} returned by the factory exposes the same side inputs that were
 * attached to the {@code ParDo.SingleOutput} it replaces.
 */
@Test
public void getReplacementTransformGetSideInputs() {
  // Singleton view over the sum of a few longs.
  PCollection<Long> longVals = pipeline.apply("LongSideInputVals", Create.of(-1L, -2L, -4L));
  PCollectionView<Long> longView =
      longVals.apply("SideLongView", Sum.longsGlobally().asSingletonView());

  // List view over a few strings.
  PCollection<String> stringVals =
      pipeline.apply("StringSideInputVals", Create.of("foo", "bar", "baz"));
  PCollectionView<List<String>> stringsView = stringVals.apply("SideStringsView", View.asList());

  ParDo.SingleOutput<Integer, Long> parDo =
      ParDo.of(new ToLongFn()).withSideInputs(longView, stringsView);

  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original", input.expand(), input.apply(parDo).expand(), parDo, pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacement =
      factory.getReplacementTransform(application);
  ParDoSingle<Integer, Long> replaced = (ParDoSingle<Integer, Long>) replacement.getTransform();

  assertThat(replaced.getSideInputs(), containsInAnyOrder(stringsView, longView));
}
AppliedPTransform.of( "foo", ints.expand(), view.expand(), CreatePCollectionView.of(view), p)); ints.apply(replacement.getTransform()); final AtomicBoolean writeViewVisited = new AtomicBoolean(); p.traverseTopologically(
/**
 * Verifies that the replacement transform reports the same Display Data as the {@link
 * ParDo.SingleOutput} it replaces, and that the single primitive transform found by the {@code
 * DisplayDataEvaluator} reports that same Display Data as well.
 */
@Test
public void getReplacementTransformPopulateDisplayData() {
  ParDo.SingleOutput<Integer, Long> parDo = ParDo.of(new ToLongFn());
  DisplayData expectedDisplayData = DisplayData.from(parDo);

  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original", input.expand(), input.apply(parDo).expand(), parDo, pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacement =
      factory.getReplacementTransform(application);

  // The replacement as a whole must look like the original.
  DisplayData actualDisplayData = DisplayData.from(replacement.getTransform());
  assertThat(actualDisplayData, equalTo(expectedDisplayData));

  // The only primitive inside the replacement must look like the replacement.
  DisplayData primitiveDisplayData =
      Iterables.getOnlyElement(
          DisplayDataEvaluator.create()
              .displayDataForPrimitiveTransforms(replacement.getTransform(), VarIntCoder.of()));
  assertThat(primitiveDisplayData, equalTo(actualDisplayData));
}
/**
 * For a {@code WriteFiles} that specifies no sharding, the factory must hand back a transform
 * that is not equal to the original one.
 */
@Test
public void withNoShardingSpecifiedReturnsNewTransform() {
  ResourceId outputDirectory = LocalResources.fromString("/foo", true /* isDirectory */);

  // A sink whose write operation must never be requested by this test.
  FileBasedSink<Object, Void, Object> unusableSink =
      new FileBasedSink<Object, Void, Object>(
          StaticValueProvider.of(outputDirectory),
          DynamicFileDestinations.constant(new FakeFilenamePolicy())) {
        @Override
        public WriteOperation<Void, Object> createWriteOperation() {
          throw new IllegalArgumentException("Should not be used");
        }
      };
  PTransform<PCollection<Object>, WriteFilesResult<Void>> original = WriteFiles.to(unusableSink);

  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));

  AppliedPTransform<
          PCollection<Object>,
          WriteFilesResult<Void>,
          PTransform<PCollection<Object>, WriteFilesResult<Void>>>
      originalApplication =
          AppliedPTransform.of("write", objs.expand(), Collections.emptyMap(), original, p);

  assertThat(
      factory.getReplacementTransform(originalApplication).getTransform(),
      not(equalTo((Object) original)));
}
private < InputT extends PInput, OutputT extends POutput, TransformT extends PTransform<? super InputT, OutputT>> void applyReplacement( Node original, PTransformOverrideFactory<InputT, OutputT, TransformT> replacementFactory) { PTransformReplacement<InputT, OutputT> replacement = replacementFactory.getReplacementTransform( (AppliedPTransform<InputT, OutputT, TransformT>) original.toAppliedPTransform(this)); if (replacement.getTransform() == original.getTransform()) { return; } InputT originalInput = replacement.getInput(); LOG.debug("Replacing {} with {}", original, replacement); transforms.replaceNode(original, originalInput, replacement.getTransform()); try { OutputT newOutput = replacement.getTransform().expand(originalInput); Map<PValue, ReplacementOutput> originalToReplacement = replacementFactory.mapOutputs(original.getOutputs(), newOutput); // Ensure the internal TransformHierarchy data structures are consistent. transforms.setOutput(newOutput); transforms.replaceOutputs(originalToReplacement); } finally { transforms.popNode(); } }
/**
 * Rebuilds the applied {@code WriteFiles} with {@code LogElementShardsWithDrift} sharding.
 *
 * <p>The sink, the dynamic-destination side inputs and the windowed-writes flag are read back
 * from {@code transform} through {@code WriteFilesTranslation}.
 *
 * @param transform the applied write being replaced
 * @return the main input of {@code transform} paired with the re-sharded write
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the original
 */
@Override
public PTransformReplacement<PCollection<InputT>, WriteFilesResult<DestinationT>>
    getReplacementTransform(
        AppliedPTransform<
                PCollection<InputT>,
                WriteFilesResult<DestinationT>,
                PTransform<PCollection<InputT>, WriteFilesResult<DestinationT>>>
            transform) {
  try {
    WriteFiles<InputT, DestinationT, ?> shardedWrite =
        WriteFiles.to(WriteFilesTranslation.getSink(transform))
            .withSideInputs(WriteFilesTranslation.getDynamicDestinationSideInputs(transform))
            .withSharding(new LogElementShardsWithDrift<>());

    // Carry the windowed-writes setting of the original over to the replacement.
    if (WriteFilesTranslation.isWindowedWrites(transform)) {
      shardedWrite = shardedWrite.withWindowedWrites();
    }

    return PTransformReplacement.of(
        PTransformReplacements.getSingletonMainInput(transform), shardedWrite);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
/**
 * With the Flink test runner and a parallelism of 5, the {@code WriteFiles} replacement built by
 * {@link StreamingShardedWriteFactory} differs from the original and reports 10 shards.
 */
@Test
public void testRunnerDeterminedSharding() {
  FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  flinkOptions.setRunner(TestFlinkRunner.class);
  flinkOptions.setFlinkMaster("[auto]");
  flinkOptions.setParallelism(5);

  TestPipeline p = TestPipeline.fromOptions(flinkOptions);

  StreamingShardedWriteFactory<Object, Void, Object> factory =
      new StreamingShardedWriteFactory<>(p.getOptions());

  WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));

  @SuppressWarnings("unchecked")
  PCollection<Object> emptyInput = (PCollection) p.apply(Create.empty(VoidCoder.of()));

  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of(
              "writefiles", emptyInput.expand(), Collections.emptyMap(), original, p);

  WriteFiles<Object, Void, Object> replacement =
      (WriteFiles<Object, Void, Object>)
          factory.getReplacementTransform(originalApplication).getTransform();

  assertThat(replacement, not(equalTo((Object) original)));
  assertThat(replacement.getNumShardsProvider().get(), is(10));
}
/**
 * Replaces an applied view-creation transform with a {@code GroupAndWriteView} over the same
 * {@link PCollectionView}.
 *
 * @param transform the applied transform from which the view is extracted
 * @return the main input of {@code transform} paired with the new {@code GroupAndWriteView}
 * @throws RuntimeException if the view cannot be extracted; the {@link IOException} is the cause
 */
@Override
public PTransformReplacement<PCollection<ElemT>, PCollection<ElemT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        transform) {
  PCollectionView<ViewT> extractedView;
  try {
    extractedView = CreatePCollectionViewTranslation.getView(transform);
  } catch (IOException ioe) {
    String message =
        String.format(
            "Could not extract %s from transform %s",
            PCollectionView.class.getSimpleName(), transform);
    throw new RuntimeException(message, ioe);
  }

  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(transform),
      new GroupAndWriteView<>(extractedView));
}
/**
 * Applies the factory's replacement for an input-less {@code Flatten.pCollections()} to an empty
 * {@link PCollectionList}, then runs the pipeline and asserts the flattened output is empty.
 */
@Test
@Category(NeedsRunner.class)
public void testOverride() {
  PCollectionList<Long> noCollections = PCollectionList.empty(pipeline);

  PCollection<Long> flattened =
      noCollections.apply(
          factory
              .getReplacementTransform(
                  AppliedPTransform.of(
                      "nonEmptyInput",
                      Collections.emptyMap(),
                      Collections.emptyMap(),
                      Flatten.pCollections(),
                      pipeline))
              .getTransform());

  PAssert.that(flattened).empty();
  pipeline.run();
}
}
/**
 * The {@code ParDoSingle} returned by the factory exposes the {@link DoFn} of the {@code
 * ParDo.SingleOutput} it replaces.
 */
@Test
public void getReplacementTransformGetFn() {
  DoFn<Integer, Long> toLongFn = new ToLongFn();
  ParDo.SingleOutput<Integer, Long> parDo = ParDo.of(toLongFn);

  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original", input.expand(), input.apply(parDo).expand(), parDo, pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacement =
      factory.getReplacementTransform(application);
  ParDoSingle<Integer, Long> replaced = (ParDoSingle<Integer, Long>) replacement.getTransform();

  // Checked against both the transform's view of the fn and the instance created above.
  assertThat(replaced.getFn(), equalTo(parDo.getFn()));
  assertThat(replaced.getFn(), equalTo(toLongFn));
}
/**
 * Replaces an applied view-creation transform with a {@code CreateStreamingFlinkView} over the
 * same {@link PCollectionView}.
 *
 * @param transform the applied transform; its only input becomes the input of the replacement
 * @return the only input of {@code transform} paired with the new {@code
 *     CreateStreamingFlinkView}
 * @throws RuntimeException if the view cannot be extracted from {@code transform}; the
 *     underlying {@link IOException} is kept as the cause
 */
@Override
public PTransformReplacement<PCollection<ElemT>, PCollection<ElemT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        transform) {
  // The input map is not typed by element, so the single input has to be narrowed by hand; the
  // declared input type of the application is PCollection<ElemT>.
  @SuppressWarnings("unchecked")
  PCollection<ElemT> collection =
      (PCollection<ElemT>) Iterables.getOnlyElement(transform.getInputs().values());

  PCollectionView<ViewT> view;
  try {
    view = CreatePCollectionViewTranslation.getView(transform);
  } catch (IOException e) {
    // Say which transform failed instead of rethrowing with no context.
    throw new RuntimeException(
        String.format(
            "Could not extract %s from transform %s",
            PCollectionView.class.getSimpleName(), transform),
        e);
  }

  CreateStreamingFlinkView<ElemT, ViewT> createFlinkView = new CreateStreamingFlinkView<>(view);
  return PTransformReplacement.of(collection, createFlinkView);
}
/**
 * Wraps a single-output {@code ParDo} so that its input first passes through {@code
 * Reshuffle.viaRandomKey()}.
 *
 * @param appliedTransform the applied {@code ParDo.SingleOutput} being replaced
 * @return the main input of {@code appliedTransform} paired with a composite that applies the
 *     reshuffle followed by the original {@code ParDo}
 */
@Override
public PTransformReplacement<PCollection<InputT>, PCollection<OutputT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<InputT>, PCollection<OutputT>, ParDo.SingleOutput<InputT, OutputT>>
        appliedTransform) {
  PCollection<InputT> mainInput = PTransformReplacements.getSingletonMainInput(appliedTransform);

  PTransform<PCollection<InputT>, PCollection<OutputT>> reshuffleThenParDo =
      new PTransform<PCollection<InputT>, PCollection<OutputT>>() {
        @Override
        public PCollection<OutputT> expand(PCollection<InputT> input) {
          PCollection<InputT> materialized =
              input.apply("Materialize input", Reshuffle.viaRandomKey());
          return materialized.apply("ParDo with stable input", appliedTransform.getTransform());
        }
      };

  return PTransformReplacement.of(mainInput, reshuffleThenParDo);
}
/**
 * Wraps a multi-output {@code ParDo} so that its input first passes through {@code
 * Reshuffle.viaRandomKey()}.
 *
 * @param appliedTransform the applied {@code ParDo.MultiOutput} being replaced
 * @return the main input of {@code appliedTransform} paired with a composite that applies the
 *     reshuffle followed by the original {@code ParDo}
 */
@Override
public PTransformReplacement<PCollection<InputT>, PCollectionTuple> getReplacementTransform(
    AppliedPTransform<PCollection<InputT>, PCollectionTuple, ParDo.MultiOutput<InputT, OutputT>>
        appliedTransform) {
  PCollection<InputT> mainInput = PTransformReplacements.getSingletonMainInput(appliedTransform);

  PTransform<PCollection<InputT>, PCollectionTuple> reshuffleThenParDo =
      new PTransform<PCollection<InputT>, PCollectionTuple>() {
        @Override
        public PCollectionTuple expand(PCollection<InputT> input) {
          PCollection<InputT> materialized =
              input.apply("Materialize input", Reshuffle.viaRandomKey());
          return materialized.apply("ParDo with stable input", appliedTransform.getTransform());
        }
      };

  return PTransformReplacement.of(mainInput, reshuffleThenParDo);
}
/**
 * Pairs the main input of {@code application} with the transform produced by {@code
 * getReplacementForApplication}.
 *
 * @param application the applied transform being replaced
 * @return the replacement for {@code application}
 * @throws RuntimeException wrapping any {@link IOException} raised while building the
 *     replacement
 */
@Override
public PTransformReplacement<PCollection<? extends InputT>, PCollectionTuple>
    getReplacementTransform(
        AppliedPTransform<
                PCollection<? extends InputT>,
                PCollectionTuple,
                PTransform<PCollection<? extends InputT>, PCollectionTuple>>
            application) {
  try {
    return PTransformReplacement.of(
        PTransformReplacements.getSingletonMainInput(application),
        getReplacementForApplication(application));
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
/**
 * Replaces an applied view-creation transform with a {@code CreateStreamingFlinkView} over the
 * same {@link PCollectionView}.
 *
 * @param transform the applied transform; its only input becomes the input of the replacement
 * @return the only input of {@code transform} paired with the new {@code
 *     CreateStreamingFlinkView}
 * @throws RuntimeException if the view cannot be extracted from {@code transform}; the
 *     underlying {@link IOException} is kept as the cause
 */
@Override
public PTransformReplacement<PCollection<ElemT>, PCollection<ElemT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        transform) {
  // The input map is not typed by element, so the single input has to be narrowed by hand; the
  // declared input type of the application is PCollection<ElemT>.
  @SuppressWarnings("unchecked")
  PCollection<ElemT> collection =
      (PCollection<ElemT>) Iterables.getOnlyElement(transform.getInputs().values());

  PCollectionView<ViewT> view;
  try {
    view = CreatePCollectionViewTranslation.getView(transform);
  } catch (IOException e) {
    // Say which transform failed instead of rethrowing with no context.
    throw new RuntimeException(
        String.format(
            "Could not extract %s from transform %s",
            PCollectionView.class.getSimpleName(), transform),
        e);
  }

  CreateStreamingFlinkView<ElemT, ViewT> createFlinkView = new CreateStreamingFlinkView<>(view);
  return PTransformReplacement.of(collection, createFlinkView);
}