@Override public void finishSpecifyingOutput( String transformName, PInput input, PTransform<?, ?> transform) { // All component PCollections will already have been finished. Update their names if // appropriate. int i = 0; for (Map.Entry<TupleTag<?>, PCollection<?>> entry : pcollectionMap.entrySet()) { TupleTag<?> tag = entry.getKey(); PCollection<?> pc = entry.getValue(); if (pc.getName().equals(PValueBase.defaultName(transformName))) { pc.setName(String.format("%s.%s", transformName, tag.getOutName(i))); } i++; } }
// Rehydrates the PCollection with the given component id from its proto form.
// Fails fast with an IllegalStateException when no Pipeline was supplied via
// .withPipeline(...), since the rehydrated PCollection must be attached to one.
// The resulting PCollection is named after its component id.
@Override public PCollection<?> load(String id) throws Exception { checkState( pipeline != null, "%s Cannot rehydrate %s without a %s:" + " provide one via .withPipeline(...)", RehydratedComponents.class.getSimpleName(), PCollection.class.getSimpleName(), Pipeline.class.getSimpleName()); return PCollectionTranslation.fromProto( components.getPcollectionsOrThrow(id), pipeline, RehydratedComponents.this) .setName(id); } });
@Override public void finishSpecifyingOutput( String transformName, PInput input, PTransform<?, ?> transform) { // All component PCollections will have already been finished. int i = 0; for (TaggedPValue tpv : pcollections) { @SuppressWarnings("unchecked") PCollection<T> pc = (PCollection<T>) tpv.getValue(); if (pc.getName().equals(PValueBase.defaultName(transformName))) { pc.setName(String.format("%s.%s%s", transformName, "out", i)); } i++; } }
@Test
public void registerPCollectionExistingNameCollision() throws IOException {
  // The first registration should claim the bare user-set name "foo" as its id.
  PCollection<Long> original =
      pipeline.apply("FirstCount", GenerateSequence.from(0)).setName("foo");
  String firstId = components.registerPCollection(original);

  // A second collection with the same name must receive a distinct id derived from it.
  PCollection<Long> duplicate =
      pipeline.apply("SecondCount", GenerateSequence.from(0)).setName("foo");
  String secondId = components.registerPCollection(duplicate);

  assertThat(firstId, equalTo("foo"));
  assertThat(secondId, containsString("foo"));
  assertThat(secondId, not(equalTo("foo")));

  // Both ids must resolve in the serialized components.
  components.toComponents().getPcollectionsOrThrow(firstId);
  components.toComponents().getPcollectionsOrThrow(secondId);
}
@Test
public void registerPCollection() throws IOException {
  PCollection<Long> collection = pipeline.apply(GenerateSequence.from(0)).setName("foo");

  // Registration should reuse the collection's name as its id, and that id must be
  // resolvable from the serialized components.
  String id = components.registerPCollection(collection);
  assertThat(id, equalTo("foo"));
  components.toComponents().getPcollectionsOrThrow(id);
}
/**
 * Gets the {@link AppliedPTransform} that has a created {@code PCollection<KV<String, Integer>>}
 * as input.
 */
private AppliedPTransform<?, ?, ?> getAppliedTransform(PTransform pardo) {
  // Both dummy collections use the default global windowing strategy.
  WindowingStrategy<?, ?> windowing = WindowingStrategy.globalDefault();

  PCollection<KV<String, Integer>> dummyInput =
      PCollection.createPrimitiveOutputInternal(
          p, windowing, IsBounded.BOUNDED, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
  dummyInput.setName("dummy input");

  PCollection<Integer> dummyOutput =
      PCollection.createPrimitiveOutputInternal(p, windowing, IsBounded.BOUNDED, VarIntCoder.of());
  dummyOutput.setName("dummy output");

  return AppliedPTransform.of("pardo", dummyInput.expand(), dummyOutput.expand(), pardo, p);
}
/**
 * Translates the given read-source {@code transform} with the streaming translator and returns
 * the Flink transformation produced for its output {@link PCollection}.
 *
 * @param transform the read transform under test
 * @param isBounded boundedness to stamp on the primitive output collection
 * @param env the Flink execution environment to translate into
 */
private StreamTransformation<?> applyReadSourceTransform(
    PTransform<?, ?> transform, PCollection.IsBounded isBounded, StreamExecutionEnvironment env) {
  FlinkStreamingPipelineTranslator.StreamTransformTranslator<PTransform<?, ?>> translator =
      getReadSourceTranslator();
  FlinkStreamingTranslationContext ctx =
      new FlinkStreamingTranslationContext(env, PipelineOptionsFactory.create());

  Pipeline pipeline = Pipeline.create();
  PCollection<String> pc =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), isBounded, StringUtf8Coder.of());
  pc.setName("output");

  Map<TupleTag<?>, PValue> outputs = new HashMap<>();
  outputs.put(new TupleTag<>(), pc);

  // Fix: pass the same pipeline the output PCollection belongs to. The original code
  // created a second, unrelated Pipeline here, so the AppliedPTransform referenced a
  // different pipeline than its own output.
  AppliedPTransform<?, ?, ?> appliedTransform =
      AppliedPTransform.of("test-transform", Collections.emptyMap(), outputs, transform, pipeline);

  ctx.setCurrentTransform(appliedTransform);
  translator.translateNode(transform, ctx);

  return ctx.getInputDataStream(pc).getTransformation();
}
// Name the timer PCollection "<fullTransformName>.<localTimerName>" so its registered
// id is traceable back to the transform that owns the timer, then register it with
// the pipeline components to obtain its id.
timerPCollection.setName( String.format("%s.%s", appliedPTransform.getFullName(), localTimerName)); String timerPCollectionId = components.registerPCollection(timerPCollection);
/** * Test that in translation the name for a collection (in this case just a Create output) is * overridden to be what the Dataflow service expects. */ @Test public void testNamesOverridden() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); DataflowRunner runner = DataflowRunner.fromOptions(options); options.setStreaming(false); DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); Pipeline pipeline = Pipeline.create(options); pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle"); runner.replaceTransforms(pipeline); Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob(); // The Create step Step step = job.getSteps().get(0); // This is the name that is "set by the user" that the Dataflow translator must override String userSpecifiedName = getString( Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0), PropertyNames.USER_NAME); // This is the calculated name that must actually be used String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0"; assertThat(userSpecifiedName, equalTo(calculatedName)); }
/**
 * Applies {@code fn} to {@code input} as a splittable ParDo and returns its main output.
 * The multi-output ParDo is expanded by hand so the resulting {@link AppliedPTransform}
 * can be wrapped with {@link SplittableParDo#forAppliedParDo}.
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());
  PCollectionTuple outputs = parDo.expand(input);
  outputs.get(MAIN_OUTPUT_TAG).setName("main");
  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> applied =
      AppliedPTransform.of("ParDo", input.expand(), outputs.expand(), parDo, pipeline);
  return input.apply(name, SplittableParDo.forAppliedParDo(applied)).get(MAIN_OUTPUT_TAG);
}
pipeline .apply(Create.of(Arrays.asList(3, -42, 666))) .setName("MyInput") .apply( "MyParDo",
IsBounded.BOUNDED, VarLongCoder.of()) .setName("prim")); hierarchy.pushNode( "AddPc",