/**
 * Builds a {@link CoGbkResultSchema} with {@code size} integer-typed tuple tags named
 * {@code "tag0"} through {@code "tag{size-1}"}.
 */
private CoGbkResultSchema createSchema(int size) {
  List<TupleTag<?>> tagList = new ArrayList<>(size);
  int index = 0;
  while (index < size) {
    tagList.add(new TupleTag<Integer>("tag" + index));
    index++;
  }
  return new CoGbkResultSchema(TupleTagList.of(tagList));
}
@Override
public PCollectionTuple expand(PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>> input) {
  // Creates the runner-primitive output tuple directly: main output tag first, then any
  // additional output tags, inheriting the input's windowing strategy and boundedness.
  PCollectionTuple outputs =
      PCollectionTuple.ofPrimitiveOutputsInternal(
          input.getPipeline(),
          TupleTagList.of(getMainOutputTag()).and(getAdditionalOutputTags().getAll()),
          // TODO
          // NOTE(review): no output coders are registered here (empty map), unlike the
          // similar createPrimitiveOutputFor helper elsewhere — confirm this is intentional.
          Collections.emptyMap(),
          input.getWindowingStrategy(),
          input.isBounded());
  return outputs;
}
}
/**
 * Returns the additional (non-main) output tags of the ParDo-like transform inside {@code
 * application}.
 *
 * <p>Takes a fast path when the transform is a {@link ParDo.MultiOutput}; otherwise it
 * round-trips the transform through its portable proto form and derives the additional tags
 * as "all declared outputs minus the main output".
 *
 * @throws IOException if the proto payload cannot be parsed
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  // Fast path: the Java SDK transform already knows its additional output tags.
  if (transform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) transform).getAdditionalOutputTags();
  }
  // Slow path: translate to the portable proto and inspect the ParDo payload.
  RunnerApi.PTransform protoTransform =
      PTransformTranslation.toProto(
          application, SdkComponents.create(application.getPipeline().getOptions()));
  ParDoPayload payload = ParDoPayload.parseFrom(protoTransform.getSpec().getPayload());
  TupleTag<?> mainOutputTag = getMainOutputTag(payload);
  // Additional outputs are every declared output except the main one.
  Set<String> outputTags =
      Sets.difference(
          protoTransform.getOutputsMap().keySet(),
          Collections.singleton(mainOutputTag.getId()));
  ArrayList<TupleTag<?>> additionalOutputTags = new ArrayList<>();
  for (String outputTag : outputTags) {
    additionalOutputTags.add(new TupleTag<>(outputTag));
  }
  return TupleTagList.of(additionalOutputTags);
}
@Override
public PCollectionTuple expand(PCollection<FailsafeElement<T, String>> failsafeElements) {
  // Converts each element's JSON payload to a TableRow. Successful conversions are emitted
  // on successTag(); failures carry the original element plus diagnostics on failureTag().
  return failsafeElements.apply(
      "JsonToTableRow",
      ParDo.of(
              new DoFn<FailsafeElement<T, String>, TableRow>() {
                @ProcessElement
                public void processElement(ProcessContext context) {
                  FailsafeElement<T, String> element = context.element();
                  String json = element.getPayload();
                  try {
                    TableRow row = convertJsonToTableRow(json);
                    context.output(row);
                  } catch (Exception e) {
                    // Route the failing element to the dead-letter output, annotated
                    // with the error message and full stack trace for debugging.
                    context.output(
                        failureTag(),
                        FailsafeElement.of(element)
                            .setErrorMessage(e.getMessage())
                            .setStacktrace(Throwables.getStackTraceAsString(e)));
                  }
                }
              })
          .withOutputTags(successTag(), TupleTagList.of(failureTag())));
}
}
/**
 * Creates the primitive {@link PCollectionTuple} output for a ParDo-style primitive.
 *
 * <p>The main output tag is listed first, followed by the additional tags. Boundedness is
 * the combination of the input's boundedness and the DoFn's per-element boundedness from
 * its signature.
 *
 * @param input the input PCollection (source of pipeline, boundedness)
 * @param fn the DoFn whose output type descriptor is applied to the main output
 * @param mainOutputTag tag of the main output collection
 * @param additionalOutputTags tags of any additional output collections
 * @param outputTagsToCoders coders to register for each output tag
 * @param windowingStrategy windowing strategy applied to all outputs
 * @return the assembled output tuple
 */
public static <OutputT> PCollectionTuple createPrimitiveOutputFor(
    PCollection<?> input,
    DoFn<?, OutputT> fn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders,
    WindowingStrategy<?, ?> windowingStrategy) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  PCollectionTuple outputs =
      PCollectionTuple.ofPrimitiveOutputsInternal(
          input.getPipeline(),
          TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()),
          outputTagsToCoders,
          windowingStrategy,
          input.isBounded().and(signature.isBoundedPerElement()));
  // Set output type descriptor similarly to how ParDo.MultiOutput does it.
  outputs.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor());
  return outputs;
}
/** Builds the message-parsing ParDo, wiring up a DLQ output tag only when one is configured. */
private ParDo.MultiOutput<PubsubMessage, Row> createParserParDo() {
  TupleTagList additionalTags;
  if (useDlq()) {
    additionalTags = TupleTagList.of(DLQ_TAG);
  } else {
    additionalTags = TupleTagList.empty();
  }
  return ParDo.of(
          PubsubMessageToRow.builder()
              .messageSchema(getSchema())
              .useDlq(getDeadLetterQueue() != null)
              .build())
      .withOutputTags(MAIN_TAG, additionalTags);
}
/** Applies an identity DoFn over a tiny fixed input, declaring both given output tags. */
private PCollectionTuple buildPCollectionTupleWithTags(
    TupleTag<Integer> mainOutputTag, TupleTag<Integer> additionalOutputTag) {
  return p.apply(Create.of(1, 2, 3))
      .apply(
          ParDo.of(new IdentityDoFn())
              .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
}
/** Verifies an SDF emits "main:N" on the main tag and "additional:N" on the extra tag. */
private void testAdditionalOutput(IsBounded bounded) {
  TupleTag<String> main = new TupleTag<String>("main") {};
  TupleTag<String> additional = new TupleTag<String>("additional") {};
  PCollectionTuple outputs =
      p.apply("input", Create.of(0, 1, 2))
          .apply(
              ParDo.of(sdfWithAdditionalOutput(bounded, additional))
                  .withOutputTags(main, TupleTagList.of(additional)));
  PAssert.that(outputs.get(main))
      .containsInAnyOrder(Arrays.asList("main:0", "main:1", "main:2"));
  PAssert.that(outputs.get(additional))
      .containsInAnyOrder(Arrays.asList("additional:0", "additional:1", "additional:2"));
  p.run();
}
/**
 * Builds an {@link AppliedPTransform} for a multi-input (side input + main input),
 * multi-output ParDo, for use as a translation-test fixture.
 */
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  // Side input consumed by the DoFn.
  PCollectionView<String> view = pipeline.apply(Create.of("foo")).apply(View.asSingleton());
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));
  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(view)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple output = input.apply(parDo);
  // The applied transform's inputs are the main input expansion plus the side-input
  // additional inputs.
  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.putAll(parDo.getAdditionalInputs());
  inputs.putAll(input.expand());
  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
}
@Test
@Category(NeedsRunner.class)
public void testTaggedOutputUnknownCoder() throws Exception {
  // The tagged output's element type has no registered coder, so running the pipeline
  // must fail coder inference with an IllegalStateException.
  final TupleTag<Integer> mainOutputTag = new TupleTag<>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<>("unknownSide");
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));
  numbers.apply(
      ParDo.of(new TaggedOutputDummyFn(mainOutputTag, additionalOutputTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");
  pipeline.run();
}
/**
 * @return new PFeatureRows, which has any tagged errors and retries in DoFn added to the errors
 *     and retries gathered so far.
 */
public PFeatureRows applyDoFn(String name, BaseFeatureDoFn doFn) {
  // Tag the DoFn with the step name and declare both the main and errors outputs.
  MultiOutput<FeatureRowExtended, FeatureRowExtended> transform =
      ParDo.of(doFn.withTransformName(name))
          .withOutputTags(MAIN_TAG, TupleTagList.of(ERRORS_TAG));
  PCollectionTuple transformed = Pipeline.applyTransform(name, main, transform);
  PCollection<FeatureRowExtended> outMain =
      transformed.get(MAIN_TAG).setCoder(ProtoCoder.of(FeatureRowExtended.class));
  // Newly tagged errors are flattened together with the errors accumulated so far.
  PCollection<FeatureRowExtended> outErrors =
      PCollectionList.of(
              transformed.get(ERRORS_TAG).setCoder(ProtoCoder.of(FeatureRowExtended.class)))
          .and(errors)
          .apply(name + "/Flatten errors", Flatten.pCollections())
          .setCoder(ProtoCoder.of(FeatureRowExtended.class));
  return new PFeatureRows(outMain, outErrors);
}
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
  // The MAIN output type has no registered coder; setting one explicitly on the
  // resulting PCollection must allow the pipeline to run.
  final TupleTag<TestDummy> mainOutputTag = new TupleTag<>("unregisteredMain");
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additionalOutput") {};
  PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));
  PCollectionTuple outputTuple =
      input.apply(
          ParDo.of(new MainOutputDummyFn(mainOutputTag, additionalOutputTag))
              .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
  outputTuple.get(mainOutputTag).setCoder(new TestDummyCoder());
  pipeline.run();
}
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  // Replacing transforms on a multi-output stateful ParDo must not crash, and the
  // original DoFn must still be discoverable inside the batch-stateful override.
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> primaryTag = new TupleTag<Integer>() {};
  TupleTag<Integer> secondaryTag = new TupleTag<Integer>() {};
  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(statefulFn).withOutputTags(primaryTag, TupleTagList.of(secondaryTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) statefulFn));
}
@Test
@Category(ValidatesRunner.class)
public void testParDoWithOnlyTaggedOutput() {
  List<Integer> inputs = Arrays.asList(3, -42, 666);
  final TupleTag<Void> mainOutputTag = new TupleTag<Void>("main") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additional") {};
  // This DoFn never emits to the main output: everything goes to the additional tag,
  // so the main output must end up empty.
  DoFn<Integer, Void> tagOnlyFn =
      new DoFn<Integer, Void>() {
        @ProcessElement
        public void processElement(@Element Integer element, MultiOutputReceiver r) {
          r.get(additionalOutputTag).output(element);
        }
      };
  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(inputs))
          .apply(
              ParDo.of(tagOnlyFn)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
  PAssert.that(outputs.get(mainOutputTag)).empty();
  PAssert.that(outputs.get(additionalOutputTag)).containsInAnyOrder(inputs);
  pipeline.run();
}
@Override
public PCollection<T> expand(PCollection<T> input) {
  // Pass elements through an identity ParDo with a second, never-populated output.
  // The empty "cleanup signal" output is turned into a side-input view so that the
  // cleanup ParDo below only runs after the main data has been fully processed.
  TupleTag<T> mainOutput = new TupleTag<>();
  TupleTag<Void> cleanupSignal = new TupleTag<>();
  PCollectionTuple outputs =
      input.apply(
          ParDo.of(new IdentityFn<T>())
              .withOutputTags(mainOutput, TupleTagList.of(cleanupSignal)));
  PCollectionView<Iterable<Void>> cleanupSignalView =
      outputs.get(cleanupSignal).setCoder(VoidCoder.of()).apply(View.asIterable());
  // A single-element collection drives exactly one invocation of the cleanup operation;
  // depending on cleanupSignalView sequences it after the main processing.
  input
      .getPipeline()
      .apply("Create(CleanupOperation)", Create.of(cleanupOperation))
      .apply(
          "Cleanup",
          ParDo.of(
                  new DoFn<CleanupOperation, Void>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.element().cleanup(new ContextContainer(c, jobIdSideInput));
                    }
                  })
              .withSideInputs(jobIdSideInput, cleanupSignalView));
  return outputs.get(mainOutput).setCoder(input.getCoder());
}
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
  // Same as the non-FnAPI variant, but with the beam_fn_api experiment enabled.
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> primaryTag = new TupleTag<Integer>() {};
  TupleTag<Integer> secondaryTag = new TupleTag<Integer>() {};
  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(statefulFn).withOutputTags(primaryTag, TupleTagList.of(secondaryTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) statefulFn));
}
@Test
@Category(ValidatesRunner.class)
public void testComposePCollectionTuple() {
  pipeline.enableAbandonedNodeEnforcement(true);
  List<Integer> inputs = Arrays.asList(3, -42, 666);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main") {};
  TupleTag<Integer> emptyOutputTag = new TupleTag<Integer>("empty") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("extra") {};
  PCollection<Integer> mainInput = pipeline.apply(Create.of(inputs));
  // The DoFn only emits on additionalOutputTag, so the declared main output
  // (emptyOutputTag) stays empty.
  PCollectionTuple outputs =
      mainInput.apply(
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(additionalOutputTag, c.element());
                    }
                  })
              .withOutputTags(emptyOutputTag, TupleTagList.of(additionalOutputTag)));
  assertNotNull("outputs.getPipeline()", outputs.getPipeline());
  // Compose a new tuple that additionally maps mainOutputTag to the raw input collection;
  // this exercises PCollectionTuple.and() with an unrelated PCollection.
  outputs = outputs.and(mainOutputTag, mainInput);
  PAssert.that(outputs.get(mainOutputTag)).containsInAnyOrder(inputs);
  PAssert.that(outputs.get(additionalOutputTag)).containsInAnyOrder(inputs);
  PAssert.that(outputs.get(emptyOutputTag)).empty();
  pipeline.run();
}
@Test
public void testTaggedOutputUnregisteredExplicitCoder() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);
  PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));
  final TupleTag<Integer> mainOutputTag = new TupleTag<>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<>("unregisteredSide");
  ParDo.MultiOutput<Integer, Integer> pardo =
      ParDo.of(new TaggedOutputDummyFn(mainOutputTag, additionalOutputTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag));
  PCollectionTuple outputTuple = input.apply(pardo);
  // The tagged output's type has no registered coder; set one explicitly before use.
  outputTuple.get(additionalOutputTag).setCoder(new TestDummyCoder());
  outputTuple.get(additionalOutputTag).apply(View.asSingleton());
  assertEquals(new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder());
  // Finishing specification must neither crash nor overwrite the explicit coder.
  outputTuple
      .get(additionalOutputTag)
      .finishSpecifyingOutput("ParDo", input, pardo); // Check for crashes
  assertEquals(
      new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder()); // Check for corruption
}
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
  // An empty input plus a DoFn that never outputs: every declared output stays empty.
  TupleTag<String> mainOutputTag = new TupleTag<String>("main") {};
  TupleTag<String> firstAdditionalTag = new TupleTag<String>("additional1") {};
  TupleTag<String> secondAdditionalTag = new TupleTag<String>("additional2") {};
  TupleTagList additionalTags = TupleTagList.of(firstAdditionalTag).and(secondAdditionalTag);
  PCollectionTuple outputs =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(ParDo.of(new TestNoOutputDoFn()).withOutputTags(mainOutputTag, additionalTags));
  PAssert.that(outputs.get(mainOutputTag)).empty();
  PAssert.that(outputs.get(firstAdditionalTag)).empty();
  PAssert.that(outputs.get(secondAdditionalTag)).empty();
  pipeline.run();
}
/**
 * Parameterized-test fixtures: ParDo.MultiOutput instances covering combinations of
 * empty vs. populated additional output tags, side inputs, splittable DoFns, and
 * state/timer DoFns.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<ParDo.MultiOutput<?, ?>> data() {
  return ImmutableList.of(
      // No additional outputs, no side inputs.
      ParDo.of(new DropElementsFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()),
      // Side inputs only.
      ParDo.of(new DropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty())
          .withSideInputs(singletonSideInput, multimapSideInput),
      // Additional outputs and side inputs.
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {}))
          .withSideInputs(singletonSideInput, multimapSideInput),
      // Additional outputs only.
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {})),
      // Splittable DoFn.
      ParDo.of(new SplittableDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
      // DoFn using state and timers.
      ParDo.of(new StateTimerDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()));
}