/** * Determine if this Node belongs to a Bounded branch of the pipeline, or Unbounded, and * translate with the proper translator. */ protected <TransformT extends PTransform<? super PInput, POutput>> TransformEvaluator<TransformT> translate( TransformHierarchy.Node node, TransformT transform) { // --- determine if node is bounded/unbounded. // usually, the input determines if the PCollection to apply the next transformation to // is BOUNDED or UNBOUNDED, meaning RDD/DStream. Map<TupleTag<?>, PValue> pValues; if (node.getInputs().isEmpty()) { // in case of a PBegin, it's the output. pValues = node.getOutputs(); } else { pValues = node.getInputs(); } PCollection.IsBounded isNodeBounded = isBoundedCollection(pValues.values()); // translate accordingly. LOG.debug("Translating {} as {}", transform, isNodeBounded); return isNodeBounded.equals(PCollection.IsBounded.BOUNDED) ? translator.translateBounded(transform) : translator.translateUnbounded(transform); }
+ other.getTrigger()); isBounded = isBounded.and(input.isBounded());
if (IsBounded.UNBOUNDED.equals(input.isBounded())) { batchLoads.setMaxRetryJobs(1000);
input.isBounded().and(signature.isBoundedPerElement())); @SuppressWarnings("unchecked") Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder();
/**
 * Writes incoming {@link FeatureRowExtended} elements to per-entity file sinks.
 *
 * <p>Elements are partitioned by the entity name carried on each row; every entity is
 * written under {@code <folderName>/<entityName>} beneath {@code options.path}, where
 * the folder name is the configured job name. Unbounded inputs are fixed-windowed first
 * so the file write can finalize shards per window.
 *
 * @param input rows (successes wrapped with metadata) to persist
 * @return {@link PDone} anchored to the pipeline of the produced files
 */
@Override
public PDone expand(PCollection<FeatureRowExtended> input) {
  // Fall back to a placeholder folder when no job name was configured.
  final String folderName = options.jobName != null ? options.jobName : "unknown-jobs";
  // Dynamic destinations: one sink per entity name, keyed by a String destination.
  FileIO.Write<String, FeatureRowExtended> write =
      FileIO.<String, FeatureRowExtended>writeDynamic()
          .by((rowExtended) -> rowExtended.getRow().getEntityName())
          .withDestinationCoder(StringUtf8Coder.of())
          .withNaming(
              Contextful.fn(
                  (entityName) ->
                      FileIO.Write.defaultNaming(folderName + "/" + entityName, suffix)))
          // toTextFunction serializes each row; every destination uses a plain text sink.
          .via(Contextful.fn(toTextFunction), Contextful.fn((entityName) -> TextIO.sink()))
          .to(options.path);
  if (input.isBounded().equals(IsBounded.UNBOUNDED)) {
    // Streaming input must be windowed before a file write can emit finalized output.
    // NOTE(review): allowedLateness is ZERO here, so late rows are dropped — confirm intended.
    Window<FeatureRowExtended> minuteWindow =
        Window.<FeatureRowExtended>into(FixedWindows.of(options.getWindowDuration()))
            .triggering(AfterWatermark.pastEndOfWindow())
            .discardingFiredPanes()
            .withAllowedLateness(Duration.ZERO);
    input = input.apply(minuteWindow);
    // Fixed sharding keeps the number of open writers bounded in streaming mode.
    write = write.withNumShards(10);
  }
  WriteFilesResult<String> outputFiles = input.apply(write);
  return PDone.in(outputFiles.getPipeline());
}
}
@Override public PDone expand(PCollection<KV<KeyT, ValueT>> input) { // streamed pipeline must have defined configuration transformation if (input.isBounded().equals(PCollection.IsBounded.UNBOUNDED) || !input.getWindowingStrategy().equals(WindowingStrategy.globalDefault())) { checkArgument( configTransform != null, "Writing of unbounded data can be processed only with configuration transformation provider. See %s.withConfigurationTransform()", Write.class); } verifyInputWindowing(input); TypeDescriptor<Configuration> configType = new TypeDescriptor<Configuration>() {}; input .getPipeline() .getCoderRegistry() .registerCoderForType(configType, new ConfigurationCoder()); PCollectionView<Configuration> configView = createConfigurationView(input); return processJob(input, configView); }
public static <OutputT> PCollectionTuple createPrimitiveOutputFor( PCollection<?> input, DoFn<?, OutputT> fn, TupleTag<OutputT> mainOutputTag, TupleTagList additionalOutputTags, Map<TupleTag<?>, Coder<?>> outputTagsToCoders, WindowingStrategy<?, ?> windowingStrategy) { DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()), outputTagsToCoders, windowingStrategy, input.isBounded().and(signature.isBoundedPerElement())); // Set output type descriptor similarly to how ParDo.MultiOutput does it. outputs.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor()); return outputs; }
/**
 * Samples up to {@code limit} rows from both the main and error streams and logs them.
 *
 * <p>Unbounded streams are fixed-windowed first so {@code Sample.any} can emit panes.
 */
public void logNRows(PFeatureRows pFeatureRows, String name, int limit) {
  PCollection<FeatureRowExtended> successes = pFeatureRows.getMain();
  PCollection<FeatureRowExtended> failures = pFeatureRows.getErrors();

  if (successes.isBounded().equals(IsBounded.UNBOUNDED)) {
    Window<FeatureRowExtended> oneMinuteWindows =
        Window.<FeatureRowExtended>into(FixedWindows.of(Duration.standardMinutes(1L)))
            .triggering(AfterWatermark.pastEndOfWindow())
            .discardingFiredPanes()
            .withAllowedLateness(Duration.standardMinutes(1));
    successes = successes.apply(oneMinuteWindows);
    failures = failures.apply(oneMinuteWindows);
  }

  successes
      .apply("Sample success", Sample.any(limit))
      .apply("Log success sample", ParDo.of(new LoggerDoFn(Level.INFO, name + " MAIN ")));
  failures
      .apply("Sample errors", Sample.any(limit))
      .apply("Log errors sample", ParDo.of(new LoggerDoFn(Level.ERROR, name + " ERRORS ")));
}
/**
 * Rejects unbounded inputs whose windowing this write cannot handle: the global default
 * window, any non-default trigger, and any nonzero allowed lateness.
 */
private void verifyInputWindowing(PCollection<KV<KeyT, ValueT>> input) {
  if (!input.isBounded().equals(PCollection.IsBounded.UNBOUNDED)) {
    // Bounded inputs need no windowing constraints.
    return;
  }
  checkArgument(
      !input.getWindowingStrategy().equals(WindowingStrategy.globalDefault()),
      "Cannot work with %s and GLOBAL %s",
      PCollection.IsBounded.UNBOUNDED,
      WindowingStrategy.class.getSimpleName());
  checkArgument(
      input.getWindowingStrategy().getTrigger().getClass().equals(DefaultTrigger.class),
      "Cannot work with %s trigger. Write works correctly only with %s",
      input.getWindowingStrategy().getTrigger().getClass().getSimpleName(),
      DefaultTrigger.class.getSimpleName());
  checkArgument(
      input.getWindowingStrategy().getAllowedLateness().equals(Duration.ZERO),
      "Write does not allow late data.");
}
protected PCollection.IsBounded isBoundedCollection(Collection<PValue> pValues) { // anything that is not a PCollection, is BOUNDED. // For PCollections: // BOUNDED behaves as the Identity Element, BOUNDED + BOUNDED = BOUNDED // while BOUNDED + UNBOUNDED = UNBOUNDED. PCollection.IsBounded isBounded = PCollection.IsBounded.BOUNDED; for (PValue pValue : pValues) { if (pValue instanceof PCollection) { isBounded = isBounded.and(((PCollection) pValue).isBounded()); } else { isBounded = isBounded.and(PCollection.IsBounded.BOUNDED); } } return isBounded; } }
/** Dispatches to the bounded or unbounded translator based on the output collection. */
@Override
void translateNode(
    PTransform<PBegin, PCollection<T>> transform, FlinkStreamingTranslationContext context) {
  PCollection.IsBounded boundedness = context.getOutput(transform).isBounded();
  if (PCollection.IsBounded.BOUNDED.equals(boundedness)) {
    boundedTranslator.translateNode(transform, context);
  } else {
    unboundedTranslator.translateNode(transform, context);
  }
}
}
/**
 * Dispatches a source transform to the matching translator, downcasting to the concrete
 * Read type. NOTE(review): presumably bounded outputs always come from Read.Bounded and
 * unbounded ones from Read.Unbounded, making the casts safe — confirm against callers.
 */
@Override
void translateNode(
    PTransform<PBegin, PCollection<T>> transform, FlinkStreamingTranslationContext context) {
  PCollection.IsBounded outputBoundedness = context.getOutput(transform).isBounded();
  if (outputBoundedness.equals(PCollection.IsBounded.BOUNDED)) {
    boundedTranslator.translateNode((Read.Bounded<T>) transform, context);
  } else {
    unboundedTranslator.translateNode((Read.Unbounded<T>) transform, context);
  }
}
}
/**
 * Rejects unbounded join inputs whose trigger may fire more than once per window.
 *
 * <p>Joins over unbounded PCollections are only correct when every window produces its
 * output exactly once; a retriggering window would re-emit join results.
 *
 * @throws UnsupportedOperationException if the input is unbounded and its windowing
 *     strategy is not known to trigger once per window
 */
private void verifySupportedTrigger(PCollection<Row> pCollection) {
  WindowingStrategy windowingStrategy = pCollection.getWindowingStrategy();
  if (UNBOUNDED.equals(pCollection.isBounded()) && !triggersOncePerWindow(windowingStrategy)) {
    // Fix: the original message concatenation was missing a space after "per window,"
    // producing "window,such as" in the rendered error text.
    throw new UnsupportedOperationException(
        "Joining unbounded PCollections is currently only supported for "
            + "non-global windows with triggers that are known to produce output once per window, "
            + "such as the default trigger with zero allowed lateness. "
            + "In these cases Beam can guarantee it joins all input elements once per window. "
            + windowingStrategy
            + " is not supported");
  }
}
/** Routes translation to the bounded or unbounded implementation for this transform. */
@Override
void translateNode(
    PTransform<PBegin, PCollection<T>> transform, FlinkStreamingTranslationContext context) {
  boolean isBoundedOutput =
      context.getOutput(transform).isBounded().equals(PCollection.IsBounded.BOUNDED);
  if (isBoundedOutput) {
    boundedTranslator.translateNode(transform, context);
  } else {
    unboundedTranslator.translateNode(transform, context);
  }
}
}