/**
 * Refuses any {@link DoFn} that declares state or timers.
 *
 * <p>State declarations are checked first, then timer declarations; the first kind found
 * triggers the failure.
 *
 * @param doFn the {@link DoFn} to inspect.
 * @throws UnsupportedOperationException if {@code doFn} has at least one state declaration or at
 *     least one timer declaration.
 */
public static void rejectStateAndTimers(DoFn<?, ?> doFn) {
  DoFnSignature fnSignature = DoFnSignatures.getSignature(doFn.getClass());

  // These three values are shared by both error messages.
  String fnName = doFn.getClass().getName();
  String doFnLabel = DoFn.class.getSimpleName();
  String runnerLabel = SparkRunner.class.getSimpleName();

  if (!fnSignature.stateDeclarations().isEmpty()) {
    String message =
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with state in the %s.",
            DoFn.StateId.class.getSimpleName(), fnName, doFnLabel, runnerLabel);
    throw new UnsupportedOperationException(message);
  }

  if (!fnSignature.timerDeclarations().isEmpty()) {
    String message =
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
            DoFn.TimerId.class.getSimpleName(), fnName, doFnLabel, runnerLabel);
    throw new UnsupportedOperationException(message);
  }
}
/** Reports whether the process-element method of {@code signature} is splittable. */
@Override
public boolean isSplittable() {
  final boolean splittable = signature.processElement().isSplittable();
  return splittable;
}
private static <InputT, OutputT> void verifyFnIsStateful(DoFn<InputT, OutputT> fn) { DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); // It is still correct to use this without state or timers, but a bad idea. // Since it is internal it should never be used wrong, so it is OK to crash. checkState( signature.usesState() || signature.usesTimers(), "%s used for %s that does not use state or timers.", BatchStatefulParDoOverrides.class.getSimpleName(), ParDo.class.getSimpleName()); } }
/** * Perform common validations of the {@link DoFn}, for example ensuring that state is used * correctly and that its features can be supported. */ private static <InputT, OutputT> void validate(DoFn<InputT, OutputT> fn) { DoFnSignature signature = DoFnSignatures.getSignature((Class) fn.getClass()); // State is semantically incompatible with splitting if (!signature.stateDeclarations().isEmpty() && signature.processElement().isSplittable()) { throw new UnsupportedOperationException( String.format( "%s is splittable and uses state, but these are not compatible", fn.getClass().getName())); } // Timers are semantically incompatible with splitting if (!signature.timerDeclarations().isEmpty() && signature.processElement().isSplittable()) { throw new UnsupportedOperationException( String.format( "%s is splittable and uses timers, but these are not compatible", fn.getClass().getName())); } }
/**
 * Tells whether the {@link DoFn} described by this signature uses state.
 *
 * @return {@code true} when there is at least one state declaration.
 */
public boolean usesState() {
  return !stateDeclarations().isEmpty();
}
// Pull the process-element method and the restriction-related methods (initial restriction,
// tracker construction, restriction coder, restriction splitting) off the signature into locals.
DoFnSignature.ProcessElementMethod processElement = signature.processElement();
DoFnSignature.GetInitialRestrictionMethod getInitialRestriction =
    signature.getInitialRestriction();
DoFnSignature.NewTrackerMethod newTracker = signature.newTracker();
DoFnSignature.GetRestrictionCoderMethod getRestrictionCoder = signature.getRestrictionCoder();
DoFnSignature.SplitRestrictionMethod splitRestriction = signature.splitRestriction();
DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); if (signature.processElement().isSplittable()) { throw new UnsupportedOperationException( String.format( if (signature.timerDeclarations().size() > 0) { throw new UnsupportedOperationException( String.format(
@Test public void testDeclAndUsageOfStateInSuperclass() throws Exception { class DoFnOverridingAbstractStateUse extends DoFnDeclaringStateAndAbstractUse { @Override public void processWithState(ProcessContext c, ValueState<String> state) {} } DoFnSignature sig = DoFnSignatures.getSignature(new DoFnOverridingAbstractStateUse().getClass()); assertThat(sig.stateDeclarations().size(), equalTo(1)); assertThat(sig.processElement().extraParameters().size(), equalTo(2)); DoFnSignature.StateDeclaration decl = sig.stateDeclarations().get(DoFnOverridingAbstractStateUse.STATE_ID); StateParameter stateParam = (StateParameter) sig.processElement().extraParameters().get(1); assertThat( decl.field(), equalTo(DoFnDeclaringStateAndAbstractUse.class.getDeclaredField("myStateSpec"))); // The method we pull out is the superclass method; this is what allows validation to remain // simple. The later invokeDynamic instruction causes it to invoke the actual implementation. assertThat(stateParam.referent(), equalTo(decl)); }
/**
 * Tells whether the {@link DoFn} described by this signature uses timers.
 *
 * @return {@code true} when there is at least one timer declaration.
 */
public boolean usesTimers() {
  return !timerDeclarations().isEmpty();
}
if (signature.usesState() || signature.usesTimers()) { validateStateApplicableForInput(fn, input); DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement(); RowParameter rowParameter = processElementMethod.getRowParameter(); rowParameter, input.getCoder(), signature.fieldAccessDeclarations(), fn); input.isBounded().and(signature.isBoundedPerElement())); @SuppressWarnings("unchecked") Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder();
Class<? extends DoFn<?, ?>> fnClass = signature.fnClass(); .intercept(new ProcessElementDelegation(clazzDescription, signature.processElement())) .intercept(delegateOrNoop(clazzDescription, signature.startBundle())) .method(ElementMatchers.named("invokeFinishBundle")) .intercept(delegateOrNoop(clazzDescription, signature.finishBundle())) .method(ElementMatchers.named("invokeSetup")) .intercept(delegateOrNoop(clazzDescription, signature.setup())) .method(ElementMatchers.named("invokeTeardown")) .intercept(delegateOrNoop(clazzDescription, signature.teardown())) .method(ElementMatchers.named("invokeOnWindowExpiration")) .intercept( delegateMethodWithExtraParametersOrNoop( clazzDescription, signature.onWindowExpiration())) .method(ElementMatchers.named("invokeGetInitialRestriction")) .intercept( delegateWithDowncastOrThrow(clazzDescription, signature.getInitialRestriction())) .method(ElementMatchers.named("invokeSplitRestriction")) .intercept(splitRestrictionDelegation(clazzDescription, signature)) .intercept(getRestrictionCoderDelegation(clazzDescription, signature)) .method(ElementMatchers.named("invokeNewTracker")) .intercept(newTrackerDelegation(clazzDescription, signature.newTracker()));
/**
 * Chooses the {@link Implementation} used for the restriction-coder invoker method.
 *
 * <ul>
 *   <li>Not splittable: an implementation that throws {@link UnsupportedOperationException}.
 *   <li>Splittable with a restriction-coder method on the signature: a delegation to that
 *       method's target.
 *   <li>Splittable without one: a delegation to a {@code DefaultRestrictionCoder} built from the
 *       restriction type of the initial-restriction method.
 * </ul>
 *
 * @param doFnType type description handed to the downcasting delegation.
 * @param signature signature of the {@code DoFn} being wrapped.
 */
private static Implementation getRestrictionCoderDelegation(
    TypeDescription doFnType, DoFnSignature signature) {
  if (!signature.processElement().isSplittable()) {
    return ExceptionMethod.throwing(UnsupportedOperationException.class);
  }

  if (signature.getRestrictionCoder() != null) {
    return new DowncastingParametersMethodDelegation(
        doFnType, signature.getRestrictionCoder().targetMethod());
  }

  return MethodDelegation.to(
      new DefaultRestrictionCoder(signature.getInitialRestriction().restrictionT()));
}
/**
 * Matches applications of {@code SplittableParDo.ProcessKeyedElements} whose {@code DoFn} is
 * splittable and bounded per element.
 */
@Override
public boolean matches(AppliedPTransform<?, ?, ?> application) {
  PTransform<?, ?> candidate = application.getTransform();
  if (!(candidate instanceof SplittableParDo.ProcessKeyedElements)) {
    return false;
  }

  DoFn<?, ?> doFn = ((SplittableParDo.ProcessKeyedElements) candidate).getFn();
  DoFnSignature fnSignature = DoFnSignatures.signatureForDoFn(doFn);

  // Boundedness is only consulted once the DoFn is known to be splittable.
  if (!fnSignature.processElement().isSplittable()) {
    return false;
  }
  return fnSignature.isBoundedPerElement() == IsBounded.BOUNDED;
}
/**
 * A {@code @FieldAccess("foo")} field on the {@link DoFn} must be discoverable through the
 * signature, readable via reflection, and the process method must expose a row parameter.
 */
@Test
public void testFieldAccess() throws IllegalAccessException {
  FieldAccessDescriptor expectedDescriptor = FieldAccessDescriptor.withFieldNames("foo", "bar");
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @FieldAccess("foo")
        final FieldAccessDescriptor fieldAccess = expectedDescriptor;

        @ProcessElement
        public void process(@FieldAccess("foo") Row row) {}
      };

  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());

  // The declaration is registered under the id given in the annotation.
  assertThat(signature.fieldAccessDeclarations().get("foo"), notNullValue());

  // The reflected field is the annotated one and holds the descriptor assigned above.
  Field declaredField = signature.fieldAccessDeclarations().get("foo").field();
  assertThat(declaredField.getName(), equalTo("fieldAccess"));
  assertThat(declaredField.get(fn), equalTo(expectedDescriptor));

  assertThat(signature.processElement().getRowParameter(), notNullValue());
}
/** * In this particular test, the super class annotated both the timer and the callback, and the * subclass overrides an abstract method. This is allowed. */ @Test public void testOnTimerDeclaredAndUsedInSuperclass() throws Exception { DoFnSignature sig = DoFnSignatures.getSignature(new DoFnOverridingAbstractCallback().getClass()); assertThat(sig.timerDeclarations().size(), equalTo(1)); assertThat(sig.onTimerMethods().size(), equalTo(1)); DoFnSignature.TimerDeclaration decl = sig.timerDeclarations().get(DoFnDeclaringTimerAndAbstractCallback.TIMER_ID); DoFnSignature.OnTimerMethod callback = sig.onTimerMethods().get(DoFnDeclaringTimerAndAbstractCallback.TIMER_ID); assertThat( decl.field(), equalTo(DoFnDeclaringTimerAndAbstractCallback.class.getDeclaredField("myTimerSpec"))); // The method we pull out is the superclass method; this is what allows validation to remain // simple. The later invokeDynamic instruction causes it to invoke the actual implementation. assertThat( callback.targetMethod(), equalTo(DoFnDeclaringTimerAndAbstractCallback.class.getDeclaredMethod("onMyTimer"))); }
/**
 * Validates the {@link DoFn} against its input {@link PCollection}: the window type descriptor
 * of the input's window function is checked against the process-element method and against
 * every on-timer method of the {@link DoFn}.
 *
 * @param input the {@link PCollection} the {@link DoFn} is applied to.
 * @param fn the {@link DoFn} whose methods are validated.
 */
private static <InputT, OutputT> void validateWindowType(
    PCollection<? extends InputT> input, DoFn<InputT, OutputT> fn) {
  DoFnSignature fnSignature = DoFnSignatures.getSignature((Class) fn.getClass());

  TypeDescriptor<? extends BoundedWindow> inputWindowType =
      input.getWindowingStrategy().getWindowFn().getWindowTypeDescriptor();

  // The process method is checked first, then each timer callback.
  validateWindowTypeForMethod(inputWindowType, fnSignature.processElement());
  for (OnTimerMethod timerCallback : fnSignature.onTimerMethods().values()) {
    validateWindowTypeForMethod(inputWindowType, timerCallback);
  }
}
/**
 * The signature of {@code BaseFnWithoutContinuation} must report bounded-per-element, and the
 * signature of {@code BaseFnWithContinuation} must report unbounded-per-element.
 */
@Test
public void testSplittableBoundednessInferredFromReturnValue() throws Exception {
  PCollection.IsBounded withoutContinuation =
      DoFnSignatures.getSignature(BaseFnWithoutContinuation.class).isBoundedPerElement();
  assertEquals(PCollection.IsBounded.BOUNDED, withoutContinuation);

  PCollection.IsBounded withContinuation =
      DoFnSignatures.getSignature(BaseFnWithContinuation.class).isBoundedPerElement();
  assertEquals(PCollection.IsBounded.UNBOUNDED, withContinuation);
}
} else { DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); if (signature.usesState()) { checkArgument(inputCoder instanceof KvCoder, "keyed input required for stateful DoFn"); @SuppressWarnings("rawtypes")
/**
 * Picks the transform that should stand in for the given {@code ParDo} application.
 *
 * <ul>
 *   <li>Splittable {@link DoFn}: the result of {@code SplittableParDo.forAppliedParDo}.
 *   <li>{@link DoFn} with state or timer declarations: a {@code GbkThenStatefulParDo} built from
 *       the fn, its main output tag, additional output tags and side inputs.
 *   <li>Anything else: the application's own transform, unchanged.
 * </ul>
 *
 * @param application the applied transform to find a replacement for.
 * @throws IOException declared by this method; presumably raised while reading the application
 *     through {@code ParDoTranslation} (confirm against that class).
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
    AppliedPTransform<
            PCollection<? extends InputT>,
            PCollectionTuple,
            PTransform<PCollection<? extends InputT>, PCollectionTuple>>
        application)
    throws IOException {
  DoFn<InputT, OutputT> doFn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
  DoFnSignature fnSignature = DoFnSignatures.getSignature(doFn.getClass());

  // Splittability takes precedence over state and timers.
  if (fnSignature.processElement().isSplittable()) {
    return SplittableParDo.forAppliedParDo((AppliedPTransform) application);
  }

  boolean declaresStateOrTimers =
      !fnSignature.stateDeclarations().isEmpty() || !fnSignature.timerDeclarations().isEmpty();
  if (!declaresStateOrTimers) {
    return application.getTransform();
  }

  return new GbkThenStatefulParDo(
      doFn,
      ParDoTranslation.getMainOutputTag(application),
      ParDoTranslation.getAdditionalOutputTags(application),
      ParDoTranslation.getSideInputs(application));
}
/**
 * Whether the {@link DoFn} described by this signature uses state.
 *
 * @return the same value as {@link #usesState()}.
 * @deprecated use {@link #usesState()} instead; this method is an alias for it.
 */
@Deprecated
public boolean isStateful() {
  // Delegate rather than repeat the check, so the alias cannot drift from usesState().
  return usesState();
}