/**
 * Asserts that the root transforms of {@code fusedProto} are listed in topological order.
 *
 * <p>Walking the root transform ids in their listed order, every PCollection a root consumes
 * must already have been produced by an earlier root, and no root may produce a PCollection
 * that an earlier root has already consumed.
 *
 * @param fusedProto the pipeline whose root transform ordering is checked
 */
private void assertRootsInTopologicalOrder(RunnerApi.Pipeline fusedProto) {
  Set<String> alreadyConsumed = new HashSet<>();
  Set<String> alreadyProduced = new HashSet<>();
  for (String rootId : fusedProto.getRootTransformIdsList()) {
    PTransform root = fusedProto.getComponents().getTransformsOrThrow(rootId);

    // Every input of this root must have been emitted by some earlier root.
    String missingProducerMessage =
        String.format(
            "All %s consumed by %s must be produced before it",
            PCollection.class.getSimpleName(), rootId);
    assertThat(
        missingProducerMessage,
        alreadyProduced,
        hasItems(root.getInputsMap().values().toArray(new String[0])));

    // None of this root's outputs may have been read by an earlier root.
    for (String earlierInput : alreadyConsumed) {
      String lateProducerMessage =
          String.format(
              "%s %s was consumed before all of its producers produced it",
              PCollection.class.getSimpleName(), earlierInput);
      assertThat(lateProducerMessage, root.getOutputsMap().values(), not(hasItem(earlierInput)));
    }

    alreadyConsumed.addAll(root.getInputsMap().values());
    alreadyProduced.addAll(root.getOutputsMap().values());
  }
}
}
private DescendantConsumers getRootConsumers(PTransformNode rootNode) { checkArgument( rootNode.getTransform().getInputsCount() == 0, "Transform %s is not at the root of the graph (consumes %s)", rootNode.getId(), rootNode.getTransform().getInputsMap()); checkArgument( !pipeline.getEnvironment(rootNode).isPresent(), "%s requires all root nodes to be runner-implemented %s primitives, " + "but transform %s executes in environment %s", GreedyPipelineFuser.class.getSimpleName(), PTransformTranslation.IMPULSE_TRANSFORM_URN, rootNode.getId(), pipeline.getEnvironment(rootNode)); Set<PTransformNode> unfused = new HashSet<>(); unfused.add(rootNode); NavigableSet<CollectionConsumer> environmentNodes = new TreeSet<>(); // Walk down until the first environments are found, and fuse them as appropriate. for (PCollectionNode output : pipeline.getOutputPCollections(rootNode)) { DescendantConsumers descendants = getDescendantConsumers(output); unfused.addAll(descendants.getUnfusedNodes()); environmentNodes.addAll(descendants.getFusibleConsumers()); } return DescendantConsumers.of(unfused, environmentNodes); }
unproducedCollections.remove(producedNode); for (Map.Entry<String, String> consumed : transform.getInputsMap().entrySet()) {
private <T> void translateFlatten( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { Map<String, String> allInputs = pipeline.getComponents().getTransformsOrThrow(id).getInputsMap();
private <T> void translateFlatten( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { RunnerApi.PTransform transform = pipeline.getComponents().getTransformsOrThrow(id); Map<String, String> allInputs = transform.getInputsMap();
// Take the single value of the transform's inputs map as the input PCollection id.
// Guava's Iterables.getOnlyElement throws unless there is exactly one entry.
String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); String outputCollectionId = Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values());
for (String inputId : transform.getInputsMap().keySet()) { String pcollectionId = transform.getInputsOrThrow(inputId); checkArgument(
private static <T> void translateFlatten( PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) { Map<String, String> allInputs = transform.getTransform().getInputsMap(); DataSet<WindowedValue<T>> result = null;
// Take the single value of the transform's inputs map as the input PCollection id.
// Guava's Iterables.getOnlyElement throws unless there is exactly one entry.
String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
transform.getInputsMap().values().contains(payload.getInput()), "ExecutableStage %s uses unknown input %s", id,
Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); String outputCollectionId = Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values());
private static <T> void translateFlatten( PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) { Map<String, String> allInputs = transform.getTransform().getInputsMap(); DataSet<WindowedValue<T>> result = null;
private <T> void translateAssignWindows( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { RunnerApi.Components components = pipeline.getComponents(); RunnerApi.PTransform transform = components.getTransformsOrThrow(id); RunnerApi.WindowIntoPayload payload; try { payload = RunnerApi.WindowIntoPayload.parseFrom(transform.getSpec().getPayload()); } catch (InvalidProtocolBufferException e) { throw new IllegalArgumentException(e); } //TODO: https://issues.apache.org/jira/browse/BEAM-4296 // This only works for well known window fns, we should defer this execution to the SDK // if the WindowFn can't be parsed or just defer it all the time. WindowFn<T, ? extends BoundedWindow> windowFn = (WindowFn<T, ? extends BoundedWindow>) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()); String inputCollectionId = Iterables.getOnlyElement(transform.getInputsMap().values()); String outputCollectionId = Iterables.getOnlyElement(transform.getOutputsMap().values()); Coder<WindowedValue<T>> outputCoder = instantiateCoder(outputCollectionId, components); TypeInformation<WindowedValue<T>> resultTypeInfo = new CoderTypeInformation<>(outputCoder); DataStream<WindowedValue<T>> inputDataStream = context.getDataStreamOrThrow(inputCollectionId); FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction = new FlinkAssignWindows<>(windowFn); DataStream<WindowedValue<T>> resultDataStream = inputDataStream .flatMap(assignWindowsFunction) .name(transform.getUniqueName()) .returns(resultTypeInfo); context.addDataStream(outputCollectionId, resultDataStream); }
private <T> void translateAssignWindows( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { RunnerApi.Components components = pipeline.getComponents(); RunnerApi.PTransform transform = components.getTransformsOrThrow(id); RunnerApi.WindowIntoPayload payload; try { payload = RunnerApi.WindowIntoPayload.parseFrom(transform.getSpec().getPayload()); } catch (InvalidProtocolBufferException e) { throw new IllegalArgumentException(e); } //TODO: https://issues.apache.org/jira/browse/BEAM-4296 // This only works for well known window fns, we should defer this execution to the SDK // if the WindowFn can't be parsed or just defer it all the time. WindowFn<T, ? extends BoundedWindow> windowFn = (WindowFn<T, ? extends BoundedWindow>) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()); String inputCollectionId = Iterables.getOnlyElement(transform.getInputsMap().values()); String outputCollectionId = Iterables.getOnlyElement(transform.getOutputsMap().values()); Coder<WindowedValue<T>> outputCoder = instantiateCoder(outputCollectionId, components); TypeInformation<WindowedValue<T>> resultTypeInfo = new CoderTypeInformation<>(outputCoder); DataStream<WindowedValue<T>> inputDataStream = context.getDataStreamOrThrow(inputCollectionId); FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction = new FlinkAssignWindows<>(windowFn); DataStream<WindowedValue<T>> resultDataStream = inputDataStream .flatMap(assignWindowsFunction) .name(transform.getUniqueName()) .returns(resultTypeInfo); context.addDataStream(outputCollectionId, resultDataStream); }
public Collection<UserStateReference> getUserStates(PTransformNode transform) { return getLocalUserStateNames(transform.getTransform()) .stream() .map( localName -> { String transformId = transform.getId(); PTransform transformProto = components.getTransformsOrThrow(transformId); // Get the main input PCollection id. String collectionId = transform .getTransform() .getInputsOrThrow( Iterables.getOnlyElement( Sets.difference( transform.getTransform().getInputsMap().keySet(), ImmutableSet.builder() .addAll(getLocalSideInputNames(transformProto)) .addAll(getLocalTimerNames(transformProto)) .build()))); PCollection collection = components.getPcollectionsOrThrow(collectionId); return UserStateReference.of( PipelineNode.pTransform(transformId, transformProto), localName, PipelineNode.pCollection(collectionId, collection)); }) .collect(Collectors.toSet()); }
/**
 * Translates a reshuffle transform by rebalancing its input data set.
 *
 * <p>The data set registered for the transform's single input PCollection is rebalanced and
 * registered under the transform's single output PCollection id.
 *
 * @param transform the reshuffle transform node
 * @param pipeline the enclosing pipeline proto (not read by this method)
 * @param context translation context supplying the input data set and receiving the output
 */
private static <K, V> void translateReshuffle(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  String inputId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  DataSet<WindowedValue<KV<K, V>>> input = context.getDataSetOrThrow(inputId);

  String outputId = Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values());
  context.addDataSet(outputId, input.rebalance());
}
/**
 * Translates a reshuffle transform by rebalancing its input data stream.
 *
 * <p>The stream registered for the transform's single input PCollection is rebalanced and
 * registered under the transform's single output PCollection id.
 *
 * @param id id of the reshuffle transform, looked up in the pipeline's components
 * @param pipeline the pipeline proto containing the transform
 * @param context translation context supplying the input stream and receiving the output
 */
private <K, V> void translateReshuffle(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform reshuffle = pipeline.getComponents().getTransformsOrThrow(id);

  String inputId = Iterables.getOnlyElement(reshuffle.getInputsMap().values());
  DataStream<WindowedValue<KV<K, V>>> input = context.getDataStreamOrThrow(inputId);

  String outputId = Iterables.getOnlyElement(reshuffle.getOutputsMap().values());
  context.addDataStream(outputId, input.rebalance());
}
/**
 * Translates a reshuffle transform by rebalancing its input data set.
 *
 * <p>The data set registered for the transform's single input PCollection is rebalanced and
 * registered under the transform's single output PCollection id.
 *
 * @param transform the reshuffle transform node
 * @param pipeline the enclosing pipeline proto (not read by this method)
 * @param context translation context supplying the input data set and receiving the output
 */
private static <K, V> void translateReshuffle(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  String inputId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  DataSet<WindowedValue<KV<K, V>>> input = context.getDataSetOrThrow(inputId);

  String outputId = Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values());
  context.addDataSet(outputId, input.rebalance());
}
/**
 * Translates a reshuffle transform by rebalancing its input data stream.
 *
 * <p>The stream registered for the transform's single input PCollection is rebalanced and
 * registered under the transform's single output PCollection id.
 *
 * @param id id of the reshuffle transform, looked up in the pipeline's components
 * @param pipeline the pipeline proto containing the transform
 * @param context translation context supplying the input stream and receiving the output
 */
private <K, V> void translateReshuffle(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform reshuffle = pipeline.getComponents().getTransformsOrThrow(id);

  String inputId = Iterables.getOnlyElement(reshuffle.getInputsMap().values());
  DataStream<WindowedValue<KV<K, V>>> input = context.getDataStreamOrThrow(inputId);

  String outputId = Iterables.getOnlyElement(reshuffle.getOutputsMap().values());
  context.addDataStream(outputId, input.rebalance());
}