/**
 * Verifies that translating a pipeline which applies {@code TestTransform} fails with an
 * {@link IllegalStateException} whose message contains "no translator registered". This test
 * does not register a translator for {@code TestTransform} itself.
 */
@Test
public void testTransformTranslatorMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  p.apply(Create.of(Arrays.asList(1, 2, 3))).apply(new TestTransform());

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(containsString("no translator registered"));

  // The expected exception is raised by this call, so it has to be the final statement of the
  // test: statements placed after it would never execute and would only look like verification.
  DataflowPipelineTranslator.fromOptions(options)
      .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList());
}
/**
 * Static factory that builds a {@link DataflowPipelineTranslator} for the supplied options.
 *
 * @param options the pipeline options handed to the translator's constructor
 * @return a new translator instance; every call creates a fresh object
 */
public static DataflowPipelineTranslator fromOptions(DataflowPipelineOptions options) {
  DataflowPipelineTranslator translator = new DataflowPipelineTranslator(options);
  return translator;
}
/**
 * Translates one primitive transform node with the translator looked up for the transform's
 * class.
 *
 * <p>While the translator runs, {@code currentTransform} holds the applied transform derived
 * from {@code node}; it is reset to {@code null} after the translator returns normally.
 *
 * @param node the hierarchy node whose transform is being translated
 */
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  PTransform<?, ?> primitive = node.getTransform();
  TransformTranslator registered = getTransformTranslator(primitive.getClass());

  // Fail fast when nothing was registered for this transform class.
  checkState(
      registered != null,
      "no translator registered for primitive transform %s at node %s",
      primitive,
      node.getFullName());

  LOG.debug("Translating {}", primitive);

  currentTransform = node.toAppliedPTransform(getPipeline());
  registered.translate(primitive, this);
  currentTransform = null;
}
maybeRegisterDebuggee(dataflowOptions, requestId); JobSpecification jobSpecification = translator.translate(pipeline, this, packages); fileLocation); ResourceId fileResource = FileSystems.matchNewResource(fileLocation, false /* isDirectory */); String workSpecJson = DataflowPipelineTranslator.jobToString(newJob); try (PrintWriter printWriter = new PrintWriter( .getStager() .stageToFile( DataflowPipelineTranslator.jobToString(newJob).getBytes(UTF_8), DATAFLOW_GRAPH_FILE_NAME); newJob.getSteps().clear();
@Test public void testTransformTranslator() throws IOException { // Test that we can provide a custom translation DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = Pipeline.create(options); TestTransform transform = new TestTransform(); p.apply(Create.of(Arrays.asList(1, 2, 3)).withCoder(BigEndianIntegerCoder.of())) .apply(transform); DataflowPipelineTranslator translator = DataflowRunner.fromOptions(options).getTranslator(); DataflowPipelineTranslator.registerTransformTranslator( TestTransform.class, (transform1, context) -> { transform1.translated = true; // Note: This is about the minimum needed to fake out a // translation. This obviously isn't a real translation. TransformTranslator.StepTranslationContext stepContext = context.addStep(transform1, "TestTranslate"); stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform1)); }); translator.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()); assertTrue(transform.translated); }
/**
 * Creates a runner for the given options, building its Dataflow client and its pipeline
 * translator from those same options.
 *
 * @param options the options stored on the runner and passed to the client and translator
 *     factories
 */
@VisibleForTesting
protected DataflowRunner(DataflowPipelineOptions options) {
  // Both tracking sets start out empty.
  this.pcollectionsRequiringIndexedFormat = new HashSet<>();
  this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();

  this.options = options;
  this.dataflowClient = DataflowClient.create(options);
  this.translator = DataflowPipelineTranslator.fromOptions(options);
}
/**
 * Records the inputs of a step: the main input is added under
 * {@code PropertyNames.PARALLEL_INPUT}, then the side inputs are handed to
 * {@code translateSideInputs}.
 *
 * @param stepContext the step receiving the inputs
 * @param input the main input collection
 * @param sideInputs the side-input views forwarded to {@code translateSideInputs}
 * @param context the translation context forwarded to {@code translateSideInputs}
 */
private static void translateInputs(
    StepTranslationContext stepContext,
    PCollection<?> input,
    List<PCollectionView<?>> sideInputs,
    TranslationContext context) {
  // Main input first, side inputs second.
  stepContext.addInput(PropertyNames.PARALLEL_INPUT, input);
  translateSideInputs(stepContext, sideInputs, context);
}
@Test public void testInaccessibleProvider() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline pipeline = Pipeline.create(options); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); pipeline.apply(TextIO.read().from(new TestValueProvider())); // Check that translation does not fail. t.translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList()); }
/**
 * Checks that, when the test sets no subnetwork on the options, the translated job has exactly
 * one worker pool and that pool's subnetwork is {@code null}.
 */
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertNull(translatedJob.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
/**
 * Checks that, when the test sets no network on the options, the translated job has exactly one
 * worker pool and that pool's network is {@code null}.
 */
@Test
public void testNetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertNull(translatedJob.getEnvironment().getWorkerPools().get(0).getNetwork());
}
/**
 * Checks that a subnetwork set on the options appears on the single worker pool of the
 * translated job.
 */
@Test
public void testSubnetworkConfig() throws IOException {
  final String expectedSubnetwork = "regions/REGION/subnetworks/SUBNETWORK";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSubnetwork(expectedSubnetwork);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertEquals(
      expectedSubnetwork,
      translatedJob.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
/**
 * Checks that a zone set on the options appears on the single worker pool of the translated
 * job.
 */
@Test
public void testZoneConfig() throws IOException {
  final String expectedZone = "test-zone-1";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setZone(expectedZone);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertEquals(expectedZone, translatedJob.getEnvironment().getWorkerPools().get(0).getZone());
}
/**
 * Checks that a disk size set on the options appears on the single worker pool of the
 * translated job.
 */
@Test
public void testDiskSizeGbConfig() throws IOException {
  // Kept boxed so the comparison below goes through the same assertEquals overload as before.
  final Integer expectedDiskSizeGb = 1234;

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(expectedDiskSizeGb);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertEquals(
      expectedDiskSizeGb,
      translatedJob.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
/**
 * Checks that a worker machine type set on the options appears on the single worker pool of the
 * translated job.
 */
@Test
public void testWorkerMachineTypeConfig() throws IOException {
  final String expectedMachineType = "test-machine-type";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerMachineType(expectedMachineType);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());

  WorkerPool onlyPool = translatedJob.getEnvironment().getWorkerPools().get(0);
  assertEquals(expectedMachineType, onlyPool.getMachineType());
}
/**
 * Checks that a network set on the options appears on the single worker pool of the translated
 * job.
 */
@Test
public void testNetworkConfig() throws IOException {
  final String expectedNetwork = "test-network";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(expectedNetwork);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job translatedJob = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertEquals(
      expectedNetwork, translatedJob.getEnvironment().getWorkerPools().get(0).getNetwork());
}
@Test public void testToIterableTranslationWithIsmSideInput() throws Exception { // A "change detector" test that makes sure the translation // of getting a PCollectionView<Iterable<T>> does not change // in bad ways during refactor DataflowPipelineOptions options = buildPipelineOptions(); DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); Pipeline pipeline = Pipeline.create(options); pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable()); DataflowRunner runner = DataflowRunner.fromOptions(options); runner.replaceTransforms(pipeline); Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob(); assertAllStepOutputsHaveUniqueIds(job); List<Step> steps = job.getSteps(); assertEquals(3, steps.size()); @SuppressWarnings("unchecked") List<Map<String, Object>> toIsmRecordOutputs = (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO); assertTrue( Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format")); Step collectionToSingletonStep = steps.get(2); assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind()); }
@Test public void testMultiGraphPipelineSerialization() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = Pipeline.create(options); PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3)); input.apply(new UnrelatedOutputCreator()); input.apply(new UnboundOutputCreator()); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions( PipelineOptionsFactory.as(DataflowPipelineOptions.class)); // Check that translation doesn't fail. JobSpecification jobSpecification = t.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()); assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob()); }
@Test public void testScalingAlgorithmMissing() throws IOException { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = buildPipeline(options); p.traverseTopologically(new RecordingPipelineVisitor()); Job job = DataflowPipelineTranslator.fromOptions(options) .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()) .getJob(); assertEquals(1, job.getEnvironment().getWorkerPools().size()); // Autoscaling settings are always set. assertNull( job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm()); assertEquals( 0, job.getEnvironment() .getWorkerPools() .get(0) .getAutoscalingSettings() .getMaxNumWorkers() .intValue()); }
/** * Test that in translation the name for a collection (in this case just a Create output) is * overridden to be what the Dataflow service expects. */ @Test public void testNamesOverridden() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); DataflowRunner runner = DataflowRunner.fromOptions(options); options.setStreaming(false); DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); Pipeline pipeline = Pipeline.create(options); pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle"); runner.replaceTransforms(pipeline); Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob(); // The Create step Step step = job.getSteps().get(0); // This is the name that is "set by the user" that the Dataflow translator must override String userSpecifiedName = getString( Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0), PropertyNames.USER_NAME); // This is the calculated name that must actually be used String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0"; assertThat(userSpecifiedName, equalTo(calculatedName)); }
@Test public void testToSingletonTranslationWithIsmSideInput() throws Exception { // A "change detector" test that makes sure the translation // of getting a PCollectionView<T> does not change // in bad ways during refactor DataflowPipelineOptions options = buildPipelineOptions(); DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); Pipeline pipeline = Pipeline.create(options); pipeline.apply(Create.of(1)).apply(View.asSingleton()); DataflowRunner runner = DataflowRunner.fromOptions(options); runner.replaceTransforms(pipeline); Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob(); assertAllStepOutputsHaveUniqueIds(job); List<Step> steps = job.getSteps(); assertEquals(9, steps.size()); @SuppressWarnings("unchecked") List<Map<String, Object>> toIsmRecordOutputs = (List<Map<String, Object>>) steps.get(7).getProperties().get(PropertyNames.OUTPUT_INFO); assertTrue( Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format")); Step collectionToSingletonStep = steps.get(8); assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind()); }