/**
 * Verifies that translating a pipeline containing a transform with no registered
 * {@code TransformTranslator} fails with an {@link IllegalStateException} whose message
 * mentions the missing registration.
 */
@Test
public void testTransformTranslatorMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(Arrays.asList(1, 2, 3))).apply(new TestTransform());

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(containsString("no translator registered"));
  // Expected to throw; the ExpectedException rule verifies type and message.
  DataflowPipelineTranslator.fromOptions(options)
      .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList());
  // NOTE: translate() above always throws under this rule, so any statements after it never
  // execute. The original trailing Mockito.verify(mockJobs)... / assertValidJob(...) calls
  // were unreachable dead code and have been removed.
}
@Test public void testInaccessibleProvider() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline pipeline = Pipeline.create(options); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); pipeline.apply(TextIO.read().from(new TestValueProvider())); // Check that translation does not fail. t.translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList()); }
/** With no subnetwork option set, the single worker pool must have a null subnetwork. */
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertNull(pools.get(0).getSubnetwork());
}
/** With no network option set, the single worker pool must have a null network. */
@Test
public void testNetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertNull(pools.get(0).getNetwork());
}
/** A configured subnetwork must be propagated verbatim onto the worker pool. */
@Test
public void testSubnetworkConfig() throws IOException {
  final String subnetwork = "regions/REGION/subnetworks/SUBNETWORK";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSubnetwork(subnetwork);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertEquals(subnetwork, pools.get(0).getSubnetwork());
}
/** A configured zone must be propagated verbatim onto the worker pool. */
@Test
public void testZoneConfig() throws IOException {
  final String zone = "test-zone-1";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setZone(zone);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertEquals(zone, pools.get(0).getZone());
}
/** A configured disk size must be propagated onto the worker pool. */
@Test
public void testDiskSizeGbConfig() throws IOException {
  // Boxed Integer on purpose: assertEquals(Object, Object) compares against the boxed getter.
  final Integer expectedDiskSizeGb = 1234;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(expectedDiskSizeGb);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertEquals(expectedDiskSizeGb, pools.get(0).getDiskSizeGb());
}
/** A configured network must be propagated verbatim onto the worker pool. */
@Test
public void testNetworkConfig() throws IOException {
  final String network = "test-network";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(network);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertEquals(network, pools.get(0).getNetwork());
}
/** A configured worker machine type must be propagated onto the worker pool. */
@Test
public void testWorkerMachineTypeConfig() throws IOException {
  final String machineType = "test-machine-type";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerMachineType(machineType);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  List<WorkerPool> pools = job.getEnvironment().getWorkerPools();
  assertEquals(1, pools.size());
  assertEquals(machineType, pools.get(0).getMachineType());
}
/**
 * "Change detector" test: pins the translated step layout for materializing a
 * {@code PCollectionView<Iterable<T>>} so accidental changes during refactoring are caught.
 */
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  // Exactly three steps expected; indices below (1, 2) depend on this fixed layout.
  assertEquals(3, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  // The ToIsmRecord step (index 1) must emit indexed-format output.
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  // The final step (index 2) must be the CollectionToSingleton materialization.
  Step collectionToSingletonStep = steps.get(2);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
@Test public void testScalingAlgorithmMissing() throws IOException { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = buildPipeline(options); p.traverseTopologically(new RecordingPipelineVisitor()); Job job = DataflowPipelineTranslator.fromOptions(options) .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()) .getJob(); assertEquals(1, job.getEnvironment().getWorkerPools().size()); // Autoscaling settings are always set. assertNull( job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm()); assertEquals( 0, job.getEnvironment() .getWorkerPools() .get(0) .getAutoscalingSettings() .getMaxNumWorkers() .intValue()); }
/**
 * "Change detector" test: pins the translated step layout for materializing a
 * {@code PCollectionView<T>} singleton so accidental changes during refactoring are caught.
 */
@Test
public void testToSingletonTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  // Exactly nine steps expected; indices below (7, 8) depend on this fixed layout.
  assertEquals(9, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(7).getProperties().get(PropertyNames.OUTPUT_INFO);
  // The ToIsmRecord step (index 7) must emit indexed-format output.
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  // The final step (index 8) must be the CollectionToSingleton materialization.
  Step collectionToSingletonStep = steps.get(8);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
/** * Test that in translation the name for a collection (in this case just a Create output) is * overridden to be what the Dataflow service expects. */ @Test public void testNamesOverridden() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); DataflowRunner runner = DataflowRunner.fromOptions(options); options.setStreaming(false); DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); Pipeline pipeline = Pipeline.create(options); pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle"); runner.replaceTransforms(pipeline); Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob(); // The Create step Step step = job.getSteps().get(0); // This is the name that is "set by the user" that the Dataflow translator must override String userSpecifiedName = getString( Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0), PropertyNames.USER_NAME); // This is the calculated name that must actually be used String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0"; assertThat(userSpecifiedName, equalTo(calculatedName)); }
/**
 * Selecting the NONE autoscaling algorithm must surface as the literal service string
 * "AUTOSCALING_ALGORITHM_NONE" with a zero maxNumWorkers.
 */
@Test
public void testScalingAlgorithmNone() throws IOException {
  final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling =
      DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.NONE;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setAutoscalingAlgorithm(noScaling);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  WorkerPool pool = job.getEnvironment().getWorkerPools().get(0);
  assertEquals("AUTOSCALING_ALGORITHM_NONE", pool.getAutoscalingSettings().getAlgorithm());
  assertEquals(0, pool.getAutoscalingSettings().getMaxNumWorkers().intValue());
}
/** maxNumWorkers must be forwarded to the service even when no algorithm is configured. */
@Test
public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException {
  // Typed null keeps the same setter overload resolution as an explicit algorithm value.
  final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType unsetAlgorithm = null;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setMaxNumWorkers(42);
  options.setAutoscalingAlgorithm(unsetAlgorithm);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  WorkerPool pool = job.getEnvironment().getWorkerPools().get(0);
  assertNull(pool.getAutoscalingSettings().getAlgorithm());
  assertEquals(42, pool.getAutoscalingSettings().getMaxNumWorkers().intValue());
}
// NOTE(review): orphan statement — appears to be a fragment of a test method whose enclosing
// definition is not visible in this chunk (`translator`, `pipeline`, and `runner` are not
// declared at this point). Verify against the full file; likely a chunking artifact.
Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
@Test public void testTransformTranslator() throws IOException { // Test that we can provide a custom translation DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = Pipeline.create(options); TestTransform transform = new TestTransform(); p.apply(Create.of(Arrays.asList(1, 2, 3)).withCoder(BigEndianIntegerCoder.of())) .apply(transform); DataflowPipelineTranslator translator = DataflowRunner.fromOptions(options).getTranslator(); DataflowPipelineTranslator.registerTransformTranslator( TestTransform.class, (transform1, context) -> { transform1.translated = true; // Note: This is about the minimum needed to fake out a // translation. This obviously isn't a real translation. TransformTranslator.StepTranslationContext stepContext = context.addStep(transform1, "TestTranslate"); stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform1)); }); translator.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()); assertTrue(transform.translated); }
/**
 * Translating a pipeline with multiple independent output branches from one input must not
 * fail, and every step output must get a unique id.
 */
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  // Two unrelated consumers fan out from the same input collection.
  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());

  // NOTE(review): the translator is built from freshly-created default options rather than the
  // `options` used to build the pipeline — presumably intentional (translation should not
  // depend on the pipeline's options), but verify against the test's intent.
  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));

  // Check that translation doesn't fail.
  JobSpecification jobSpecification =
      t.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
// NOTE(review): orphan statement — appears to be a fragment of a test method whose enclosing
// definition is not visible in this chunk (`translator`, `pipeline`, and `runner` are not
// declared at this point). Verify against the full file; likely a chunking artifact.
Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
/** This tests a few corner cases that should not crash. */ @Test public void testGoodWildcards() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline pipeline = Pipeline.create(options); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); applyRead(pipeline, "gs://bucket/foo"); applyRead(pipeline, "gs://bucket/foo/"); applyRead(pipeline, "gs://bucket/foo/*"); applyRead(pipeline, "gs://bucket/foo/?"); applyRead(pipeline, "gs://bucket/foo/[0-9]"); applyRead(pipeline, "gs://bucket/foo/*baz*"); applyRead(pipeline, "gs://bucket/foo/*baz?"); applyRead(pipeline, "gs://bucket/foo/[0-9]baz?"); applyRead(pipeline, "gs://bucket/foo/baz/*"); applyRead(pipeline, "gs://bucket/foo/baz/*wonka*"); applyRead(pipeline, "gs://bucket/foo/*baz/wonka*"); applyRead(pipeline, "gs://bucket/foo*/baz"); applyRead(pipeline, "gs://bucket/foo?/baz"); applyRead(pipeline, "gs://bucket/foo[0-9]/baz"); // Check that translation doesn't fail. JobSpecification jobSpecification = t.translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList()); assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob()); }