/**
 * Delegates to {@code testViewUnbounded}, handing it the {@code pipeline} field together with a
 * freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewUnboundedAsMapDirect() {
  testViewUnbounded(pipeline, View.asMap());
}
/**
 * Delegates to {@code testViewNonmerging}, handing it the {@code pipeline} field together with a
 * freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewNonmergingAsMapDirect() {
  testViewNonmerging(pipeline, View.asMap());
}
/**
 * Builds a {@link SingletonAssert} over the contents of {@code actual} viewed as a single {@link
 * Map}, recording {@code reason} at the assertion site.
 *
 * <p>The {@link PCollection} must contain at most one value per key.
 *
 * <p>The coder of {@code actual} is cast to {@link KvCoder} without a runtime type check, so the
 * collection must really be encoded with a {@link KvCoder}; any other coder makes the cast fail.
 * The key and value coders of that {@link KvCoder} are used to build the {@link MapCoder} for the
 * resulting map.
 *
 * @param reason description attached to the captured assertion site
 * @param actual the keyed collection whose contents are asserted on as a map
 * @return an assertion over the map view of {@code actual}
 */
public static <K, V> SingletonAssert<Map<K, V>> thatMap(
    String reason, PCollection<KV<K, V>> actual) {
  @SuppressWarnings("unchecked")
  KvCoder<K, V> entryCoder = (KvCoder<K, V>) actual.getCoder();
  MapCoder<K, V> mapCoder = MapCoder.of(entryCoder.getKeyCoder(), entryCoder.getValueCoder());
  return new PCollectionViewAssert<>(
      actual, View.asMap(), mapCoder, PAssertionSite.capture(reason));
}
/**
 * Delegates to {@code testViewNonmerging}, handing it the result of {@code
 * createTestBatchRunner()} together with a freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewNonmergingAsMapBatch() {
  testViewNonmerging(createTestBatchRunner(), View.asMap());
}
/**
 * Delegates to {@code testViewUnbounded}, handing it the result of {@code
 * createTestBatchRunner()} together with a freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewUnboundedAsMapBatch() {
  testViewUnbounded(createTestBatchRunner(), View.asMap());
}
/**
 * Delegates to {@code testViewUnbounded}, handing it the result of {@code
 * createTestStreamingRunner()} together with a freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewUnboundedAsMapStreaming() {
  testViewUnbounded(createTestStreamingRunner(), View.asMap());
}
/**
 * Delegates to {@code testViewNonmerging}, handing it the result of {@code
 * createTestStreamingRunner()} together with a freshly created {@code View.asMap()} transform.
 */
@Test
public void testViewNonmergingAsMapStreaming() {
  testViewNonmerging(createTestStreamingRunner(), View.asMap());
}
@Override public PCollection<KV<Integer, float[]>> expand(final PCollection<KV<Integer, float[]>> itemMatrix) { // Parse data for user final PCollection<KV<Integer, KV<int[], float[]>>> parsedUserData = rawData .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(true))) .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner())); // Make Item Matrix view. final PCollectionView<Map<Integer, float[]>> itemMatrixView = itemMatrix.apply(View.asMap()); // Get new User Matrix final PCollectionView<Map<Integer, float[]>> userMatrixView = parsedUserData .apply(ParDo.of(new AlternatingLeastSquare.CalculateNextMatrix(numFeatures, lambda, itemMatrixView)) .withSideInputs(itemMatrixView)) .apply(View.asMap()); // return new Item Matrix return parsedItemData.apply( ParDo.of(new AlternatingLeastSquare.CalculateNextMatrix(numFeatures, lambda, userMatrixView)) .withSideInputs(userMatrixView)); } }
@Override public PCollection<KV<Integer, float[]>> expand(final PCollection<KV<Integer, float[]>> itemMatrix) { // Make Item Matrix view. final PCollectionView<Map<Integer, float[]>> itemMatrixView = itemMatrix.apply(GroupByKey.create()).apply(ParDo.of(new UngroupSingleVectorList())).apply(View.asMap()); // Get new User Matrix final PCollectionView<Map<Integer, float[]>> userMatrixView = parsedUserData .apply(ParDo.of(new CalculateNextMatrix(numFeatures, lambda, itemMatrixView)).withSideInputs(itemMatrixView)) .apply(GroupByKey.create()).apply(ParDo.of(new UngroupSingleVectorList())).apply(View.asMap()); // return new Item Matrix return parsedItemData.apply(ParDo.of(new CalculateNextMatrix(numFeatures, lambda, userMatrixView)) .withSideInputs(userMatrixView)); } }
/**
 * Prepares the fixtures shared by the tests: initialises the Mockito-annotated members, builds
 * a base collection of the integers 1 through 4, derives a map view, a singleton view and an
 * iterable view from it, and creates the {@code container} over those three views.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  PCollection<Integer> baseCollection =
      pipeline.apply("forBaseCollection", Create.of(1, 2, 3, 4));

  // Every element is keyed with the constant "foo" before being turned into a map view.
  mapView =
      baseCollection
          .apply("forKeyTypes", WithKeys.of("foo"))
          .apply("asMapView", View.asMap());
  singletonView =
      baseCollection.apply("forCombinedTypes", Mean.<Integer>globally().asSingletonView());
  iterableView = baseCollection.apply("asIterableView", View.asIterable());

  container =
      SideInputContainer.create(
          context, ImmutableList.of(iterableView, mapView, singletonView));
}
/**
 * Writes the materialized forms of two map entries ({@code "one" -> 1} and {@code "two" -> 2})
 * into {@code FIRST_WINDOW} of the map view, then reads the view back for that window and
 * checks that it holds exactly those two entries.
 */
@Test
public void getAfterWriteReturnsPaneInWindow() throws Exception {
  final PaneInfo onTimePane = PaneInfo.ON_TIME_AND_ONLY_FIRING;
  final Instant firstTimestamp = new Instant(1L);
  final Instant secondTimestamp = new Instant(20L);

  ImmutableList.Builder<WindowedValue<?>> written = ImmutableList.builder();
  for (Object materialized : materializeValuesFor(View.asMap(), KV.of("one", 1))) {
    written.add(WindowedValue.of(materialized, firstTimestamp, FIRST_WINDOW, onTimePane));
  }
  for (Object materialized : materializeValuesFor(View.asMap(), KV.of("two", 2))) {
    written.add(WindowedValue.of(materialized, secondTimestamp, FIRST_WINDOW, onTimePane));
  }
  container.write(mapView, written.build());

  Map<String, Integer> contents =
      container.createReaderForViews(ImmutableList.of(mapView)).get(mapView, FIRST_WINDOW);

  assertThat(contents, hasEntry("one", 1));
  assertThat(contents, hasEntry("two", 2));
  assertThat(contents.size(), is(2));
}
@Override public PCollection<KV<Integer, List<Double>>> expand(final PCollection<KV<Integer, List<Double>>> model) { // Model as a view. final PCollectionView<Map<Integer, List<Double>>> modelView = model.apply(View.asMap()); // Find gradient. final PCollection<KV<Integer, List<Double>>> gradient = readInput .apply(ParDo.of( new CalculateGradient(modelView, numClasses, numFeatures)).withSideInputs(modelView)) .apply(Combine.perKey(new CombineFunction())); // Tags for CoGroupByKey. final TupleTag<List<Double>> gradientTag = new TupleTag<>(); final TupleTag<List<Double>> modelTag = new TupleTag<>(); final KeyedPCollectionTuple<Integer> coGbkInput = KeyedPCollectionTuple .of(gradientTag, gradient) .and(modelTag, model); final PCollection<KV<Integer, CoGbkResult>> groupResult = coGbkInput.apply(CoGroupByKey.create()); // Update the model return groupResult .apply(ParDo.of(new ApplyGradient(numFeatures, numClasses, iterationNum, gradientTag, modelTag))); } }
/**
 * Checks that {@code getTranslatedSideInputCoder}, given two string-to-integer entries and a
 * {@code View.asMap()} transform, returns a list coder over {@code KV<Void, KV<String,
 * Integer>>}.
 */
@Test
public void testMapSideInputTranslation() throws Exception {
  ListCoder<KV<Void, KV<String, Integer>>> expectedCoder =
      ListCoder.of(
          KvCoder.of(VoidCoder.of(), KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())));

  assertEquals(
      expectedCoder,
      getTranslatedSideInputCoder(ImmutableList.of(KV.of("a", 1), KV.of("b", 3)), View.asMap()));
}
/**
 * Checks the value returned by {@code getName()} for each of the four view transforms:
 * singleton, iterable, map and multimap.
 */
@Test
public void testViewGetName() {
  String singletonName = View.<Integer>asSingleton().getName();
  assertEquals("View.AsSingleton", singletonName);

  String iterableName = View.<Integer>asIterable().getName();
  assertEquals("View.AsIterable", iterableName);

  String mapName = View.<String, Integer>asMap().getName();
  assertEquals("View.AsMap", mapName);

  String multimapName = View.<String, Integer>asMultimap().getName();
  assertEquals("View.AsMultimap", multimapName);
}
/**
 * Writes the materialized forms of two map entries into {@code SECOND_WINDOW} using an early
 * pane, triggers {@code immediatelyInvokeCallback} for that view and window, and then checks
 * that reading the view back still yields exactly the two written entries.
 */
@Test
public void finishDoesNotOverwriteWrittenElements() throws Exception {
  final PaneInfo earlyPane = PaneInfo.createPane(true, false, Timing.EARLY);
  final Instant firstTimestamp = new Instant(1L);
  final Instant secondTimestamp = new Instant(20L);

  ImmutableList.Builder<WindowedValue<?>> written = ImmutableList.builder();
  for (Object materialized : materializeValuesFor(View.asMap(), KV.of("one", 1))) {
    written.add(WindowedValue.of(materialized, firstTimestamp, SECOND_WINDOW, earlyPane));
  }
  for (Object materialized : materializeValuesFor(View.asMap(), KV.of("two", 2))) {
    written.add(WindowedValue.of(materialized, secondTimestamp, SECOND_WINDOW, earlyPane));
  }
  container.write(mapView, written.build());

  immediatelyInvokeCallback(mapView, SECOND_WINDOW);

  Map<String, Integer> contents =
      container.createReaderForViews(ImmutableList.of(mapView)).get(mapView, SECOND_WINDOW);

  assertThat(contents, hasEntry("one", 1));
  assertThat(contents, hasEntry("two", 2));
  assertThat(contents.size(), is(2));
}
/**
 * Builds a schema map view holding entries for the keys {@code "foo"} and {@code "bar"} only,
 * configures a table-row write to {@code dataset-id.table-id} that takes its schema from that
 * view, and expects running the pipeline to fail with a message saying the view has no data
 * for that table destination.
 */
@Test
public void testWriteWithMissingSchemaFromView() throws Exception {
  // The view deliberately has no entry keyed by the destination table.
  PCollectionView<Map<String, String>> schemaView =
      p.apply("Create schema view", Create.of(KV.of("foo", "bar"), KV.of("bar", "boo")))
          .apply(View.asMap());

  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(schemaView)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // The expectation is registered before the run so the failure raised by run() is matched.
  thrown.expectMessage("does not contain data for table destination dataset-id.table-id");
  p.run();
}
@Test @Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class}) public void testEmptyMapSideInputWithNonDeterministicKeyCoder() throws Exception { final PCollectionView<Map<String, Integer>> view = pipeline .apply( "CreateEmptyView", Create.empty(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of()))) .apply(View.asMap()); PCollection<Integer> results = pipeline .apply("Create1", Create.of(1)) .apply( "OutputSideInputs", ParDo.of( new DoFn<Integer, Integer>() { @ProcessElement public void processElement(ProcessContext c) { assertTrue(c.sideInput(view).isEmpty()); assertTrue(c.sideInput(view).entrySet().isEmpty()); assertFalse(c.sideInput(view).entrySet().iterator().hasNext()); c.output(c.element()); } }) .withSideInputs(view)); // Pass at least one value through to guarantee that DoFn executes. PAssert.that(results).containsInAnyOrder(1); pipeline.run(); }
/**
 * Creates a two-entry map view and, inside a {@code DoFn} that receives it as a side input,
 * checks that both the map and its entry set have the size carried by the main-input element,
 * then emits every entry. The emitted entries must equal the entries the view was built from.
 */
@Test
@Category(ValidatesRunner.class)
public void testMapAsEntrySetSideInput() {
  final PCollectionView<Map<String, Integer>> mapSideInput =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)))
          .apply(View.asMap());

  PCollection<KV<String, Integer>> emittedEntries =
      pipeline
          .apply("CreateMainInput", Create.of(2 /* size */))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext context) {
                          // The main-input element carries the expected number of entries.
                          assertEquals(
                              (int) context.element(), context.sideInput(mapSideInput).size());
                          assertEquals(
                              (int) context.element(),
                              context.sideInput(mapSideInput).entrySet().size());
                          for (Entry<String, Integer> mapping :
                              context.sideInput(mapSideInput).entrySet()) {
                            context.output(KV.of(mapping.getKey(), mapping.getValue()));
                          }
                        }
                      })
                  .withSideInputs(mapSideInput));

  PAssert.that(emittedEntries).containsInAnyOrder(KV.of("a", 1), KV.of("b", 3));

  pipeline.run();
}
/**
 * Creates a map view with entries {@code "a" -> 1} and {@code "b" -> 3}, then for each word in
 * the main input looks up the word's first character in that view and emits the word paired
 * with the value found.
 */
@Test
@Category(ValidatesRunner.class)
public void testMapSideInput() {
  final PCollectionView<Map<String, Integer>> mapSideInput =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)))
          .apply(View.asMap());

  PCollection<KV<String, Integer>> lookedUp =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext context) {
                          // The lookup key is the first character of the word.
                          context.output(
                              KV.of(
                                  context.element(),
                                  context
                                      .sideInput(mapSideInput)
                                      .get(context.element().substring(0, 1))));
                        }
                      })
                  .withSideInputs(mapSideInput));

  PAssert.that(lookedUp)
      .containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3));

  pipeline.run();
}
/**
 * Creates a map view with entries {@code "a" -> 1} and {@code "b" -> 3} whose keys are encoded
 * with {@code NonDeterministicStringCoder}, then for each word in the main input looks up the
 * word's first character in that view and emits the word paired with the value found.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testMapSideInputWithNonDeterministicKeyCoder() {
  final PCollectionView<Map<String, Integer>> mapSideInput =
      pipeline
          .apply(
              "CreateSideInput",
              Create.of(KV.of("a", 1), KV.of("b", 3))
                  .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())))
          .apply(View.asMap());

  PCollection<KV<String, Integer>> lookedUp =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext context) {
                          // The lookup key is the first character of the word.
                          context.output(
                              KV.of(
                                  context.element(),
                                  context
                                      .sideInput(mapSideInput)
                                      .get(context.element().substring(0, 1))));
                        }
                      })
                  .withSideInputs(mapSideInput));

  PAssert.that(lookedUp)
      .containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3));

  pipeline.run();
}