/**
 * An end-to-end test for {@link DatastoreV1.Read#withLiteralGqlQuery(String)}.
 *
 * <p>Write some test entities to datastore and then run a pipeline that reads and counts the
 * total number of entities. Verify that the count matches the number of entities written.
 */
private void testE2EV1ReadWithGQLQuery(long limit) throws Exception {
  String gqlQuery =
      String.format(
          "SELECT * from %s WHERE __key__ HAS ANCESTOR KEY(%s, '%s')",
          options.getKind(), options.getKind(), ancestor);

  long expectedNumEntities = numEntities;
  if (limit > 0) {
    gqlQuery = String.format("%s LIMIT %d", gqlQuery, limit);
    expectedNumEntities = limit;
  }

  DatastoreV1.Read read =
      DatastoreIO.v1()
          .read()
          .withProjectId(project)
          .withLiteralGqlQuery(gqlQuery)
          .withNamespace(options.getNamespace());

  // Count the total number of entities
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count = p.apply(read).apply(Count.globally());

  PAssert.thatSingleton(count).isEqualTo(expectedNumEntities);
  p.run();
}
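// The Javadoc above assumes the test entities already exist under `ancestor`.
// A minimal sketch of that setup step, assuming a hypothetical CreateEntityFn
// (a DoFn<Long, Entity> that builds an entity of options.getKind() under the
// ancestor key); the suite's real helper may differ.
Pipeline writePipeline = Pipeline.create(options);
writePipeline
    .apply("GenerateIds", GenerateSequence.from(0).to(numEntities))
    .apply("CreateEntities", ParDo.of(new CreateEntityFn(options.getKind(), ancestor)))
    .apply("WriteToDatastore", DatastoreIO.v1().write().withProjectId(project));
writePipeline.run().waitUntilFinish();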
.satisfies(
    input -> {
      assertEquals(input.keySet(), expectedTempTables.keySet());
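// The fragment above is cut off mid-lambda. For context, satisfies(...) takes
// a SerializableFunction<T, Void>, so the check must end with "return null;".
// A sketch of the completed shape, with tempTablesCollection as a hypothetical
// stand-in for the asserted PCollection:
PAssert.thatSingleton(tempTablesCollection)
    .satisfies(
        input -> {
          assertEquals(input.keySet(), expectedTempTables.keySet());
          return null;
        });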
PAssert.thatSingleton(flattenedCollection).isEqualTo(queryFlattenedTyped);
PAssert.thatSingleton(flattenedSideInput).isEqualTo(queryFlattenedTyped);
PAssert.thatSingleton(nonFlattenedCollection).isEqualTo(queryUnflattened);
PAssert.thatSingleton(nonFlattenedSideInput).isEqualTo(queryUnflattened);
PAssert.thatSingleton(unflattenableCollection).isEqualTo(queryUnflattenable);
PAssert.thatSingleton(flattenedCollection).notEqualTo(queryUnflattened);
p.run().waitUntilFinish();
PAssert.thatMap(teamScores)
    .inOnTimePane(window)
    .isEqualTo(
        ImmutableMap.<String, Integer>builder().put(redTeam, 7).put(blueTeam, 11).build());
    Arrays.asList(decompressedAuto, decompressedDefault, decompressedUncompressed)) {
  PAssert.thatSingleton(c)
      .satisfies(
          input -> {
            assertEquals(path, input.getMetadata().resourceId().toString());

    Arrays.asList(compressionAuto, compressionDefault, compressionGzip)) {
  PAssert.thatSingleton(c)
      .satisfies(
          input -> {
            assertEquals(pathGZ, input.getMetadata().resourceId().toString());
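// Both fragments above appear to be bodies of for-each loops over lists of
// PCollections; judging from getMetadata(), the element type is likely
// FileIO.ReadableFile. A hedged reconstruction of the first loop:
for (PCollection<FileIO.ReadableFile> c :
    Arrays.asList(decompressedAuto, decompressedDefault, decompressedUncompressed)) {
  PAssert.thatSingleton(c)
      .satisfies(
          input -> {
            // Each read variant is expected to yield exactly one file at path.
            assertEquals(path, input.getMetadata().resourceId().toString());
            return null;
          });
}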
        .put(-16, 1L)
        .build();
PAssert.thatMap(countsBackToString).isEqualTo(expected);
.isEqualTo((long) numberOfRows);
PCollection<Long> count = esData.apply(Count.globally());
PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
PCollection<LinkedMapWritable> values = esData.apply(Values.create());
PCollection<String> textValues = values.apply(transformFunc);
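// transformFunc is not shown in these snippets; it converts each
// LinkedMapWritable row to a String for checksum comparison. A hypothetical
// sketch of such a transform (the "id" field name is an assumption; the real
// transformFunc may select different fields):
MapElements<LinkedMapWritable, String> transformFunc =
    MapElements.via(
        new SimpleFunction<LinkedMapWritable, String>() {
          @Override
          public String apply(LinkedMapWritable row) {
            return row.get(new Text("id")).toString();
          }
        });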
PCollection<Long> count = esData.apply(Count.globally());
PAssert.thatSingleton(count).isEqualTo(expectedRecordsCount);
PCollection<LinkedMapWritable> values = esData.apply(Values.create());
PCollection<String> textValues = values.apply(transformFunc);
        .withValueTranslation(myValueTranslate));
PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
    .isEqualTo(expectedNumRows);
PCollection<String> textValues = cassandraData.apply(Values.create());
.satisfies(
    results -> {
      CoGbkResult result1 = results.get(1);
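// This fragment likely comes from a PAssert.thatMap assertion over
// KV<Integer, CoGbkResult> results, where results.get(1) looks up the
// co-grouped values for key 1. A hedged sketch of the surrounding shape
// (finalResultCollection, tag1, and the expected value are hypothetical):
PAssert.thatMap(finalResultCollection)
    .satisfies(
        results -> {
          CoGbkResult result1 = results.get(1);
          assertEquals("value-for-key-1", result1.getOnly(tag1));
          return null;
        });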
        .withEntity(Scientist.class));
PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(10000L);
/**
 * Test to read data from an embedded Cassandra instance based on a query and verify whether the
 * data is read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() {
  Long expectedCount = 1L;
  String expectedChecksum = "f11caabc7a9fc170e22b41218749166c";
  Configuration conf = getConfiguration();
  conf.set(
      "cassandra.input.cql",
      "select * from "
          + CASSANDRA_KEYSPACE
          + "."
          + CASSANDRA_TABLE
          + " where token(id) > ? and token(id) <= ? and scientist='Faraday1' allow filtering");
  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopInputFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  // Verify the count of data retrieved from Cassandra matches expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally())).isEqualTo(expectedCount);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedChecksum);
  p.run().waitUntilFinish();
}
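// myValueTranslate (also used in the earlier Cassandra snippets) is a
// value-translation SimpleFunction from the input format's row type to String.
// A plausible sketch, assuming the CQL input format's Row value type and the
// column names from the query above; the suite's real function may differ.
SimpleFunction<Row, String> myValueTranslate =
    new SimpleFunction<Row, String>() {
      @Override
      public String apply(Row row) {
        return row.getLong("id") + "|" + row.getString("scientist");
      }
    };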
@Test
public void testReadAllRecordsInDb() throws Exception {
  SpannerConfig spannerConfig = createSpannerConfig();

  PCollectionView<Transaction> tx =
      p.apply(
          SpannerIO.createTransaction()
              .withSpannerConfig(spannerConfig)
              .withTimestampBound(TimestampBound.strong()));

  PCollection<Struct> allRecords =
      p.apply(
              SpannerIO.read()
                  .withSpannerConfig(spannerConfig)
                  .withBatching(false)
                  .withQuery(
                      "SELECT t.table_name FROM information_schema.tables AS t WHERE t"
                          + ".table_catalog = '' AND t.table_schema = ''"))
          .apply(
              MapElements.into(TypeDescriptor.of(ReadOperation.class))
                  .via(
                      (SerializableFunction<Struct, ReadOperation>)
                          input -> {
                            String tableName = input.getString(0);
                            return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
                          }))
          .apply(SpannerIO.readAll().withTransaction(tx).withSpannerConfig(spannerConfig));

  PAssert.thatSingleton(allRecords.apply("Count rows", Count.globally())).isEqualTo(5L);
  p.run();
}
@Test
@Category({NeedsRunner.class, UsesTimersInParDo.class, UsesStatefulParDo.class})
public void testInGlobalWindow() {
  PCollection<KV<String, Iterable<String>>> collection =
      pipeline
          .apply("Input data", Create.of(data))
          .apply(GroupIntoBatches.ofSize(BATCH_SIZE))
          // set output coder
          .setCoder(KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())));
  PAssert.that("Incorrect batch size in one or more elements", collection)
      .satisfies(
          new SerializableFunction<Iterable<KV<String, Iterable<String>>>, Void>() {

            private boolean checkBatchSizes(Iterable<KV<String, Iterable<String>>> listToCheck) {
              for (KV<String, Iterable<String>> element : listToCheck) {
                if (Iterables.size(element.getValue()) != BATCH_SIZE) {
                  return false;
                }
              }
              return true;
            }

            @Override
            public Void apply(Iterable<KV<String, Iterable<String>>> input) {
              assertTrue(checkBatchSizes(input));
              return null;
            }
          });
  PAssert.thatSingleton("Incorrect collection size", collection.apply("Count", Count.globally()))
      .isEqualTo(NUM_ELEMENTS / BATCH_SIZE);
  pipeline.run();
}
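// The fixtures are not shown; for both assertions to hold, NUM_ELEMENTS must
// be an exact multiple of BATCH_SIZE (every batch must contain exactly
// BATCH_SIZE elements, and the batch count is NUM_ELEMENTS / BATCH_SIZE by
// integer division). Hypothetical fixture values consistent with that:
private static final int BATCH_SIZE = 5;
private static final int NUM_ELEMENTS = 10;
private static final List<KV<String, String>> data = new ArrayList<>();

@BeforeClass
public static void setUpTestData() {
  for (int i = 0; i < NUM_ELEMENTS; i++) {
    data.add(KV.of("key", "value-" + i)); // single key so batching is exercised
  }
}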
@Test
public void testUnboundedSourceCreateTimestamps() {
  // Same as testUnboundedSourceCustomTimestamps with create timestamp.

  final int numElements = 1000;
  final long createTimestampStartMillis = 50000L;

  PCollection<Long> input =
      p.apply(
              mkKafkaReadTransform(numElements, null)
                  .withCreateTime(Duration.ZERO)
                  .updateConsumerProperties(
                      ImmutableMap.of(
                          TIMESTAMP_TYPE_CONFIG,
                          "CreateTime",
                          TIMESTAMP_START_MILLIS_CONFIG,
                          createTimestampStartMillis))
                  .withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);

  PCollection<Long> diffs =
      input
          .apply(
              MapElements.into(TypeDescriptors.longs())
                  .via(t -> TimeUnit.SECONDS.toMillis(t) + createTimestampStartMillis))
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());

  // This assert also confirms that diffs only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);
  p.run();
}
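// ElementValueDiff is not shown here; consistent with its use above, it emits
// the difference between each element's value and its assigned timestamp (in
// millis), so a single distinct value of 0 in diffs proves the create-time
// timestamps were applied as expected. A sketch:
private static class ElementValueDiff extends DoFn<Long, Long> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    c.output(c.element() - c.timestamp().getMillis());
  }
}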