public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
// Emits one full-table-scan ReadOperation per table found in the parsed schema (Ddl).
@ProcessElement
public void processElement(ProcessContext c) {
  Ddl ddl = c.element();
  for (Table table : ddl.allTables()) {
    // Backtick-quote each column name so reserved words / special characters still parse.
    String columnsListAsString =
        table.columns().stream()
            .map(x -> "t.`" + x.name() + "`")
            .collect(Collectors.joining(","));
    // Also have to export table name to be able to identify which row belongs to
    // which table.
    ReadOperation read =
        ReadOperation.create()
            .withQuery(
                String.format(
                    "SELECT \"%s\" AS _spanner_table, %s FROM `%s` AS t",
                    table.name(), columnsListAsString, table.name()));
    c.output(read);
  }
}
// NOTE(review): closes an anonymous DoFn and its enclosing apply(...) call whose
// opening is outside this view.
}));
// Reads the table's column schema inside a read-only transaction, writes it as JSON next to
// the export output (textWritePrefix + SCHEMA_SUFFIX), and emits a ReadOperation selecting
// exactly those columns from the table.
@ProcessElement
@SuppressWarnings("unused")
public void processElement(ProcessContext processContext) {
  // Save schema to GCS so it can be saved along with the exported file.
  LOG.info("Creating database client for schema read");
  // LinkedHashMap preserves the column order returned by getAllColumns.
  LinkedHashMap<String, String> columns;
  try {
    DatabaseClient databaseClient = getDatabaseClient(spannerConfig());
    // try-with-resources guarantees the read-only transaction is closed.
    try (ReadOnlyTransaction context = databaseClient.readOnlyTransaction()) {
      LOG.info("Reading schema information");
      columns = getAllColumns(context, table().get());
      String columnJson = SpannerConverters.GSON.toJson(columns);
      LOG.info("Saving schema information");
      saveSchema(columnJson, textWritePrefix().get() + SCHEMA_SUFFIX);
    }
  } finally {
    // Release the Spanner client even if the schema read or the save fails.
    closeSpannerAccessor();
  }
  // Only reached on success, so `columns` is definitely assigned here.
  processContext.output(
      ReadOperation.create()
          .withColumns(new ArrayList<>(columns.keySet()))
          .withTable(table().get()));
}
/**
 * Returns a new {@link Read} transform populated with default settings: a strong timestamp
 * bound, an empty {@link ReadOperation}, and batching enabled. The returned transform is not
 * yet usable — callers must still configure {@link Read#withInstanceId} and
 * {@link Read#withDatabaseId} to identify the Cloud Spanner database.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public static Read read() {
  AutoValue_SpannerIO_Read.Builder builder = new AutoValue_SpannerIO_Read.Builder();
  builder
      .setSpannerConfig(SpannerConfig.create())
      .setReadOperation(ReadOperation.create())
      .setTimestampBound(TimestampBound.strong())
      .setBatching(true);
  return builder.build();
}
@Test
public void testReadAllRecordsInDb() throws Exception {
  SpannerConfig spannerConfig = createSpannerConfig();

  // Pin every read to a single consistent snapshot of the database.
  PCollectionView<Transaction> tx =
      p.apply(
          SpannerIO.createTransaction()
              .withSpannerConfig(spannerConfig)
              .withTimestampBound(TimestampBound.strong()));

  // Step 1: list every user table name from the information schema.
  PCollection<Struct> tableNames =
      p.apply(
          SpannerIO.read()
              .withSpannerConfig(spannerConfig)
              .withBatching(false)
              .withQuery(
                  "SELECT t.table_name FROM information_schema.tables AS t WHERE t"
                      + ".table_catalog = '' AND t.table_schema = ''"));

  // Step 2: turn each table name into a full-table-scan read operation.
  PCollection<ReadOperation> scans =
      tableNames.apply(
          MapElements.into(TypeDescriptor.of(ReadOperation.class))
              .via(
                  (SerializableFunction<Struct, ReadOperation>)
                      row -> {
                        String tableName = row.getString(0);
                        return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
                      }));

  // Step 3: execute all scans within the shared transaction.
  PCollection<Struct> allRecords =
      scans.apply(SpannerIO.readAll().withTransaction(tx).withSpannerConfig(spannerConfig));

  PAssert.thatSingleton(allRecords.apply("Count rows", Count.globally())).isEqualTo(5L);
  p.run();
}