public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
/** Varargs convenience overload; delegates to {@code withColumns(List)}. */
public ReadOperation withColumns(String... columns) { return withColumns(Arrays.asList(columns)); }
/**
 * Executes the given read operation on the supplied batch read-only transaction.
 *
 * <p>Dispatch order: a configured SQL query takes precedence, then an index-backed read, and
 * finally a plain table read over the operation's key set and columns.
 */
private ResultSet execute(ReadOperation operation, BatchReadOnlyTransaction transaction) {
  if (operation.getQuery() != null) {
    return transaction.executeQuery(operation.getQuery());
  } else if (operation.getIndex() != null) {
    return transaction.readUsingIndex(
        operation.getTable(), operation.getIndex(), operation.getKeySet(), operation.getColumns());
  } else {
    return transaction.read(operation.getTable(), operation.getKeySet(), operation.getColumns());
  }
}
}
// Example: a readAll() source accepts a mix of query-based and table/column-based operations.
pipeline.apply( Create.of( ReadOperation.create().withQuery("SELECT * FROM users"), ReadOperation.create().withTable("users").withColumns("id", "name")));
/**
 * Reads the target table's schema (column name -> type, insertion-ordered) inside a read-only
 * transaction, serializes it to JSON and saves it at {@code textWritePrefix() + SCHEMA_SUFFIX}
 * so the schema travels with the exported file, then emits a {@code ReadOperation} covering
 * every discovered column of the table. The Spanner accessor is closed in the finally block
 * regardless of whether the schema read or save succeeds.
 */
@ProcessElement @SuppressWarnings("unused") public void processElement(ProcessContext processContext) { // Save schema to GCS so it can be saved along with the exported file. LOG.info("Creating database client for schema read"); LinkedHashMap<String, String> columns; try { DatabaseClient databaseClient = getDatabaseClient(spannerConfig()); try (ReadOnlyTransaction context = databaseClient.readOnlyTransaction()) { LOG.info("Reading schema information"); columns = getAllColumns(context, table().get()); String columnJson = SpannerConverters.GSON.toJson(columns); LOG.info("Saving schema information"); saveSchema(columnJson, textWritePrefix().get() + SCHEMA_SUFFIX); } } finally { closeSpannerAccessor(); } processContext.output( ReadOperation.create() .withColumns(new ArrayList<>(columns.keySet())) .withTable(table().get())); }
/** Sets the SQL text to execute by wrapping it in a {@link Statement}. */
public ReadOperation withQuery(String sql) {
  Statement statement = Statement.of(sql);
  return withQuery(statement);
}
/**
 * Returns an uninitialized {@link Read} transform with defaults: batching enabled, a strong
 * timestamp bound, an empty read operation, and an empty Spanner configuration. Before use,
 * the {@link Read} must be configured with a {@link Read#withInstanceId} and
 * {@link Read#withDatabaseId} that identify the Cloud Spanner database.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public static Read read() {
  // AutoValue builder setters are independent, so ordering is arbitrary.
  return new AutoValue_SpannerIO_Read.Builder()
      .setBatching(true)
      .setReadOperation(ReadOperation.create())
      .setTimestampBound(TimestampBound.strong())
      .setSpannerConfig(SpannerConfig.create())
      .build();
}
/** Returns a transform configured to read from the given table. */
public Read withTable(String table) {
  ReadOperation updated = getReadOperation().withTable(table);
  return withReadOperation(updated);
}
+ "with withTimestampBound or withTimestamp method"); if (getReadOperation().getQuery() != null) { } else if (getReadOperation().getTable() != null) { getReadOperation().getColumns(), "For a read operation SpannerIO.read() requires a list of " + "columns to set with withColumns method"); checkArgument( !getReadOperation().getColumns().isEmpty(), "For a read operation SpannerIO.read() requires a" + " list of columns to set with withColumns method");
/** Returns a transform configured to read the results of the given statement. */
public Read withQuery(Statement statement) {
  ReadOperation updated = getReadOperation().withQuery(statement);
  return withReadOperation(updated);
}
@ProcessElement public void processElement(ProcessContext c) { Ddl ddl = c.element(); for (Table table : ddl.allTables()) { String columnsListAsString = table.columns().stream() .map(x -> "t.`" + x.name() + "`") .collect(Collectors.joining(",")); // Also have to export table name to be able to identify which row belongs to // which table. ReadOperation read = ReadOperation.create() .withQuery( String.format( "SELECT \"%s\" AS _spanner_table, %s FROM `%s` AS t", table.name(), columnsListAsString, table.name())); c.output(read); } } }));
private List<Partition> execute(ReadOperation op, BatchReadOnlyTransaction tx) { // Query was selected. if (op.getQuery() != null) { return tx.partitionQuery(op.getPartitionOptions(), op.getQuery()); } // Read with index was selected. if (op.getIndex() != null) { return tx.partitionReadUsingIndex( op.getPartitionOptions(), op.getTable(), op.getIndex(), op.getKeySet(), op.getColumns()); } // Read from table was selected. return tx.partitionRead( op.getPartitionOptions(), op.getTable(), op.getKeySet(), op.getColumns()); } }
/** Returns a transform configured to read the given columns. */
public Read withColumns(List<String> columns) {
  ReadOperation updated = getReadOperation().withColumns(columns);
  return withReadOperation(updated);
}
/**
 * End-to-end check of {@code SpannerIO.readAll()}: lists all user tables via
 * information_schema (with batching disabled), maps each table name to a full-table
 * {@code ReadOperation}, reads them all under a single shared transaction view, and asserts
 * that the total number of rows across the database is 5.
 */
@Test public void testReadAllRecordsInDb() throws Exception { SpannerConfig spannerConfig = createSpannerConfig(); PCollectionView<Transaction> tx = p.apply( SpannerIO.createTransaction() .withSpannerConfig(spannerConfig) .withTimestampBound(TimestampBound.strong())); PCollection<Struct> allRecords = p.apply( SpannerIO.read() .withSpannerConfig(spannerConfig) .withBatching(false) .withQuery( "SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")) .apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via( (SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })) .apply(SpannerIO.readAll().withTransaction(tx).withSpannerConfig(spannerConfig)); PAssert.thatSingleton(allRecords.apply("Count rows", Count.globally())).isEqualTo(5L); p.run(); }