/**
 * No-arg constructor: delegates to the primary constructor with the default configuration —
 * rows parsed via {@code TableRowParser.INSTANCE} and encoded with {@link TableRowJsonCoder}.
 */
Read() {
  this(BigQueryIO.read(TableRowParser.INSTANCE).withCoder(TableRowJsonCoder.of()));
}
/** * Converts a JSON string to a {@link TableRow} object. If the data fails to convert, a {@link * RuntimeException} will be thrown. * * @param json The JSON string to parse. * @return The parsed {@link TableRow} object. */ private static TableRow convertJsonToTableRow(String json) { TableRow row; // Parse the JSON into a {@link TableRow} object. try (InputStream inputStream = new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8))) { row = TableRowJsonCoder.of().decode(inputStream, Context.OUTER); } catch (IOException e) { throw new RuntimeException("Failed to serialize json to table row: " + json, e); } return row; }
/**
 * Serializes the insert error as three consecutive components: the error rendered as a JSON
 * string, the failed row, and the destination table spec string.
 */
@Override
public void encode(BigQueryInsertError value, OutputStream outStream) throws IOException {
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  String errorJson = MAPPER.writeValueAsString(value.getError());
  stringCoder.encode(errorJson, outStream);
  TableRowJsonCoder.of().encode(value.getRow(), outStream);
  stringCoder.encode(BigQueryHelpers.toTableSpec(value.getTable()), outStream);
}
/**
 * Estimates the encoded size of {@code value}.
 *
 * <p>Fix: the table component is now measured as the table-spec string, matching what
 * {@code encode} actually writes ({@code BigQueryHelpers.toTableSpec}). The previous
 * implementation measured {@code MAPPER.writeValueAsString(value.getTable())} — a JSON
 * object rendering — so the reported size disagreed with the bytes really produced.
 */
@Override
protected long getEncodedElementByteSize(BigQueryInsertError value) throws Exception {
  String errorStrValue = MAPPER.writeValueAsString(value.getError());
  // Same rendering as encode() so the estimate tracks the actual encoding.
  String tableStrValue = BigQueryHelpers.toTableSpec(value.getTable());
  return StringUtf8Coder.of().getEncodedElementByteSize(errorStrValue)
      + TableRowJsonCoder.of().getEncodedElementByteSize(value.getRow())
      + StringUtf8Coder.of().getEncodedElementByteSize(tableStrValue);
}
/** Decodes a {@link TableRow}, delegating to the two-argument form with the NESTED context. */
@Override
public TableRow decode(InputStream inStream) throws IOException {
  return decode(inStream, Context.NESTED);
}
/** Encodes a {@link TableRow}, delegating to the three-argument form with the NESTED context. */
@Override
public void encode(TableRow value, OutputStream outStream) throws IOException {
  encode(value, outStream, Context.NESTED);
}
/**
 * Decodes a {@link TableRowInfo} in the order the components were encoded: the row first,
 * then the unique id; the caller's context is propagated only to the trailing id component.
 */
@Override
public TableRowInfo decode(InputStream inStream, Context context) throws IOException {
  TableRow row = tableRowCoder.decode(inStream);
  String uniqueId = idCoder.decode(inStream, context);
  return new TableRowInfo(row, uniqueId);
}
/**
 * Encodes a {@link TableRowInfo} as the row followed by the unique id; only the trailing id
 * component receives the caller's context.
 *
 * @throws CoderException if {@code value} is null — this coder cannot represent null.
 */
@Override
public void encode(TableRowInfo value, OutputStream outStream, Context context)
    throws IOException {
  // Fail fast on null, per the Coder contract.
  if (value == null) {
    throw new CoderException("cannot encode a null value");
  }
  tableRowCoder.encode(value.tableRow, outStream);
  idCoder.encode(value.uniqueId, outStream, context);
}
/**
 * Like {@link #read(SerializableFunction)} but represents each row as a {@link TableRow}.
 *
 * <p>This method is more convenient to use in some cases, but usually has significantly lower
 * performance than using {@link #read(SerializableFunction)} directly to parse data into a
 * domain-specific type, due to the overhead of converting the rows to {@link TableRow}.
 */
public static TypedRead<TableRow> readTableRows() {
  TypedRead<TableRow> read = read(new TableRowParser());
  return read.withCoder(TableRowJsonCoder.of());
}
/**
 * Decodes a {@link BigQueryInsertError} from the three components written by {@code encode}:
 * the error JSON string, the row, and the table spec string — in that order.
 */
@Override
public BigQueryInsertError decode(InputStream inStream) throws IOException {
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  String errorJson = stringCoder.decode(inStream);
  TableDataInsertAllResponse.InsertErrors err =
      MAPPER.readValue(errorJson, TableDataInsertAllResponse.InsertErrors.class);
  TableRow row = TableRowJsonCoder.of().decode(inStream);
  String tableSpec = stringCoder.decode(inStream);
  TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
  return new BigQueryInsertError(row, err, ref);
}
/**
 * Reads a file of newline-delimited JSON rows and decodes each line into a {@link TableRow}.
 *
 * @param filename path of the UTF-8 file to read, one JSON row per line.
 * @return the decoded rows, in file order.
 * @throws IOException if the file cannot be read or a line fails to decode.
 */
private List<TableRow> readRows(String filename) throws IOException {
  Coder<TableRow> rowCoder = TableRowJsonCoder.of();
  List<TableRow> rows = Lists.newArrayList();
  try (BufferedReader reader =
      Files.newBufferedReader(Paths.get(filename), StandardCharsets.UTF_8)) {
    // Each line is a complete JSON document, so decode with the OUTER context.
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      byte[] lineBytes = line.getBytes(StandardCharsets.UTF_8);
      rows.add(rowCoder.decode(new ByteArrayInputStream(lineBytes), Context.OUTER));
    }
  }
  return rows;
}
/** Supplies the default coders for the BigQuery row element types handled here. */
@Override
public List<CoderProvider> getCoderProviders() {
  CoderProvider rowProvider =
      CoderProviders.forCoder(TypeDescriptor.of(TableRow.class), TableRowJsonCoder.of());
  CoderProvider rowInfoProvider =
      CoderProviders.forCoder(TypeDescriptor.of(TableRowInfo.class), TableRowInfoCoder.of());
  return ImmutableList.of(rowProvider, rowInfoProvider);
}
}
/**
 * Serializes a list of rows to a Base64 string: the list is encoded with
 * {@code ListCoder(TableRowJsonCoder)} in the OUTER context, then Base64-encoded.
 */
static String encodeQuery(List<TableRow> rows) throws IOException {
  ByteArrayOutputStream output = new ByteArrayOutputStream();
  ListCoder.of(TableRowJsonCoder.of()).encode(rows, output, Context.OUTER);
  return Base64.encodeBase64String(output.toByteArray());
}
/**
 * Inverse of {@code encodeQuery}: Base64-decodes the string and decodes the row list in the
 * OUTER context, then post-processes each row with {@code convertNumbers} (defined elsewhere
 * in this file).
 */
static List<TableRow> rowsFromEncodedQuery(String query) throws IOException {
  byte[] decodedBytes = Base64.decodeBase64(query);
  ByteArrayInputStream input = new ByteArrayInputStream(decodedBytes);
  List<TableRow> rows =
      ListCoder.of(TableRowJsonCoder.of()).decode(input, Context.OUTER);
  for (TableRow row : rows) {
    convertNumbers(row);
  }
  return rows;
}
/** A row whose cells are explicit JSON nulls must survive an encode/decode round trip. */
@Test
public void testCoder_nullCell() throws CoderException {
  Coder<TableRow> coder = TableRowJsonCoder.of();
  TableRow row = new TableRow();
  row.set("temperature", Data.nullOf(Object.class));
  row.set("max_temperature", Data.nullOf(Object.class));
  byte[] originalBytes = CoderUtils.encodeToByteArray(coder, row);
  TableRow decoded = CoderUtils.decodeFromByteArray(coder, originalBytes);
  byte[] reencodedBytes = CoderUtils.encodeToByteArray(coder, decoded);
  Assert.assertArrayEquals(originalBytes, reencodedBytes);
}
/**
 * Assigns each (destination, row) pair a round-robin shard key in
 * {@code [0, numFileShards)} and writes the sharded records to temp files.
 *
 * <p>Fix: the shard counter is now kept bounded. The previous form
 * {@code ++shardNumber % numFileShards} let the counter grow without limit, so after 2^31
 * elements on a long-lived DoFn instance the int overflowed and the modulo produced a
 * negative shard id. The emitted shard sequence is unchanged until that point.
 */
PCollection<WriteBundlesToFiles.Result<DestinationT>> writeShardedFiles(
    PCollection<KV<DestinationT, TableRow>> input, PCollectionView<String> tempFilePrefix) {
  checkState(numFileShards > 0);
  PCollection<KV<ShardedKey<DestinationT>, TableRow>> shardedRecords =
      input
          .apply(
              "AddShard",
              ParDo.of(
                  new DoFn<KV<DestinationT, TableRow>, KV<ShardedKey<DestinationT>, TableRow>>() {
                    // Round-robin counter, seeded randomly per instance so concurrent
                    // workers do not all start at shard 0.
                    int shardNumber;

                    @Setup
                    public void setup() {
                      shardNumber = ThreadLocalRandom.current().nextInt(numFileShards);
                    }

                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      DestinationT destination = c.element().getKey();
                      TableRow tableRow = c.element().getValue();
                      // Advance and wrap, keeping the counter in [0, numFileShards).
                      shardNumber = (shardNumber + 1) % numFileShards;
                      c.output(KV.of(ShardedKey.of(destination, shardNumber), tableRow));
                    }
                  }))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(destinationCoder), TableRowJsonCoder.of()));
  return writeShardedRecords(shardedRecords, tempFilePrefix);
}
/**
 * Routes failed inserts into one of two output shapes depending on whether the caller asked
 * for extended error information ({@code BigQueryInsertError}) or plain {@code TableRow}s.
 */
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
  if (!extendedErrorInfo) {
    // Plain mode: failed rows are surfaced as TableRow elements.
    TupleTag<TableRow> failedInsertsTag = new TupleTag<>(FAILED_INSERTS_TAG_ID);
    PCollection<TableRow> failedInserts =
        writeAndGetErrors(
            input,
            failedInsertsTag,
            TableRowJsonCoder.of(),
            ErrorContainer.TABLE_ROW_ERROR_CONTAINER);
    return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
  }
  // Extended mode: failed rows carry the full insert-error details.
  TupleTag<BigQueryInsertError> failedInsertsTag = new TupleTag<>(FAILED_INSERTS_TAG_ID);
  PCollection<BigQueryInsertError> failedInserts =
      writeAndGetErrors(
          input,
          failedInsertsTag,
          BigQueryInsertErrorCoder.of(),
          ErrorContainer.BIG_QUERY_INSERT_ERROR_ERROR_CONTAINER);
  return WriteResult.withExtendedErrors(input.getPipeline(), failedInsertsTag, failedInserts);
}
TestStream.create(TableRowJsonCoder.of()) .addElements( elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
/** Writing with CREATE_IF_NEEDED but no schema must fail pipeline validation. */
@Test
public void testWriteValidateFailsCreateNoSchema() {
  p.enableAbandonedNodeEnforcement(false);
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("no schema was provided");
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("dataset.table")
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED);
  p.apply(Create.empty(TableRowJsonCoder.of())).apply(write);
}
// Register default coders so the pipeline can infer them for these element types:
// FeatureRowExtended is encoded as protobuf via ProtoCoder.
coderRegistry.registerCoderForType(
    TypeDescriptor.of(FeatureRowExtended.class), ProtoCoder.of(FeatureRowExtended.class));
// TableRow elements are encoded as JSON via TableRowJsonCoder.
coderRegistry.registerCoderForType(TypeDescriptor.of(TableRow.class), TableRowJsonCoder.of());