/**
 * Renders the field values of a {@link StructuredRecord} as one delimited line.
 *
 * <p>Values appear in the order of the fields in the record's schema. A field whose value is
 * {@code null} contributes an empty string.
 *
 * @param record the record whose values are joined
 * @param delimiter the separator placed between consecutive values
 * @return the joined string
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return record.getSchema().getFields().stream()
    .map(field -> {
      // Look the value up by field name and render nulls as an empty column.
      Object fieldValue = record.get(field.getName());
      return fieldValue == null ? "" : fieldValue.toString();
    })
    .collect(Collectors.joining(delimiter));
}
/**
 * Converts a {@link StructuredRecord} to a delimited string.
 *
 * <p>Values are emitted in the order of the fields in the record's schema. A field whose value
 * is {@code null} is written as an empty string instead of failing with a
 * {@link NullPointerException}.
 *
 * @param record the record whose field values are joined
 * @param delimiter the separator placed between consecutive values
 * @return the joined string
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return Joiner.on(delimiter).join(
    Iterables.transform(record.getSchema().getFields(), new Function<Schema.Field, String>() {
      @Override
      public String apply(Schema.Field field) {
        Object value = record.get(field.getName());
        // Never hand a null to the Joiner and never call toString() on a missing value.
        return value == null ? "" : value.toString();
      }
    }));
}
/**
 * Builds a {@link CubeFact} from the given record using {@code factBuilder}.
 *
 * @param record the input record; its schema type must be {@code RECORD}
 * @return the fact produced by {@code factBuilder}
 * @throws IllegalArgumentException if the record's schema type is not {@code RECORD}
 * @throws Exception declared by the signature for callers; propagated from the fact builder if it fails
 */
public CubeFact transform(StructuredRecord record) throws Exception {
  boolean isRecordType = record.getSchema().getType() == Schema.Type.RECORD;
  Preconditions.checkArgument(isRecordType, "input must be a record.");
  return factBuilder.build(record);
}
/**
 * Serializes {@code record} to the given output as two length-prefixed byte arrays:
 * first the schema's string form, then the record's JSON string form.
 *
 * @param out the destination to write to
 * @throws IOException if writing to {@code out} or converting the record to JSON fails
 */
@SuppressWarnings("ConstantConditions")
@Override
public void write(DataOutput out) throws IOException {
  // Schema section: int length followed by the bytes.
  String schemaText = record.getSchema().toString();
  byte[] encodedSchema = Bytes.toBytes(schemaText);
  out.writeInt(encodedSchema.length);
  out.write(encodedSchema);

  // Record section: int length followed by the bytes of the JSON rendering.
  String recordJson = StructuredRecordStringConverter.toJsonString(record);
  byte[] encodedRecord = Bytes.toBytes(recordJson);
  out.writeInt(encodedRecord.length);
  out.write(encodedRecord);
}
/**
 * Converts a {@link StructuredRecord} to a json string.
 *
 * <p>The writer is managed with try-with-resources so that, if {@code writeJson} fails, that
 * failure is the exception the caller sees; any exception raised while closing the writer is
 * attached as a suppressed exception rather than replacing it.
 *
 * @param record the record to serialize, written according to its own schema
 * @return the json text produced for the record
 * @throws IOException if writing the json or closing the writer fails
 */
public static String toJsonString(StructuredRecord record) throws IOException {
  StringWriter strWriter = new StringWriter();
  try (JsonWriter writer = new JsonWriter(strWriter)) {
    writeJson(writer, record.getSchema(), record);
    return strWriter.toString();
  }
}
/**
 * Converts a {@link StructuredRecord} to a json string.
 *
 * <p>The writer is opened in a try-with-resources statement: a failure inside
 * {@code writeJson} is propagated as-is, and an exception thrown while closing the writer is
 * recorded as suppressed instead of hiding the original failure.
 *
 * @param record the record to serialize, written according to its own schema
 * @return the json text produced for the record
 * @throws IOException if writing the json or closing the writer fails
 */
public static String toJsonString(StructuredRecord record) throws IOException {
  StringWriter strWriter = new StringWriter();
  try (JsonWriter writer = new JsonWriter(strWriter)) {
    writeJson(writer, record.getSchema(), record);
    return strWriter.toString();
  }
}
private StructuredRecord.Builder getOutputBuilder(StructuredRecord input) { List<Schema.Field> outFields = new ArrayList<>(); for (Schema.Field field : input.getSchema().getFields()) { outFields.add(field); } Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields); // copy all the values StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema); for (Schema.Field inField : input.getSchema().getFields()) { outFields.add(inField); outputBuilder.set(inField.getName(), input.get(inField.getName())); } return outputBuilder; }
@Override public void write(Kryo kryo, Output output, StructuredRecord record) { // First write out the schema kryo.writeObject(output, record.getSchema()); // Then write out the data try { DATUM_WRITER.encode(record, new KryoEncoder(output)); } catch (IOException e) { throw new KryoException("Failed to encode StructuredRecord " + record.getSchema().getRecordName(), e); } }
@Override public void write(Kryo kryo, Output output, StructuredRecord record) { // First write out the schema kryo.writeObject(output, record.getSchema()); // Then write out the data try { DATUM_WRITER.encode(record, new KryoEncoder(output)); } catch (IOException e) { throw new KryoException("Failed to encode StructuredRecord " + record.getSchema().getRecordName(), e); } }
/**
 * Serializes a {@link StructuredRecord} as a json object with one member per schema field,
 * keyed by field name, whose value is produced by the serialization context.
 *
 * @param src the record to serialize
 * @param typeOfSrc the declared type of the source object (not used here)
 * @param context the context used to serialize each field value
 * @return the json object holding all field values
 */
@Override
public JsonElement serialize(StructuredRecord src, Type typeOfSrc, JsonSerializationContext context) {
  JsonObject json = new JsonObject();
  for (Schema.Field schemaField : src.getSchema().getFields()) {
    String memberName = schemaField.getName();
    JsonElement memberValue = context.serialize(src.get(memberName));
    json.add(memberName, memberValue);
  }
  return json;
}
}
/**
 * Emits a copy of the input record in which every field name is prefixed with
 * {@code config.prefix}. The output schema is obtained from {@code config.getOutputSchema}.
 *
 * @param input the record whose fields are copied
 * @param emitter receives the single renamed output record
 * @throws Exception declared by the signature for callers
 */
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
  StructuredRecord.Builder renamed = StructuredRecord.builder(config.getOutputSchema(input.getSchema()));
  for (Schema.Field sourceField : input.getSchema().getFields()) {
    String sourceName = sourceField.getName();
    renamed.set(config.prefix + sourceName, input.get(sourceName));
  }
  emitter.emit(renamed.build());
}
/**
 * Copies the input record into the output schema, unless the field named by
 * {@code config.field} holds a non-null value. In that case an error entry with code 5 is
 * emitted for the input and no output record is produced.
 *
 * @param input the record to check and copy
 * @param emitter receives either the copied record or the error entry
 * @throws Exception declared by the signature for callers
 */
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
  StructuredRecord.Builder copy = StructuredRecord.builder(getOutputSchema(input.getSchema()));
  for (Schema.Field schemaField : input.getSchema().getFields()) {
    String name = schemaField.getName();
    Object value = input.get(name);

    boolean isCheckedField = name.equals(config.field);
    if (isCheckedField && value != null) {
      // The configured field must be null; report the record as invalid and stop.
      String reason = "Field " + config.field + " was not null";
      emitter.emitError(new InvalidEntry<>(5, reason, input));
      return;
    }
    copy.set(name, value);
  }
  emitter.emit(copy.build());
}
/**
 * Turns the input record into a {@link Put} holding the schema string and the record's JSON
 * string in two columns, and emits it keyed by its row key. The row key is the current time in
 * milliseconds followed by a random UUID, both as bytes.
 *
 * @param input the record to store
 * @param emitter receives the row key / put pair
 * @throws Exception if converting the record to JSON fails
 */
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], Put>> emitter) throws Exception {
  // Row key layout: <timestamp bytes><uuid bytes>.
  byte[] timestampPart = Bytes.toBytes(System.currentTimeMillis());
  byte[] uuidPart = Bytes.toBytes(UUID.randomUUID());
  byte[] rowkey = Bytes.concat(timestampPart, uuidPart);

  Put row = new Put(rowkey);
  String schemaText = input.getSchema().toString();
  row.add(SCHEMA_COL, schemaText);
  String recordJson = StructuredRecordStringConverter.toJsonString(input);
  row.add(RECORD_COL, recordJson);

  emitter.emit(new KeyValue<>(rowkey, row));
}
/**
 * Turns the input record into a {@link Put} holding the schema string and the record's JSON
 * string in two columns, and emits it keyed by a row key made of the bytes of a random UUID.
 *
 * @param input the record to store
 * @param emitter receives the row key / put pair
 * @throws Exception if converting the record to JSON fails
 */
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], Put>> emitter) throws Exception {
  UUID rowId = UUID.randomUUID();
  byte[] rowkey = Bytes.toBytes(rowId);

  Put row = new Put(rowkey);
  String schemaText = input.getSchema().toString();
  row.add(SCHEMA_COL, schemaText);
  String recordJson = StructuredRecordStringConverter.toJsonString(input);
  row.add(RECORD_COL, recordJson);

  emitter.emit(new KeyValue<>(rowkey, row));
}
private static void writeInput(DataSetManager<Table> tableManager, @Nullable String rowKey, Iterable<StructuredRecord> records) throws Exception { tableManager.flush(); Table table = tableManager.get(); // write each record as a separate row, with the serialized record as one column and schema as another // each rowkey will be a UUID. for (StructuredRecord record : records) { byte[] row = rowKey == null ? Bytes.toBytes(UUID.randomUUID()) : Bytes.toBytes(rowKey); table.put(row, SCHEMA_COL, Bytes.toBytes(record.getSchema().toString())); table.put(row, RECORD_COL, Bytes.toBytes(StructuredRecordStringConverter.toJsonString(record))); } tableManager.flush(); }
/**
 * Emits the group key for a record. When {@code uniqueFields} is {@code null} the record itself
 * is the key; otherwise the key is a new record containing only the {@code uniqueFields} values,
 * built against the schema returned by {@code getGroupKeySchema}.
 *
 * @param record the record to derive the group key from
 * @param emitter receives the group key
 * @throws Exception declared by the signature for callers
 */
@Override
public void groupBy(StructuredRecord record, Emitter<StructuredRecord> emitter) throws Exception {
  if (uniqueFields != null) {
    StructuredRecord.Builder keyBuilder = StructuredRecord.builder(getGroupKeySchema(record.getSchema()));
    for (String uniqueField : uniqueFields) {
      keyBuilder.set(uniqueField, record.get(uniqueField));
    }
    emitter.emit(keyBuilder.build());
  } else {
    // No field restriction configured: the whole record is the key.
    emitter.emit(record);
  }
}
@Override public void groupBy(StructuredRecord record, Emitter<StructuredRecord> emitter) throws Exception { // app should provide some way to make some data calculated in configurePipeline available here. // then we wouldn't have to calculate schema here StructuredRecord.Builder builder = StructuredRecord.builder(getGroupKeySchema(record.getSchema())); for (String groupByField : conf.getGroupByFields()) { builder.set(groupByField, record.get(groupByField)); } emitter.emit(builder.build()); }
/**
 * Emits the group key for a record. When {@code fields} is {@code null} the record itself is
 * emitted; otherwise a new record containing only the {@code fields} values is emitted. The key
 * record uses {@code outputSchema} when it is set, and otherwise the schema computed by
 * {@code getOutputSchema} from the record's schema and {@code fields}.
 *
 * @param record the record to derive the group key from
 * @param emitter receives the group key
 */
@Override
public void groupBy(StructuredRecord record, Emitter<StructuredRecord> emitter) {
  if (fields == null) {
    emitter.emit(record);
    return;
  }

  Schema keySchema;
  if (outputSchema != null) {
    keySchema = outputSchema;
  } else {
    keySchema = getOutputSchema(record.getSchema(), fields);
  }

  StructuredRecord.Builder keyBuilder = StructuredRecord.builder(keySchema);
  for (String keyField : fields) {
    keyBuilder.set(keyField, record.get(keyField));
  }
  emitter.emit(keyBuilder.build());
}
/**
 * Merges the records of a join row into one record built against {@code outputSchema}. Every
 * field of every input record is copied by name, in iteration order, so a later record's value
 * is set after an earlier one's for the same field name.
 *
 * @param joinKey the key the rows were joined on (not used here)
 * @param joinRow the joined elements whose input records are merged
 * @return the merged record
 */
@Override
public StructuredRecord merge(StructuredRecord joinKey, Iterable<JoinElement<StructuredRecord>> joinRow) {
  StructuredRecord.Builder merged = StructuredRecord.builder(outputSchema);
  for (JoinElement<StructuredRecord> element : joinRow) {
    StructuredRecord source = element.getInputRecord();
    for (Schema.Field sourceField : source.getSchema().getFields()) {
      String name = sourceField.getName();
      merged.set(name, source.get(name));
    }
  }
  return merged.build();
}
public static StructuredRecord transform(StructuredRecord record, Schema schema) throws IOException { StructuredRecord.Builder builder = StructuredRecord.builder(schema); for (Schema.Field field : record.getSchema().getFields()) { String name = field.getName(); // If the field name is not in the output, then skip it and if it's not nullable, then // it would be error out -- in which case, the user has to fix the schema to proceed. if (schema.getField(name) != null) { builder.set(name, convertField(record.get(name), field.getSchema())); } } return builder.build(); }