private StructuredRecord.Builder getOutputBuilder(StructuredRecord input) { List<Schema.Field> outFields = new ArrayList<>(); for (Schema.Field field : input.getSchema().getFields()) { outFields.add(field); } Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields); // copy all the values StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema); for (Schema.Field inField : input.getSchema().getFields()) { outFields.add(inField); outputBuilder.set(inField.getName(), input.get(inField.getName())); } return outputBuilder; }
/**
 * Increments the running count when the configured field has a non-null value in the record.
 *
 * @param record the record to inspect
 */
@Override
public void operateOn(StructuredRecord record) {
  Object value = record.get(fieldName);
  if (value == null) {
    // Null values do not contribute to the count.
    return;
  }
  count++;
}
/**
 * Builds a record for the given schema whose "body" field holds all remaining strings from
 * the iterator as a {@code String[]}.
 *
 * @param schema the schema of the record to build
 * @param bodyFields the strings to place in the "body" field
 * @return the built record
 */
@Override
public StructuredRecord make(Schema schema, Iterator<String> bodyFields) {
  StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
  // Drain the iterator so the values can be handed over as an array.
  List<String> values = Lists.newArrayList(bodyFields);
  String[] body = values.toArray(new String[0]);
  recordBuilder.set("body", body);
  return recordBuilder.build();
}
// Closes the enclosing class, whose declaration is outside this view.
}
/**
 * Converts a {@link StructuredRecord} to a delimited string.
 *
 * <p>Field values are rendered in schema order with {@code toString()}; a {@code null} value
 * is rendered as an empty string.
 *
 * @param record the record to render
 * @param delimiter the separator placed between consecutive field values
 * @return the delimited representation of the record
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return record.getSchema().getFields().stream()
    .map(field -> {
      Object fieldValue = record.get(field.getName());
      if (fieldValue == null) {
        return "";
      }
      return fieldValue.toString();
    })
    .collect(Collectors.joining(delimiter));
}
/** * Build a {@link StructuredRecord} with the fields set by this builder. * * @return a {@link StructuredRecord} with the fields set by this builder * @throws UnexpectedFormatException if there is at least one non-nullable field without a value */ public StructuredRecord build() throws UnexpectedFormatException { // check that all non-nullable fields have a value. for (Schema.Field field : schema.getFields()) { String fieldName = field.getName(); if (!fields.containsKey(fieldName)) { // if the field is not nullable and there is no value set for the field, this is invalid. if (!field.getSchema().isNullable()) { throw new UnexpectedFormatException("Field " + fieldName + " must contain a value."); } else { // otherwise, set the value for the field to null fields.put(fieldName, null); } } } return new StructuredRecord(schema, fields); }
/**
 * Remembers the configured field's value and the record itself for the first record seen;
 * every later record is ignored.
 *
 * @param record the record to inspect
 */
@Override
public void operateOn(StructuredRecord record) {
  if (!isFirst) {
    // A first record has already been captured.
    return;
  }
  first = record.get(fieldName);
  firstRecord = record;
  isFirst = false;
}
/**
 * Creates a record of the given schema, storing every string produced by the iterator in the
 * "body" field as a {@code String[]}.
 *
 * @param schema the schema of the record to create
 * @param bodyFields the strings that make up the "body" field
 * @return the created record
 */
@Override
public StructuredRecord make(Schema schema, Iterator<String> bodyFields) {
  StructuredRecord.Builder out = StructuredRecord.builder(schema);
  // Collect the iterator's contents before converting them to an array.
  List<String> collected = Lists.newArrayList(bodyFields);
  String[] bodyArray = new String[collected.size()];
  out.set("body", collected.toArray(bodyArray));
  return out.build();
}
// Closes the enclosing class, whose declaration is outside this view.
}
/**
 * Converts a {@link StructuredRecord} into a {@link MapWritable} keyed by field name.
 *
 * @param record the record to convert
 * @return a map holding one writable entry per field of the record's schema
 * @throws IOException if a field value cannot be converted; the original failure is attached
 *                     as the cause
 */
public static MapWritable covertToWritable(StructuredRecord record) throws IOException {
  MapWritable result = new MapWritable();
  for (Schema.Field field : record.getSchema().getFields()) {
    String fieldName = field.getName();
    try {
      result.put(new Text(fieldName), getWritables(record.get(fieldName), field.getSchema()));
    } catch (Exception e) {
      // Chain the original exception so its type and stack trace are not lost.
      throw new IOException(
        String.format("Type exception for field %s: %s", fieldName, e.getMessage()), e);
    }
  }
  return result;
}
/**
 * Converts a {@link StructuredRecord} to a delimited string.
 *
 * <p>Field values are rendered in schema order with {@code toString()}. A {@code null} field
 * value is rendered as an empty string instead of causing a {@link NullPointerException}.
 *
 * @param record the record to render
 * @param delimiter the separator placed between consecutive field values
 * @return the delimited representation of the record
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return Joiner.on(delimiter).join(
    Iterables.transform(record.getSchema().getFields(), new Function<Schema.Field, String>() {
      @Override
      public String apply(Schema.Field field) {
        Object value = record.get(field.getName());
        // Null values keep their position in the output as an empty string.
        return value == null ? "" : value.toString();
      }
    }));
}
/** * Build a {@link StructuredRecord} with the fields set by this builder. * * @return a {@link StructuredRecord} with the fields set by this builder * @throws UnexpectedFormatException if there is at least one non-nullable field without a value */ public StructuredRecord build() throws UnexpectedFormatException { // check that all non-nullable fields have a value. for (Schema.Field field : schema.getFields()) { String fieldName = field.getName(); if (!fields.containsKey(fieldName)) { // if the field is not nullable and there is no value set for the field, this is invalid. if (!field.getSchema().isNullable()) { throw new UnexpectedFormatException("Field " + fieldName + " must contain a value."); } else { // otherwise, set the value for the field to null fields.put(fieldName, null); } } } return new StructuredRecord(schema, fields); }
/**
 * Emits a copy of the input record in which every field name carries the configured prefix.
 *
 * @param input the record to transform
 * @param emitter receives the single transformed record
 * @throws Exception if the output record cannot be built or emitted
 */
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
  Schema prefixedSchema = config.getOutputSchema(input.getSchema());
  StructuredRecord.Builder prefixed = StructuredRecord.builder(prefixedSchema);
  for (Schema.Field source : input.getSchema().getFields()) {
    String sourceName = source.getName();
    // The value is carried over unchanged; only the field name gains the prefix.
    prefixed.set(config.prefix + sourceName, input.get(sourceName));
  }
  StructuredRecord result = prefixed.build();
  emitter.emit(result);
}
/**
 * Folds the configured field's value into the running average; records in which the field
 * is null are skipped.
 *
 * @param record the record to inspect
 */
@Override
public void operateOn(StructuredRecord record) {
  Object raw = record.get(fieldName);
  if (raw != null) {
    count++;
    double sample = ((Number) raw).doubleValue();
    // Incremental mean: move the average towards the new sample by 1/count of the difference.
    avg = avg + (sample - avg) / count;
  }
}
/**
 * Converts a delimited string to a {@link StructuredRecord} based on the schema.
 *
 * <p>Parts are matched to schema fields by position. An empty part leaves the field at that
 * position unset, so later parts are still assigned to their own fields instead of shifting
 * onto the skipped one.
 *
 * @param delimitedString the string to parse
 * @param delimiter the separator between consecutive field values
 * @param schema the schema of the record to build
 * @return the record built from the non-empty parts of the string
 * @throws java.util.NoSuchElementException if a non-empty part has no corresponding schema field
 */
public static StructuredRecord fromDelimitedString(String delimitedString, String delimiter, Schema schema) {
  StructuredRecord.Builder builder = StructuredRecord.builder(schema);
  Iterator<Schema.Field> fields = schema.getFields().iterator();

  for (String part : Splitter.on(delimiter).split(delimitedString)) {
    if (part.isEmpty()) {
      // Consume the field that belongs to this position so the remaining parts stay aligned.
      // Surplus empty parts beyond the last field are ignored, as they were before.
      if (fields.hasNext()) {
        fields.next();
      }
      continue;
    }
    builder.convertAndSet(fields.next().getName(), part);
  }

  return builder.build();
}
/**
 * Converts a structured record into an Avro {@link GenericRecord} shaped by the given schema.
 *
 * @param structuredRecord the record to convert
 * @param schema the schema from which the Avro schema of the result is derived
 * @return the Avro record holding the converted field values
 * @throws IllegalArgumentException if the input record's schema lacks a field of the Avro schema
 * @throws IOException declared by the signature; the code visible here does not throw it directly
 */
@Override
public GenericRecord transform(StructuredRecord structuredRecord,
                               co.cask.cdap.api.data.schema.Schema schema) throws IOException {
  co.cask.cdap.api.data.schema.Schema inputSchema = structuredRecord.getSchema();
  Schema targetSchema = getAvroSchema(schema);
  GenericRecordBuilder avroBuilder = new GenericRecordBuilder(targetSchema);

  for (Schema.Field avroField : targetSchema.getFields()) {
    String name = avroField.name();
    co.cask.cdap.api.data.schema.Schema.Field sourceField = inputSchema.getField(name);
    if (sourceField != null) {
      avroBuilder.set(name, convertField(structuredRecord.get(name), sourceField.getSchema()));
      continue;
    }
    // Every field of the target schema must exist in the input record's schema.
    throw new IllegalArgumentException("Input record does not contain the " + name + " field.");
  }
  return avroBuilder.build();
}
/**
 * Converts a {@link StructuredRecord} to a delimited string.
 *
 * <p>Values appear in schema field order; {@code null} values become empty strings.
 *
 * @param record the record to render
 * @param delimiter the separator placed between consecutive field values
 * @return the delimited representation of the record
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return record.getSchema().getFields().stream()
    .map(schemaField -> record.<Object>get(schemaField.getName()))
    .map(raw -> {
      if (raw != null) {
        return raw.toString();
      }
      return "";
    })
    .collect(Collectors.joining(delimiter));
}
/**
 * Emits the group key for a record: the record itself when no unique fields are configured,
 * otherwise a record containing only the unique fields.
 *
 * @param record the record to derive a group key from
 * @param emitter receives the group key
 * @throws Exception if the key record cannot be built or emitted
 */
@Override
public void groupBy(StructuredRecord record, Emitter<StructuredRecord> emitter) throws Exception {
  StructuredRecord groupKey;
  if (uniqueFields != null) {
    StructuredRecord.Builder keyBuilder = StructuredRecord.builder(getGroupKeySchema(record.getSchema()));
    for (String uniqueField : uniqueFields) {
      keyBuilder.set(uniqueField, record.get(uniqueField));
    }
    groupKey = keyBuilder.build();
  } else {
    // Without configured unique fields the whole record acts as the key.
    groupKey = record;
  }
  emitter.emit(groupKey);
}
/**
 * Reads the configured field from the record as a {@code Long}.
 *
 * <p>Integral values are returned exactly. Any other value is parsed from its string form as
 * a double and truncated to a long.
 *
 * @param record the record to read from
 * @return the field value as a long, or {@code null} if the field is null
 * @throws NumberFormatException if a non-integral value's string form is not a parsable number
 */
private Long getValue(StructuredRecord record) {
  Object val = record.get(name);
  if (val == null) {
    return null;
  }
  if (val instanceof Long || val instanceof Integer || val instanceof Short || val instanceof Byte) {
    // Going through a double would lose precision for magnitudes above 2^53.
    return ((Number) val).longValue();
  }
  return Double.valueOf(val.toString()).longValue();
}
// Closes the enclosing class, whose declaration is outside this view.
}
/**
 * Converts a delimited string to a {@link StructuredRecord} based on the schema.
 *
 * <p>Each part of the string corresponds to the schema field at the same position. An empty
 * part leaves that field unset rather than shifting the following parts onto it.
 *
 * @param delimitedString the string to parse
 * @param delimiter the separator between consecutive field values
 * @param schema the schema of the record to build
 * @return the record built from the non-empty parts of the string
 * @throws java.util.NoSuchElementException if a non-empty part has no corresponding schema field
 */
public static StructuredRecord fromDelimitedString(String delimitedString, String delimiter, Schema schema) {
  StructuredRecord.Builder builder = StructuredRecord.builder(schema);
  Iterator<Schema.Field> fields = schema.getFields().iterator();

  for (String part : Splitter.on(delimiter).split(delimitedString)) {
    if (!part.isEmpty()) {
      builder.convertAndSet(fields.next().getName(), part);
    } else if (fields.hasNext()) {
      // Skip the field at this position to keep parts and fields aligned. Empty parts past
      // the last field are ignored, matching the previous behavior.
      fields.next();
    }
  }

  return builder.build();
}
public Put toPut(StructuredRecord record) { Schema recordSchema = record.getSchema(); Preconditions.checkArgument(recordSchema.getType() == Schema.Type.RECORD, "input must be a record."); Schema.Field keyField = getKeyField(recordSchema); Preconditions.checkArgument(keyField != null, "Could not find key field in record."); Put output = createPut(record, keyField); for (Schema.Field field : recordSchema.getFields()) { if (field.getName().equals(keyField.getName())) { continue; } // Skip fields that are not present in the Output Schema if (outputSchema != null && outputSchema.getField(field.getName()) == null) { continue; } setField(output, field, record.get(field.getName())); } return output; }