/**
 * Returns the number of elements contained in the given object: the size of a
 * {@link List}, the number of schema fields for a {@link GenericRecord}, or 0
 * for any other (or null) object.
 *
 * @param o the object to measure; may be any type
 * @return element/field count, or 0 when the type is unsupported
 */
@Override
public int length(final Object o) {
  if (o instanceof List) {
    // Wildcard cast instead of a raw List: size() does not depend on the
    // element type, and this avoids the raw-type warning.
    return ((List<?>) o).size();
  } else if (o instanceof GenericRecord) {
    return ((GenericRecord) o).getSchema().getFields().size();
  } else {
    return 0;
  }
}
/**
 * Collects the names of all primitive top-level fields of the record's schema.
 *
 * @param obj the record whose schema is inspected
 * @return the set of primitive root-field names
 */
@Override
public Set<String> discoverRootFields(final GenericRecord obj) {
  final List<Schema.Field> fields = obj.getSchema().getFields();
  return fields.stream()
      .filter(field -> AvroFlattenerMaker.isFieldPrimitive(field))
      .map(field -> field.name())
      .collect(Collectors.toSet());
}
/**
 * Returns the property keys of the given container: an empty set for null, the
 * stringified key set for a {@link Map}, or the schema field names for a
 * {@link GenericRecord}.
 *
 * @param o the container object; may be null
 * @return the property keys as strings
 * @throws UnsupportedOperationException for any other type (not expected to be hit)
 */
@Override
public Collection<String> getPropertyKeys(final Object o) {
  if (o == null) {
    return Collections.emptySet();
  } else if (o instanceof Map) {
    // Wildcard cast: we only read keys and stringify them, so the map's type
    // parameters are irrelevant — and the unchecked cast warning is avoided.
    return ((Map<?, ?>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
  } else if (o instanceof GenericRecord) {
    return ((GenericRecord) o).getSchema().getFields().stream()
        .map(Schema.Field::name)
        .collect(Collectors.toSet());
  } else {
    throw new UnsupportedOperationException("Unused");
  }
}
/**
 * Looks up (and caches) the delta field names derived from the record's schema.
 *
 * @param record the record whose schema keys the cache lookup
 * @return the delta field names for the record's schema
 * @throws RuntimeException wrapping any {@link ExecutionException} from the cache loader
 */
@Override
public List<String> getDeltaFieldNames(GenericRecord record) {
  final Schema recordSchema = record.getSchema();
  try {
    return recordSchemaToDeltaSchemaCache.get(recordSchema);
  } catch (ExecutionException e) {
    // Preserve the loader failure as the cause.
    throw new RuntimeException(e);
  }
}
/**
 * AvroGenericRecordBolt must override this method because messages with different
 * schemas cannot be written to the same file. By treating the complete schema as
 * the "key", AbstractHdfsBolt will associate a different writer with every
 * distinct schema.
 */
@Override
protected String getWriterKey(Tuple tuple) {
  final GenericRecord record = (GenericRecord) tuple.getValue(0);
  return record.getSchema().toString();
}
/**
 * Builds a partition record whose single SCHEMA_STRING field holds the full schema
 * string of the input record, so records with identical schemas share a partition.
 *
 * @param record the record to derive a partition from
 * @return a new partition record carrying the schema string
 */
@Override
public GenericRecord partitionForRecord(GenericRecord record) {
  final String schemaString = record.getSchema().toString();
  final GenericRecord partition = new GenericData.Record(SCHEMA);
  partition.put(SCHEMA_STRING, schemaString);
  return partition;
}
}
/**
 * Convert a GenericRecord to a byte array (Avro binary encoding, using the
 * record's own schema).
 *
 * @param record the record to serialize
 * @return the Avro-encoded bytes
 * @throws IOException if encoding fails
 */
public static byte[] recordToByteArray(GenericRecord record) throws IOException {
  try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
    writer.write(record, encoder);
    // The direct binary encoder is unbuffered, but flush defensively so this
    // method stays correct if a buffered encoder is ever substituted.
    encoder.flush();
    return out.toByteArray();
  }
}
/**
 * Creates a writer bound to the schema of the tuple's first value, since each
 * distinct schema gets its own output file.
 *
 * @param path the HDFS path for the new file
 * @param tuple the tuple whose first value is the GenericRecord supplying the schema
 * @return a new Avro HDFS writer for that schema
 * @throws IOException if the file cannot be created
 */
@Override
protected AbstractHDFSWriter makeNewWriter(Path path, Tuple tuple) throws IOException {
  final GenericRecord record = (GenericRecord) tuple.getValue(0);
  return new AvroGenericRecordHDFSWriter(
      this.rotationPolicy, path, this.fs.create(path), record.getSchema());
}
}
/**
 * Extends the parent tags with the partition record itself plus one tag per
 * partition-schema field (field name mapped to the field's value).
 *
 * @param state the state passed through to the parent implementation
 * @return the combined, immutable list of tags
 */
@Override
public List<Tag<?>> generateTags(State state) {
  final ImmutableList.Builder<Tag<?>> builder = ImmutableList.builder();
  builder.addAll(super.generateTags(state));
  builder.add(new Tag<>(PARTITION, this.partition));
  for (Schema.Field field : this.partition.getSchema().getFields()) {
    final String fieldName = field.name();
    builder.add(new Tag<>(fieldName, this.partition.get(fieldName)));
  }
  return builder.build();
}
}
/**
 * Serializes this writable in order: record schema string, file schema string,
 * the record-reader ID, then the Avro-binary-encoded record. Readers must
 * consume these in the same order.
 *
 * @param out the destination. NOTE(review): the encoder construction below casts
 *            this to DataOutputStream and will throw ClassCastException for any
 *            other DataOutput implementation — confirm all callers pass one.
 * @throws IOException if any write fails
 */
@Override
public void write(DataOutput out) throws IOException {
  // Write schema since we need it to pull the data out. (see point #1 above)
  String schemaString = record.getSchema().toString(false);
  out.writeUTF(schemaString);
  schemaString = fileSchema.toString(false);
  out.writeUTF(schemaString);
  recordReaderID.write(out);
  // Write record to byte buffer
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
  // Direct (unbuffered) encoder writes straight through to the stream, so no
  // explicit flush is performed here.
  BinaryEncoder be = EncoderFactory.get().directBinaryEncoder((DataOutputStream)out, null);
  gdw.setSchema(record.getSchema());
  gdw.write(record, be);
}
/**
 * Converts a record by running every input field through convertFieldValue and
 * collecting the results into a new record shaped by the output schema.
 *
 * @param outputSchema the schema of the converted record
 * @param inputRecord the record being converted
 * @param workUnit the work-unit state passed through to field conversion
 * @return a single-element iterable holding the converted record
 * @throws DataConversionException if field conversion fails
 */
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord,
    WorkUnitState workUnit) throws DataConversionException {
  final GenericRecord converted = new GenericData.Record(outputSchema);
  for (Field inputField : inputRecord.getSchema().getFields()) {
    final String fieldName = inputField.name();
    converted.put(fieldName, convertFieldValue(outputSchema, inputField, inputRecord, workUnit));
  }
  return new SingleRecordIterable<>(converted);
}
/**
 * Returns the schema of the first buffered record, or null when no records are
 * available (list unset or empty).
 *
 * @return the first record's schema, or null
 * @throws IOException declared by the interface; not thrown here
 */
@Override
public Schema getSchema() throws IOException {
  if (recordList == null || recordList.isEmpty()) {
    return null;
  }
  return recordList.get(0).getSchema();
}
/**
 * Converts each named column of the record and appends the results to objectRow.
 * For every column it resolves the datum, the datum's schema, and the matching
 * field from the file schema (unwrapping a nullable union first), then delegates
 * the actual value conversion to worker().
 *
 * @param objectRow the mutable row the converted values are appended to (also returned)
 * @param fileSchema the file-level schema, possibly a nullable union
 * @param columnNames column names, positionally aligned with columnTypes
 * @param columnTypes Hive type infos, positionally aligned with columnNames
 * @param record the source record
 * @return objectRow, with one converted value appended per column
 * @throws AvroSerdeException if conversion fails
 */
private List<Object> workerBase(List<Object> objectRow, Schema fileSchema, List<String> columnNames,
    List<TypeInfo> columnTypes, GenericRecord record) throws AvroSerdeException {
  for (int i = 0; i < columnNames.size(); i++) {
    final String columnName = columnNames.get(i);
    final TypeInfo columnType = columnTypes.get(i);
    final Object datum = record.get(columnName);
    final Schema datumSchema = record.getSchema().getField(columnName).schema();
    // A nullable union wraps the real file schema; unwrap before field lookup.
    final Schema.Field field;
    if (AvroSerdeUtils.isNullableType(fileSchema)) {
      field = AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema).getField(columnName);
    } else {
      field = fileSchema.getField(columnName);
    }
    final Schema fieldSchema = (field == null) ? null : field.schema();
    objectRow.add(worker(datum, fieldSchema, datumSchema, columnType));
  }
  return objectRow;
}
/**
 * Serializes the writable's record to Avro binary using the shared datum writer
 * (gdw), configured with the record's own schema.
 *
 * @param writable the writable wrapping the record to serialize
 * @return the Avro-encoded bytes (only reached on success; see note below)
 */
@Override
public byte[] getBytes(AvroGenericRecordWritable writable) {
  GenericRecord record = writable.getRecord();
  byte[] valueBytes = null;
  try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
    BinaryEncoder be = EncoderFactory.get().directBinaryEncoder(out, null);
    gdw.setSchema(record.getSchema());
    gdw.write(record, be);
    out.flush();
    valueBytes = out.toByteArray();
  } catch (IOException e) {
    // Throwables.propagate wraps and rethrows unconditionally, so the return
    // below is never reached on failure. NOTE(review): propagate is deprecated
    // in recent Guava — consider rethrowing directly instead.
    Throwables.propagate(new SerDeException(e));
  }
  return valueBytes;
}
/**
 * Deserializes Avro binary bytes into the reusable writable: decodes the record,
 * then populates the writable's record, reader ID, and file schema.
 *
 * @param value the Avro-encoded record bytes
 * @return the shared, repopulated writable instance
 */
@Override
public AvroGenericRecordWritable getWritable(byte[] value) {
  GenericRecord avroRecord = null;
  try {
    avroRecord = dataReader.read(null, DecoderFactory.get().binaryDecoder(value, null));
  } catch (IOException e) {
    // Throwables.propagate wraps and rethrows unconditionally, so avroRecord is
    // always non-null past this point. NOTE(review): propagate is deprecated in
    // recent Guava — consider rethrowing directly.
    Throwables.propagate(new SerDeException(e));
  }
  avroGenericRecordWritable.setRecord(avroRecord);
  avroGenericRecordWritable.setRecordReaderID(uid);
  avroGenericRecordWritable.setFileSchema(avroRecord.getSchema());
  return avroGenericRecordWritable;
}
}
/**
 * Returns the first record whose schema defines the given field and whose value
 * for that field stringifies to the requested value, or null when none matches.
 *
 * @param records the records to scan, in iteration order
 * @param field the field name to look up
 * @param value the expected string form of the field value
 * @return the first matching record, or null
 */
private static GenericRecord find(Collection<GenericRecord> records, String field, String value) {
  for (GenericRecord candidate : records) {
    // Skip records whose schema does not even declare the field.
    if (candidate.getSchema().getField(field) == null) {
      continue;
    }
    final Object fieldValue = candidate.get(field);
    if (fieldValue != null && fieldValue.toString().equals(value)) {
      return candidate;
    }
  }
  return null;
}
}
/**
 * Verifies that partitionForRecord copies the record's schema string into the
 * SCHEMA_STRING field of the resulting partition record.
 */
@Test
public void partitionRecordTest() {
  final String expectedSchemaString = "returnSchemaString";
  final Schema mockSchema = mock(Schema.class);
  when(mockSchema.toString()).thenReturn(expectedSchemaString);
  final GenericRecord mockRecord = mock(GenericRecord.class);
  when(mockRecord.getSchema()).thenReturn(mockSchema);
  final SchemaBasedWriterPartitioner partitioner = new SchemaBasedWriterPartitioner(null, 0, 0);
  final GenericRecord partitionRecord = partitioner.partitionForRecord(mockRecord);
  Assert.assertEquals(
      partitionRecord.get(SchemaBasedWriterPartitioner.SCHEMA_STRING), expectedSchemaString);
}
}
/**
 * In case of complex data types in union {@link AvroUtils#getFieldSchema(Schema, String)}
 * should throw {@link AvroRuntimeException}.
 *
 * @throws IOException if the test Avro file cannot be read
 */
@Test(expectedExceptions = AvroRuntimeException.class)
public void testComplexTypesInUnionNotSupported() throws IOException {
  final String fieldLocation = "TestUnionObject.RecordInUnion";
  final String avroFilePath = this.AVRO_DIR + "avroDir/avroUtilsTestFile.avro";
  final GenericRecord record = getRecordFromFile(avroFilePath).get(0);
  // Expected to throw: the requested location sits inside a complex union.
  AvroUtils.getFieldSchema(record.getSchema(), fieldLocation);
}
/**
 * Prepares the fixtures: a random sample record, a fresh WorkUnitState, and a
 * metadata converter whose schema is initialized from the sample record.
 *
 * @throws SchemaConversionException if schema conversion fails during setup
 */
@BeforeTest
public void setUp() throws SchemaConversionException {
  state = new WorkUnitState();
  sampleRecord = TestUtils.generateRandomAvroRecord();
  converter = new AvroToJsonRecordWithMetadataConverter();
  converter.convertSchema(sampleRecord.getSchema(), state);
}
/**
 * Prepares the fixtures: a random sample record, a fresh WorkUnitState, and a
 * string converter whose schema is initialized from the sample record.
 *
 * @throws SchemaConversionException if schema conversion fails during setup
 */
@BeforeTest
public void setUp() throws SchemaConversionException {
  state = new WorkUnitState();
  sampleRecord = TestUtils.generateRandomAvroRecord();
  converter = new AvroToJsonStringConverter();
  converter.convertSchema(sampleRecord.getSchema(), state);
}