/**
 * Returns the key held by the wrapped key/value record.
 *
 * @return the value read from the record under {@code KEY_FIELD}, cast to {@code K}
 */
@SuppressWarnings("unchecked")
public K getKey() {
  final Object rawKey = mKeyValueRecord.get(KEY_FIELD);
  return (K) rawKey;
}
/**
 * Converts every named column of {@code record} and appends the converted values,
 * in column order, to {@code objectRow}.
 *
 * @param objectRow   list that receives one converted value per column
 * @param fileSchema  schema used to look up the per-column "file" field; when it is a
 *                    nullable type (per {@code AvroSerdeUtils.isNullableType}) the lookup
 *                    is done on its other, non-null type
 * @param columnNames names of the columns to read from {@code record}
 * @param columnTypes type info for each column, parallel to {@code columnNames}
 * @param record      record supplying the data and the per-field record schema
 * @return the same {@code objectRow} instance that was passed in
 * @throws AvroSerdeException propagated from {@code worker}
 */
private List<Object> workerBase(List<Object> objectRow, Schema fileSchema, List<String> columnNames,
                                List<TypeInfo> columnTypes, GenericRecord record)
    throws AvroSerdeException {
  for (int idx = 0; idx < columnNames.size(); idx++) {
    final TypeInfo type = columnTypes.get(idx);
    final String name = columnNames.get(idx);
    final Object datum = record.get(name);
    final Schema recordFieldSchema = record.getSchema().getField(name).schema();

    // Resolve the matching field in the file schema, unwrapping a nullable type first.
    final Schema.Field fileField;
    if (AvroSerdeUtils.isNullableType(fileSchema)) {
      fileField = AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema).getField(name);
    } else {
      fileField = fileSchema.getField(name);
    }

    // The file schema may not contain the column at all; pass null in that case.
    final Schema fileFieldSchema = (fileField == null) ? null : fileField.schema();
    objectRow.add(worker(datum, fileFieldSchema, recordFieldSchema, type));
  }
  return objectRow;
}
/**
 * Returns the value held by the wrapped key/value record.
 *
 * @return the value read from the record under {@code VALUE_FIELD}, cast to {@code V}
 */
@SuppressWarnings("unchecked")
public V getValue() {
  final Object rawValue = mKeyValueRecord.get(VALUE_FIELD);
  return (V) rawValue;
}
/**
 * Returns the first record whose {@code field} is declared in its schema, is non-null,
 * and whose string form equals {@code value}.
 *
 * @param records candidates to scan, in iteration order
 * @param field   name of the field to inspect
 * @param value   expected string form of the field
 * @return the first matching record, or {@code null} when none matches
 */
private static GenericRecord find(Collection<GenericRecord> records, String field, String value) {
  for (GenericRecord candidate : records) {
    // Skip records whose schema lacks the field before touching the value.
    if (candidate.getSchema().getField(field) != null
        && candidate.get(field) != null
        && candidate.get(field).toString().equals(value)) {
      return candidate;
    }
  }
  return null;
}
} // closes an enclosing scope opened outside this chunk
/**
 * Passes a record only when it is a {@code GenericRecord} with a non-null
 * {@code "header"} that itself has a non-null {@code "guid"}.
 *
 * @param record the object to check
 * @return {@code PASSED} when the header guid is present, {@code FAILED} otherwise
 */
@Override
public Result executePolicy(Object record) {
  if (record instanceof GenericRecord) {
    GenericRecord header = (GenericRecord) ((GenericRecord) record).get("header");
    if (header != null && header.get("guid") != null) {
      return RowLevelPolicy.Result.PASSED;
    }
  }
  return RowLevelPolicy.Result.FAILED;
}
} // closes an enclosing scope opened outside this chunk
/**
 * Checks that the partition record produced for an input record exposes the string
 * form of that record's schema under {@code SCHEMA_STRING}.
 */
@Test
public void partitionRecordTest() {
  final String expectedSchemaString = "returnSchemaString";

  // A record stub that hands out a schema stub with a known toString().
  final Schema schemaStub = mock(Schema.class);
  final GenericRecord recordStub = mock(GenericRecord.class);
  when(recordStub.getSchema()).thenReturn(schemaStub);
  when(schemaStub.toString()).thenReturn(expectedSchemaString);

  final SchemaBasedWriterPartitioner partitioner = new SchemaBasedWriterPartitioner(null, 0, 0);
  final GenericRecord result = partitioner.partitionForRecord(recordStub);

  Assert.assertEquals(result.get(SchemaBasedWriterPartitioner.SCHEMA_STRING), expectedSchemaString);
}
} // closes an enclosing scope opened outside this chunk
/**
 * Walks a {@code '/'}-separated path through nested records and returns the value
 * found at the end of it.
 *
 * <p>Empty path segments are ignored. Each intermediate value is cast to
 * {@code GenericRecord} before the next segment is read.
 *
 * @param element       record at which the walk starts
 * @param columnMapping {@code '/'}-separated path of field names
 * @return the value at the end of the path, or {@code null} as soon as a {@code null}
 *         is encountered along the way
 */
private static Object locateNode(GenericRecord element, String columnMapping) {
  final Iterable<String> segments = Splitter.on('/').omitEmptyStrings().split(columnMapping);

  Object current = element;
  for (String segment : segments) {
    if (current == null) {
      // Nothing left to descend into; fall through and return null.
      break;
    }
    current = ((GenericRecord) current).get(segment);
  }
  return current;
}
/**
 * Looks up {@code s} in {@code o}, which may be a {@code GenericRecord} or a {@code Map}.
 *
 * @param o container to read from; may be {@code null}
 * @param s field name or map key
 * @return the looked-up value, or {@code null} when {@code o} is {@code null}
 * @throws UnsupportedOperationException when {@code o} is neither a record nor a map;
 *         the message is the runtime class name of {@code o}
 */
@Override
public Object getMapValue(final Object o, final String s) {
  if (o == null) {
    return null;
  }
  if (o instanceof GenericRecord) {
    return ((GenericRecord) o).get(s);
  }
  if (o instanceof Map) {
    return ((Map<?, ?>) o).get(s);
  }
  throw new UnsupportedOperationException(o.getClass().getName());
}
/**
 * Reads a top-level field from the record and passes it through {@code transformValue}.
 *
 * @param record record to read from
 * @param key    name of the field
 * @return the transformed field value
 */
@Override
public Object getRootField(final GenericRecord record, final String key) {
  final Object rawValue = record.get(key);
  return transformValue(rawValue);
}
/**
 * Normalizes a raw field value into the form handed to callers.
 *
 * <ul>
 *   <li>When {@code fromPigAvroStorage} is set and the value is a {@code GenericData.Array},
 *       each element is treated as a record and replaced by the string form of its
 *       first field.</li>
 *   <li>A {@code ByteBuffer} becomes a UTF-8 decoded string when {@code binaryAsString}
 *       is set, otherwise a {@code byte[]}.</li>
 *   <li>A {@code Utf8} becomes a {@code String}.</li>
 *   <li>Any other {@code List} is copied with {@code null} elements removed.</li>
 *   <li>Everything else is returned unchanged.</li>
 * </ul>
 *
 * @param field the raw value; may be {@code null}
 * @return the normalized value
 */
private Object transformValue(final Object field) {
  if (fromPigAvroStorage && field instanceof GenericData.Array) {
    return Lists.transform((List) field, item -> String.valueOf(((GenericRecord) item).get(0)));
  }
  if (field instanceof ByteBuffer) {
    // ByteBuffer.array() exposes the whole backing array regardless of position, limit
    // and array offset, and throws for read-only or direct buffers. Copy only the
    // readable window, through a duplicate so the caller's buffer position is untouched.
    final ByteBuffer readable = ((ByteBuffer) field).duplicate();
    final byte[] bytes = new byte[readable.remaining()];
    readable.get(bytes);
    if (binaryAsString) {
      return StringUtils.fromUtf8(bytes);
    } else {
      return bytes;
    }
  }
  if (field instanceof Utf8) {
    return field.toString();
  }
  if (field instanceof List) {
    return ((List) field).stream().filter(Objects::nonNull).collect(Collectors.toList());
  }
  return field;
}
} // closes an enclosing scope opened outside this chunk
/**
 * Returns the string form of the named field of {@code record}.
 *
 * @param fieldName name of the field to read
 * @param record    record to read from
 * @return {@code toString()} of the field value
 * @throws FieldMappingException wrapping any exception raised while reading or
 *         stringifying the field (including a {@code null} field value)
 */
@Override
public String getValue(String fieldName, GenericRecord record) throws FieldMappingException {
  try {
    return record.get(fieldName).toString();
  } catch (Exception e) {
    throw new FieldMappingException("Could not find field " + fieldName, e);
  }
}
/**
 * Builds a partition record (using {@code SCHEMA}) whose {@code TITLE} entry is the
 * input record's {@code "title"} field.
 *
 * @param record record being partitioned
 * @return a new partition record carrying the title
 */
@Override
public GenericRecord partitionForRecord(GenericRecord record) {
  final GenericRecord partitionKey = new GenericData.Record(SCHEMA);
  final Object title = record.get("title");
  partitionKey.put(TITLE, title);
  return partitionKey;
}
} // closes an enclosing scope opened outside this chunk
/**
 * Produces the output value for one field.
 *
 * <p>The field named by {@code payloadField} is produced by {@code upConvertPayload};
 * every other field is copied from the input record by name.
 *
 * @param outputSchema schema of the output record (not read here)
 * @param field        output field being populated
 * @param inputRecord  record supplying the values
 * @param workUnit     current work unit state (not read here)
 * @return the value for {@code field}
 * @throws DataConversionException declared for overriders and the payload conversion
 */
protected Object convertFieldValue(Schema outputSchema, Field field, GenericRecord inputRecord,
    WorkUnitState workUnit) throws DataConversionException {
  final String fieldName = field.name();
  if (!fieldName.equals(payloadField)) {
    return inputRecord.get(fieldName);
  }
  return upConvertPayload(inputRecord);
}
} // closes an enclosing scope opened outside this chunk
/**
 * Converts one input record into a single {@code TupleDocument}.
 *
 * <p>The document key is the string form of {@code keyField}. The payload bytes and
 * flags are read from {@code valueField} and {@code flagsField} of the nested record
 * stored under {@code dataRecordField}.
 *
 * @param outputSchema output schema (not read here)
 * @param inputRecord  record to convert
 * @param workUnit     current work unit state (not read here)
 * @return an iterable holding exactly the one converted document
 * @throws DataConversionException declared by the overridden method
 */
@Override
public Iterable<TupleDocument> convertRecord(String outputSchema, GenericRecord inputRecord,
    WorkUnitState workUnit) throws DataConversionException {
  final String documentKey = inputRecord.get(keyField).toString();

  final GenericRecord payload = (GenericRecord) inputRecord.get(dataRecordField);
  final ByteBuffer payloadBytes = (ByteBuffer) payload.get(valueField);
  final Integer payloadFlags = (Integer) payload.get(flagsField);

  // Copy the bytes into a Netty buffer.
  final ByteBuf content = Unpooled.copiedBuffer(payloadBytes);
  final TupleDocument document = new TupleDocument(documentKey, Tuple.create(content, payloadFlags));
  return new SingleRecordIterable<>(document);
}
} // closes an enclosing scope opened outside this chunk
@Override public List<Object> deserialize(ByteBuffer ser) { try { Schema schema = schemas.getSchema(schemaString); DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(Utils.toByteArray(ser), null); GenericRecord record = reader.read(null, decoder); ArrayList<Object> list = new ArrayList<>(fieldNames.size()); for (String field : fieldNames) { Object value = record.get(field); // Avro strings are stored using a special Avro Utf8 type instead of using Java primitives list.add(SerdeUtils.convertAvroUtf8(value)); } return list; } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Records a {@code BUILD} action for the current partition and creates its writer.
 *
 * <p>A partition whose string form matches {@code .*\d+.*} (contains a digit) gets a
 * {@code SpeculativeNotSafeTestWriter}; any other partition gets a {@code TestDataWriter}.
 *
 * @return the writer for the current partition
 * @throws IOException declared by the overridden method
 */
@Override
public DataWriter build() throws IOException {
  final String partitionName = this.partition.get().get(TestPartitioner.PARTITION).toString();
  this.actions.add(new Action(Actions.BUILD, partitionName, null));

  return partitionName.matches(".*\\d+.*")
      ? new SpeculativeNotSafeTestWriter(partitionName)
      : new TestDataWriter(partitionName);
}
/**
 * Converts {@code sampleRecord} and checks the content type in the global metadata
 * and the text of {@code field1} on the first converted record.
 */
@Test
public void testRecord() throws DataConversionException {
  final Iterable<RecordWithMetadata<JsonNode>> converted =
      converter.convertRecord(null, sampleRecord, state);
  final RecordWithMetadata<JsonNode> first = converted.iterator().next();

  Assert.assertEquals(first.getMetadata().getGlobalMetadata().getContentType(), "test.name+json");

  final String convertedField1 = first.getRecord().get("field1").getTextValue();
  Assert.assertEquals(convertedField1, sampleRecord.get("field1").toString());
}
} // closes an enclosing scope opened outside this chunk
/**
 * Serializes {@code address}, deserializes it as a generic record using the address
 * schema, and checks that city, num and state survive the round trip.
 */
@Test
public void testGenericRecord() throws Exception {
  final byte[] serialized = writeRecord(address, Address.getClassSchema());

  final DeserializationSchema<GenericRecord> genericDeserializer =
      AvroDeserializationSchema.forGeneric(address.getSchema());
  final GenericRecord roundTripped = genericDeserializer.deserialize(serialized);

  assertEquals(address.getCity(), roundTripped.get("city").toString());
  assertEquals(address.getNum(), roundTripped.get("num"));
  assertEquals(address.getState(), roundTripped.get("state").toString());
}
@Test public void testGenericRecord() throws IOException { final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath()); final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class); Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}"); outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE); outputFormat.setSchema(schema); output(outputFormat, schema); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader); while (dataFileReader.hasNext()) { GenericRecord record = dataFileReader.next(); assertEquals(record.get("user_name").toString(), "testUser"); assertEquals(record.get("favorite_number"), 1); assertEquals(record.get("favorite_color").toString(), "blue"); } //cleanup FileSystem fs = FileSystem.getLocalFileSystem(); fs.delete(outputPath, false); }
/**
 * Runs the date/time conversion scenario with logical types disabled and checks that
 * the {@code date}, {@code time} and {@code timestamp} fields are written as
 * {@code Utf8} strings equal to the {@code toString()} of the source values.
 */
@Test
public void testConvertToAvroStreamForDateTimeAsString() throws SQLException, IOException, ParseException {
  final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
      .convertNames(true)
      .useLogicalTypes(false)
      .build();

  testConvertToAvroStreamForDateTime(
      options,
      (rec, dateValue) -> {
        assertEquals(new Utf8(dateValue.toString()), rec.get("date"));
      },
      (rec, timeValue) -> {
        assertEquals(new Utf8(timeValue.toString()), rec.get("time"));
      },
      (rec, timestampValue) -> {
        assertEquals(new Utf8(timestampValue.toString()), rec.get("timestamp"));
      });
}