return new Path(""); for (Schema.Field field : record.getSchema().getFields()) { String sanitizedName = HadoopUtils.sanitizePath(field.name(), "_"); String sanitizedValue = HadoopUtils.sanitizePath(record.get(field.name()).toString(), "_"); if (replacePathSeparators) { sanitizedName = sanitizedName.replaceAll(Path.SEPARATOR, "_"); return new Path(Joiner.on(Path.SEPARATOR).join(tokens));
/**
 * Populates {@code delta} with the values used for a full-resync delta of the
 * complex test schema.
 *
 * <p>Sets {@code testField1} to {@code "abc"}, {@code testField2} to a nested
 * {@code org.kaa.config.testRecordT} record whose {@code testField3} is
 * {@code 456}, and {@code __uuid} to a fixed holding the bytes 1..16.
 *
 * @param delta the record to fill; its schema supplies the nested record and
 *              fixed schemas
 */
public static void fillComplexFullResyncDelta(GenericRecord delta) {
  // Fixed-size UUID value, typed by the schema of the delta's own "__uuid" field.
  GenericFixed uuid = new GenericData.Fixed(
      delta.getSchema().getField("__uuid").schema(),
      new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});

  // Nested record: its schema is looked up by full name among the types of "testField2".
  GenericRecord nestedRecord = new GenericData.Record(
      getSchemaByFullName(
          delta.getSchema().getField("testField2").schema().getTypes(),
          "org.kaa.config.testRecordT"));
  nestedRecord.put("testField3", 456);

  // The delta itself is only touched once both values were built successfully.
  delta.put("testField1", "abc");
  delta.put("testField2", nestedRecord);
  delta.put("__uuid", uuid);
}
/**
 * Derives the partition of a record from its {@code "title"} field.
 *
 * @param record the incoming record
 * @return a new record on {@code SCHEMA} whose {@code TITLE} field holds the
 *         value of {@code record.get("title")}
 */
@Override
public GenericRecord partitionForRecord(GenericRecord record) {
  GenericRecord titlePartition = new GenericData.Record(SCHEMA);
  Object title = record.get("title");
  titlePartition.put(TITLE, title);
  return titlePartition;
}
} // closes the enclosing type, whose declaration is outside this excerpt
/**
 * Derives the partition of a record from the textual form of its schema.
 *
 * @param record the incoming record
 * @return a new record on {@code SCHEMA} whose {@code SCHEMA_STRING} field
 *         holds {@code record.getSchema().toString()}
 */
@Override
public GenericRecord partitionForRecord(GenericRecord record) {
  GenericRecord schemaPartition = new GenericData.Record(SCHEMA);
  String schemaText = record.getSchema().toString();
  schemaPartition.put(SCHEMA_STRING, schemaText);
  return schemaPartition;
}
} // closes the enclosing type, whose declaration is outside this excerpt
/**
 * Converts one Avro record into a single-element iterable of {@link JsonObject}.
 *
 * <p>Every field of the input record's schema is copied into a map keyed by
 * field name; {@code Utf8} values are turned into {@code String}s first. The
 * map is then serialized with this converter's Gson instance and parsed back
 * as a {@code JsonObject}.
 *
 * @param outputSchema not consulted by this method
 * @param inputRecord  the Avro record to convert
 * @param workUnit     not consulted by this method
 * @return a singleton collection holding the converted object
 * @throws DataConversionException declared by the overridden method; not
 *         thrown directly here
 */
@Override
public Iterable<JsonObject> convertRecord(JsonArray outputSchema, GenericRecord inputRecord,
    WorkUnitState workUnit) throws DataConversionException {
  Map<String, Object> fieldValues = Maps.newHashMap();
  for (Field avroField : inputRecord.getSchema().getFields()) {
    String fieldName = avroField.name();
    Object value = inputRecord.get(fieldName);
    // instanceof is already false for null, so no separate null check is needed.
    if (value instanceof Utf8) {
      value = value.toString();
    }
    fieldValues.put(fieldName, value);
  }

  // Round-trip through the JSON text form, exactly as the map would be serialized.
  String serialized = this.gson.toJson(fieldValues);
  JsonObject converted = this.gson.fromJson(serialized, JsonObject.class).getAsJsonObject();
  return Collections.singleton(converted);
}
} // closes the enclosing type, whose declaration is outside this excerpt
new Path(fileName), orcConfig, orcSchema, while (reader.hasNext()) { currRecord = reader.next(currRecord); List<Schema.Field> fields = currRecord.getSchema().getFields(); if (fields != null) { Object[] row = new Object[fields.size()]; for (int i = 0; i < fields.size(); i++) { Schema.Field field = fields.get(i); Schema fieldSchema = field.schema(); Object o = currRecord.get(field.name()); try { row[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), o); } catch (ArrayIndexOutOfBoundsException aioobe) { getLogger().error("Index out of bounds at record {} for column {}, type {}, and object {}", new Object[]{recordCount, i, fieldSchema.getType().getName(), o.toString()}, aioobe); throw new IOException(aioobe); : NiFiOrcUtils.normalizeHiveTableName(hiveAvroSchema.get().getFullName()); String hiveDDL = NiFiOrcUtils.generateHiveDDL(hiveAvroSchema.get(), hiveTableName);
/**
 * Reads back every log file matching
 * {@code /logs/<applicationToken>/<logSchemaVersion>/data*} and verifies its
 * content.
 *
 * <p>Each record is read with a wrapper schema generated from
 * {@code TestLogData}'s class schema. The record header must equal
 * {@code header}; the record data is collected and the collected list must
 * equal {@code testLogs}.
 *
 * @param header   the header every stored record is expected to carry
 * @param testLogs the log entries expected, in read order
 * @throws IOException if listing or reading the files fails
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs)
    throws IOException {
  String logsGlob = "/logs" + Path.SEPARATOR + applicationToken
      + Path.SEPARATOR + logSchemaVersion
      + Path.SEPARATOR + "data*";
  FileStatus[] matchedFiles = fileSystem.globStatus(new Path(logsGlob));

  Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(
      TestLogData.getClassSchema().toString());
  List<TestLogData> actualLogs = new ArrayList<>();

  for (FileStatus matchedFile : matchedFiles) {
    FileReader<GenericRecord> avroReader = null;
    try {
      SeekableInput seekable = new FsInput(matchedFile.getPath(), fileSystem.getConf());
      DatumReader<GenericRecord> wrapperReader = new SpecificDatumReader<>(wrapperSchema);
      avroReader = DataFileReader.openReader(seekable, wrapperReader);

      for (GenericRecord wrapped : avroReader) {
        Object actualHeader = wrapped.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
        Assert.assertEquals(header, (RecordHeader) actualHeader);

        Object actualData = wrapped.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
        actualLogs.add((TestLogData) actualData);
      }
    } finally {
      // Tolerates a null reader when opening the file failed.
      IOUtils.closeQuietly(avroReader);
    }
  }

  Assert.assertEquals(testLogs, actualLogs);
}
/**
 * Checks {@code AvroUtils.serializeAsPath} on a two-field record for three
 * flag combinations: with field names and separator replacement, without
 * field names, and without either.
 */
@Test
public void testSerializeAsPath() throws Exception {
  String schemaJson = "{\"type\":\"record\", \"name\":\"test\", "
      + "\"fields\":["
      + "{\"name\": \"name\", \"type\": \"string\"}, "
      + "{\"name\": \"title\", \"type\": \"string\"}"
      + "]}";
  Schema schema = new Schema.Parser().parse(schemaJson);

  GenericRecord partition = new GenericData.Record(schema);
  // The name value mixes a path separator, a colon, a backslash and a space.
  partition.put("name", "a/b:c\\d e");
  partition.put("title", "title");

  Path withNamesSanitized = AvroUtils.serializeAsPath(partition, true, true);
  Assert.assertEquals(withNamesSanitized, new Path("name=a_b_c_d_e/title=title"));

  Path valuesOnlySanitized = AvroUtils.serializeAsPath(partition, false, true);
  Assert.assertEquals(valuesOnlySanitized, new Path("a_b_c_d_e/title"));

  // With both flags off the "/" inside the value survives as a path separator.
  Path valuesOnlyRaw = AvroUtils.serializeAsPath(partition, false, false);
  Assert.assertEquals(valuesOnlyRaw, new Path("a/b_c_d_e/title"));
}
/**
 * Imports a table whose single {@code INT} column holds {@code NULL} and
 * checks that the first record of the produced {@code part-m-00000.avro}
 * file has a null {@code DATA_COL0}.
 *
 * @throws IOException  if the output file cannot be read or closed
 * @throws SQLException if creating the source table fails
 */
@Test
public void testNullableAvroImport() throws IOException, SQLException {
  String[] types = { "INT" };
  String[] vals = { null };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    GenericRecord record1 = reader.next();
    assertNull(record1.get("DATA_COL0"));
  } finally {
    // Release the underlying file handle even when the assertion fails.
    reader.close();
  }
}
public static void fillArrayFullResyncDelta(GenericRecord delta) { byte[] rawUuid = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; GenericFixed uuid = new GenericData.Fixed(delta.getSchema() .getField("__uuid").schema(), rawUuid); delta.put("__uuid", uuid); delta.put("testField1", testField1); GenericRecord itemRecord1 = new GenericData.Record(getSchemaByFullName( testField1.getSchema().getElementType().getTypes(), "org.kaa.config.testRecordItemT")); itemRecord1.put("testField2", 1); byte[] rawItemUuid1 = new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; GenericFixed itemUuid1 = new GenericData.Fixed(itemRecord1 .getSchema().getField("__uuid").schema(), rawItemUuid1); itemRecord1.put("__uuid", itemUuid1); GenericRecord itemRecord2 = new GenericData.Record(getSchemaByFullName( testField1.getSchema().getElementType().getTypes(), "org.kaa.config.testRecordItemT")); itemRecord2.put("testField2", 2); byte[] rawItemUuid2 = new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2}; GenericFixed itemUuid2 = new GenericData.Fixed(itemRecord2 .getSchema().getField("__uuid").schema(), rawItemUuid2); itemRecord2.put("__uuid", itemUuid2); GenericRecord itemRecord3 = new GenericData.Record(getSchemaByFullName( testField1.getSchema().getElementType().getTypes(), "org.kaa.config.testRecordItemT")); itemRecord3.put("testField2", 3);
@SuppressWarnings(value="unchecked") private Object generate(Schema schema, Random random, int d) { switch (schema.getType()) { case RECORD: GenericRecord record = new GenericData.Record(schema); for (Schema.Field field : schema.getFields()) { Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d+1) : GenericData.get().getDefaultValue(field); record.put(field.name(), value); List<String> symbols = schema.getEnumSymbols(); return new GenericData.EnumSymbol (schema, symbols.get(random.nextInt(symbols.size()))); new GenericData.Array(length<=0?0:length, schema); for (int i = 0; i < length; i++) array.add(generate(schema.getElementType(), random, d+1)); return array; case MAP:
@Test public void testByteBufferDeepCopy() { // Test that a deep copy of a byte buffer respects the byte buffer // limits and capacity. byte[] buffer_value = {0, 1, 2, 3, 0, 0, 0}; ByteBuffer buffer = ByteBuffer.wrap(buffer_value, 1, 4); Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); Field byte_field = new Field("bytes", Schema.create(Type.BYTES), null, null); schema.setFields(Arrays.asList(byte_field)); GenericRecord record = new GenericData.Record(schema); record.put(byte_field.name(), buffer); GenericRecord copy = GenericData.get().deepCopy(schema, record); ByteBuffer buffer_copy = (ByteBuffer) copy.get(byte_field.name()); assertEquals(buffer, buffer_copy); }
GenericRecord recordNew1 = new GenericData.Record(schemaParser.getTypes().get("org.kaaproject.recordT")); recordNew1.put("intField", 4); GenericFixed uuidNew1 = AvroUtils.generateUuidObject(); recordNew1.put(CommonConstants.UUID_FIELD, uuidNew1); GenericRecord recordNew2 = new GenericData.Record(schemaParser.getTypes().get("org.kaaproject.recordT")); recordNew2.put("intField", 5); recordNew2.put(CommonConstants.UUID_FIELD, uuidNew1); GenericRecord rootNew = new GenericData.Record(avroSchema); GenericArray arrayNew = new GenericData.Array<>(2, rootNew.getSchema().getField("complexArrayField").schema()); arrayNew.add(recordNew1); arrayNew.add(recordNew2); rootNew.put("complexArrayField", arrayNew); recordOld1.put("intField", 6); recordOld1.put(CommonConstants.UUID_FIELD, uuidNew1); recordOld2.put("intField", 7); GenericFixed uuidOld2 = AvroUtils.generateUuidObject(); recordOld2.put(CommonConstants.UUID_FIELD, uuidOld2); GenericArray arrayOld = new GenericData.Array<>(2, rootOld.getSchema().getField("complexArrayField").schema()); arrayOld.add(recordOld1); arrayOld.add(recordOld2); rootOld.put("complexArrayField", arrayOld); rootOld.put(CommonConstants.UUID_FIELD, AvroUtils.generateUuidObject()); GenericArray processedArray = (GenericArray) processedConfiguration.get("complexArrayField");
Schema rootSchema = root.getSchema(); for (Field field : rootSchema.getFields()) { Object value = root.get(field.name()); if (value instanceof List) { List<Object> values = (List<Object>) value; Schema arraySchema = getArraySchema(delta, field.name()); GenericArray deltaArray = new GenericData.Array(values.size(), arraySchema); for (Object item : values) { addComplexItemToArray(record, deltaArray); } else { deltaArray.add(item); delta.put(field.name(), deltaArray); } else if (value instanceof GenericContainer) { processComplexField(delta, field.name(), (GenericContainer) value, null, null); } else { delta.put(field.name(), value);
Protocol protocol = new Protocol("Simple", "org.apache.avro.test"); List<Field> params = new ArrayList<>(); params.add(new Field("record", record, null, null)); protocol.createMessage("echo", null, Schema.createRecord(params), record, Schema.createUnion(new ArrayList<>())); try { GenericRequestor r = new GenericRequestor(protocol, t); GenericRecord args = new GenericData.Record(message.getRequest()); GenericRecord rec = new GenericData.Record(record); rec.put("name", new Utf8("foo")); rec.put("kind", new GenericData.EnumSymbol (PROTOCOL.getType("Kind"), "BAR")); rec.put("hash", new GenericData.Fixed (PROTOCOL.getType("MD5"), new byte[]{0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5})); rec.put("extra", Boolean.TRUE); args.put("record", rec); GenericRecord response = (GenericRecord)r.request("echo", args); assertEquals(rec, response);
String newStr = new String(baosNew.toByteArray(), "UTF-8"); BaseData oldData = new BaseData(new BaseSchema(schema.toString()), oldStr); BaseData newData = new BaseData(new BaseSchema(schema.toString()), newStr); RawBinaryDelta deltaResult = calculator.calculate(oldData, newData); GenericRecord delta1 = new GenericData.Record(getDeltaSchemaByFullName(protocolSchema, "org.kaa.config.testT")); GenericEnumSymbol reset = new GenericData.EnumSymbol(getSchemaByFullName(delta1.getSchema().getField("testField1").schema().getTypes(), "org.kaaproject.configuration.resetT"), "reset"); byte[] rawUuid = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; GenericFixed uuid = new GenericData.Fixed(delta1.getSchema().getField("__uuid").schema(), rawUuid); delta1.put("testField1", reset); delta1.put("__uuid", uuid); GenericRecord delta2 = new GenericData.Record(getDeltaSchemaByFullName(protocolSchema, "org.kaa.config.testT")); delta2.put("__uuid", uuid); GenericArray<Integer> testField1 = new GenericData.Array<Integer>(3, getArraySchema(delta2, "testField1")); testField1.add(321); testField1.add(456); testField1.add(654); delta2.put("testField1", testField1);
/**
 * Construct and use a different protocol whose "hello" method has an extra
 * argument to check that schema is sent to parse request.
 *
 * <p>The request record carries a boolean {@code extra} field next to the
 * string {@code greeting}; the call is expected to answer {@code "goodbye"}.
 */
@Test
public void testHandshake() throws IOException {
  // Request parameters of the altered "hello" message.
  List<Field> requestFields = new ArrayList<>();
  requestFields.add(
      new Schema.Field("extra", Schema.create(Schema.Type.BOOLEAN), null, null));
  requestFields.add(
      new Schema.Field("greeting", Schema.create(Schema.Type.STRING), null, null));

  Protocol protocol = new Protocol("Simple", "org.apache.avro.test");
  Protocol.Message message =
      protocol.createMessage("hello",
                             null /* doc */,
                             Schema.createRecord(requestFields),
                             Schema.create(Schema.Type.STRING),
                             Schema.createUnion(new ArrayList<>()));
  protocol.getMessages().put("hello", message);

  Transceiver transceiver =
      new SocketTransceiver(new InetSocketAddress(server.getPort()));
  try {
    GenericRequestor requestor = new GenericRequestor(protocol, transceiver);

    GenericRecord params = new GenericData.Record(message.getRequest());
    params.put("extra", Boolean.TRUE);
    params.put("greeting", new Utf8("bob"));

    Utf8 response = (Utf8) requestor.request("hello", params);
    assertEquals(new Utf8("goodbye"), response);
  } finally {
    transceiver.close();
  }
}
Schema recordASchema = Schema.createRecord(Arrays.asList(arrayBField)); Schema.Field recordAField = new Schema.Field("recordA", recordASchema, "", null); Schema rootRecordSchema = Schema.createRecord(Arrays.asList(arrayAField, recordAField)); GenericRecord mixedRecord = new GenericData.Record(rootRecordSchema); arrayA.add(9); arrayA.add(8); arrayA.add(2); arrayA.add(1); mixedRecord.put("arrayA", arrayA); arrayB.add("ghi"); arrayB.add("def"); recordA.put("arrayB", arrayB); mixedRecord.put("recordA", recordA); GenericArray<Integer> newArrayA = (GenericArray<Integer>) mixedRecord.get("arrayA"); for (int i = 0; i < newArrayA.size() - 1; ++i) { Assert.assertTrue("Items are in bad order", newArrayA.get(i) < newArrayA.get(i + 1)); GenericArray<String> newArrayB = (GenericArray<String>) ((GenericRecord) mixedRecord.get("recordA")).get("arrayB"); for (int i = 0; i < newArrayB.size() - 1; ++i) { Assert.assertTrue("Items are in bad order", newArrayB.get(i).compareTo(newArrayB.get(i + 1)) < 0);
GenericRecord recordWithUuid = new GenericData.Record(recordSchema); byte[] uuid_value = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; GenericFixed uuid = (GenericFixed) GenericData.get().createFixed(null, uuid_value, uuidSchema); recordWithUuid.put("__uuid", uuid); Assert.assertNull("Uuid is still present after removal in recordWithoutUuid", recordWithUuid.get("__uuid")); GenericRecord inArrayRecord1 = new GenericData.Record(inArrayRecordSchema); inArrayRecord1.put("__uuid", GenericData.get().createFixed(null, uuid_value, uuidSchema)); inArrayRecord1.put("int_value", new Integer(100)); GenericRecord inArrayRecord2 = new GenericData.Record(inArrayRecordSchema); inArrayRecord2.put("__uuid", GenericData.get().createFixed(null, uuid_value, uuidSchema)); inArrayRecord2.put("int_value", new Integer(200)); complexRecordWithUuid.put("inner", recordWithUuid); complexRecordWithUuid.put("some_field", GenericData.get().createFixed(null, otherFixedValue, otherFixedSchema)); complexRecordWithUuid.put("__uuid", uuid); complexRecordWithUuid.put("array1", array1); Assert.assertNull("Uuid is still present after removal in complexRecordWithoutUuid", complexRecordWithUuid.get("__uuid")); Assert.assertNotNull("some_field in complexRecordWithoutUuid is null", complexRecordWithUuid.get("some_field")); GenericRecord innerRecordWithoutUuid = (GenericRecord) complexRecordWithUuid.get("inner"); Assert.assertNotNull("innerRecordWithoutUuid is null", innerRecordWithoutUuid); Assert.assertNull("Uuid is still present after removal in innerRecordWithoutUuid", innerRecordWithoutUuid.get("__uuid")); GenericArray<GenericRecord> array1WithoutUuids = (GenericArray<GenericRecord>) complexRecordWithUuid.get("array1"); for (GenericRecord rec : array1WithoutUuids) {