/**
 * Looks up the data type of the field stored in {@code fieldMap} under the given name.
 *
 * @param fieldName the key to look up in {@code fieldMap}
 * @return the data type of the matching field, or an empty Optional when
 *         {@code fieldMap} holds no field for that name
 */
@Override
public Optional<DataType> getDataType(final String fieldName) {
    final RecordField match = fieldMap.get(fieldName);
    if (match != null) {
        return Optional.of(match.getDataType());
    }
    return Optional.empty();
}
/**
 * Renders the given fields as a bracketed, comma-separated list of
 * {@code "name" : "type"} pairs.
 *
 * @param fields the fields to render, in iteration order
 * @return the textual form, {@code []} when the list is empty
 */
private static String createText(final List<RecordField> fields) {
    final StringBuilder text = new StringBuilder("[");

    // The separator is empty before the first entry and ", " before every later one.
    String separator = "";
    for (final RecordField field : fields) {
        text.append(separator);
        text.append("\"").append(field.getFieldName());
        text.append("\" : \"").append(field.getDataType()).append("\"");
        separator = ", ";
    }

    return text.append("]").toString();
}
/**
 * Builds a {@code CREATE EXTERNAL TABLE IF NOT EXISTS} statement (stored as ORC)
 * whose columns are derived from the fields of the given schema.
 *
 * @param recordSchema   the schema supplying the column names and types
 * @param tableName      the table name, emitted between backticks
 * @param hiveFieldNames when true, column names are lower-cased; the flag is also
 *                       forwarded to {@code getHiveTypeFromFieldType}
 * @return the DDL statement; the column list is empty when the schema reports no fields
 */
public static String generateHiveDDL(RecordSchema recordSchema, String tableName, boolean hiveFieldNames) {
    final List<String> columnDefinitions = new ArrayList<>();

    final List<RecordField> schemaFields = recordSchema.getFields();
    if (schemaFields != null) {
        for (final RecordField field : schemaFields) {
            final String rawName = field.getFieldName();
            final String columnName = hiveFieldNames ? rawName.toLowerCase() : rawName;
            columnDefinitions.add("`" + columnName + "` " + getHiveTypeFromFieldType(field.getDataType(), hiveFieldNames));
        }
    }

    return "CREATE EXTERNAL TABLE IF NOT EXISTS `" + tableName + "` ("
            + StringUtils.join(columnDefinitions, ", ")
            + ") STORED AS ORC";
}
/**
 * Renders the fields of the given schema as a Hive {@code STRUCT<...>} type string,
 * one {@code name:type} entry per field.
 *
 * @param recordSchema   the schema supplying the field names and types
 * @param hiveFieldNames when true, field names are lower-cased; the flag is also
 *                       forwarded to {@code getHiveTypeFromFieldType}
 * @return the STRUCT type string, or {@code null} when the schema returns a null field list
 * @throws IllegalArgumentException declared by the signature; not thrown directly in this method
 */
public static String getHiveSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    final List<RecordField> schemaFields = recordSchema.getFields();
    if (schemaFields == null) {
        return null;
    }

    final List<String> structMembers = new ArrayList<>(schemaFields.size());
    for (final RecordField field : schemaFields) {
        final String memberName = hiveFieldNames ? field.getFieldName().toLowerCase() : field.getFieldName();
        structMembers.add(memberName + ":" + getHiveTypeFromFieldType(field.getDataType(), hiveFieldNames));
    }

    return "STRUCT<" + StringUtils.join(structMembers, ", ") + ">";
}
/**
 * Checks that the given parent can own an array-index value: it must be non-null,
 * its field must be of type ARRAY, and it must carry a non-null value.
 *
 * @param parent the candidate parent field value
 * @return the same {@code parent} instance once all checks pass
 * @throws NullPointerException     if {@code parent} is null
 * @throws IllegalArgumentException if the parent's field type is not ARRAY or its value is null
 */
private static FieldValue validateParent(final FieldValue parent) {
    Objects.requireNonNull(parent, "Cannot create an ArrayIndexFieldValue without a parent");

    final RecordFieldType parentType = parent.getField().getDataType().getFieldType();
    if (parentType != RecordFieldType.ARRAY) {
        throw new IllegalArgumentException("Cannot create an ArrayIndexFieldValue with a parent of type " + parentType);
    }

    if (parent.getValue() == null) {
        throw new IllegalArgumentException("Cannot create an ArrayIndexFieldValue without a parent Record");
    }

    return parent;
}
/**
 * Two fields are equal when they are of the same class and agree on data type,
 * field name, aliases, default value and nullability.
 *
 * @param obj the object to compare against
 * @return true when {@code obj} is an equivalent field
 */
@Override
public boolean equals(Object obj) {
    if (obj == this) {
        return true;
    }
    if (obj == null || obj.getClass() != getClass()) {
        return false;
    }

    final RecordField that = (RecordField) obj;
    if (!dataType.equals(that.getDataType())) {
        return false;
    }
    if (!fieldName.equals(that.getFieldName())) {
        return false;
    }
    if (!aliases.equals(that.getAliases())) {
        return false;
    }
    if (!Objects.equals(defaultValue, that.defaultValue)) {
        return false;
    }
    return nullable == that.nullable;
}
/**
 * Chooses the format string to use for the given field.
 *
 * @param field the field whose format is wanted
 * @return this object's {@code dateFormat}, {@code timeFormat} or {@code timestampFormat}
 *         for DATE, TIME and TIMESTAMP fields respectively; for every other type,
 *         the format reported by the field's own data type
 */
private String getFormat(final RecordField field) {
    final DataType fieldDataType = field.getDataType();

    switch (fieldDataType.getFieldType()) {
        case DATE:
            return dateFormat;
        case TIME:
            return timeFormat;
        case TIMESTAMP:
            return timestampFormat;
        default:
            return fieldDataType.getFormat();
    }
}
/**
 * Reports whether the two fields differ in data type, aliases or default value.
 *
 * @param thisField  the first field
 * @param otherField the field compared against it
 * @return true as soon as any of the three properties differs, false when all match
 */
private static boolean isMergeRequired(final RecordField thisField, final RecordField otherField) {
    // Checked in the order data type, aliases, default value; evaluation stops at the first difference.
    return !thisField.getDataType().equals(otherField.getDataType())
            || !thisField.getAliases().equals(otherField.getAliases())
            || !Objects.equals(thisField.getDefaultValue(), otherField.getDefaultValue());
}
/**
 * Builds an ORC struct {@code TypeInfo} with one member per field of the given schema.
 *
 * @param recordSchema   the schema supplying the member names and types
 * @param hiveFieldNames when true, member names are lower-cased; the flag is also
 *                       forwarded to {@code getOrcField}
 * @return the struct type info, or {@code null} when the schema returns a null field list
 * @throws IllegalArgumentException declared by the signature; not thrown directly in this method
 */
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    final List<RecordField> schemaFields = recordSchema.getFields();
    if (schemaFields == null) {
        return null;
    }

    final int fieldCount = schemaFields.size();
    final List<String> memberNames = new ArrayList<>(fieldCount);
    final List<TypeInfo> memberTypes = new ArrayList<>(fieldCount);

    for (final RecordField field : schemaFields) {
        memberNames.add(hiveFieldNames ? field.getFieldName().toLowerCase() : field.getFieldName());
        memberTypes.add(getOrcField(field.getDataType(), hiveFieldNames));
    }

    return TypeInfoFactory.getStructTypeInfo(memberNames, memberTypes);
}
/**
 * Creates a predicate that accepts field values whose field type is one of the given types.
 *
 * @param fieldType        the primary accepted type
 * @param alternativeTypes further accepted types; only consulted when the primary type does not match
 * @return a predicate that is true when the value's field type is identical to
 *         {@code fieldType} or to any of {@code alternativeTypes}
 */
public static Predicate<FieldValue> fieldTypeFilter(final RecordFieldType fieldType, final RecordFieldType... alternativeTypes) {
    return candidate -> {
        final RecordFieldType actualType = candidate.getField().getDataType().getFieldType();

        boolean accepted = actualType == fieldType;
        // The !accepted test comes first so the alternatives are untouched once a match is found.
        for (int i = 0; !accepted && i < alternativeTypes.length; i++) {
            accepted = actualType == alternativeTypes[i];
        }
        return accepted;
    };
}
/**
 * Stores a value for the named field.
 *
 * <p>When the schema has no field of that name, the call is ignored if
 * {@code dropUnknownFields} is set; otherwise the raw value is stored under the
 * given name. When the schema does know the field, the value is stored under the
 * schema field's own name, after conversion through {@code DataTypeUtils.convertType}
 * if {@code isTypeChecked()} is true.</p>
 *
 * <p>{@code serializedForm} is reset to empty whenever the stored value differs
 * from the one it replaces.</p>
 *
 * @param fieldName the name of the field to set
 * @param value     the new value
 */
@Override
public void setValue(final String fieldName, final Object value) {
    final Optional<RecordField> lookup = getSchema().getField(fieldName);

    final String key;
    final Object stored;
    if (lookup.isPresent()) {
        final RecordField schemaField = lookup.get();
        stored = isTypeChecked() ? DataTypeUtils.convertType(value, schemaField.getDataType(), fieldName) : value;
        key = schemaField.getFieldName();
    } else {
        if (dropUnknownFields) {
            return;
        }
        key = fieldName;
        stored = value;
    }

    final Object replaced = values.put(key, stored);
    if (!Objects.equals(stored, replaced)) {
        serializedForm = Optional.empty();
    }
}
/**
 * Combines two fields into a new one.
 *
 * <p>The result keeps the first field's name, carries the union of both alias sets,
 * uses the first field's default value unless it is null (then the second's), and
 * is nullable when either input is. When the data types differ, the result's type
 * is a CHOICE of the two; otherwise it is the shared type.</p>
 *
 * @param thisField  the field whose name and default value take precedence
 * @param otherField the field merged into it
 * @return a new field holding the merged definition
 */
public static RecordField merge(final RecordField thisField, final RecordField otherField) {
    final Set<String> mergedAliases = new HashSet<>(thisField.getAliases());
    mergedAliases.addAll(otherField.getAliases());

    final Object thisDefault = thisField.getDefaultValue();
    final Object mergedDefault = thisDefault != null ? thisDefault : otherField.getDefaultValue();

    final DataType thisType = thisField.getDataType();
    final DataType otherType = otherField.getDataType();
    final DataType mergedType = thisType.equals(otherType)
            ? thisType
            : RecordFieldType.CHOICE.getChoiceDataType(thisType, otherType);

    final boolean mergedNullable = thisField.isNullable() || otherField.isNullable();

    return new RecordField(thisField.getFieldName(), mergedType, mergedDefault, mergedAliases, mergedNullable);
}
/**
 * Convenience overload that unpacks the field value and delegates to
 * {@code isRecord(DataType, Object)}.
 *
 * @param fieldValue the field value whose data type and value are inspected
 * @return whatever the two-argument {@code isRecord} reports for that type and value
 */
public static boolean isRecord(final FieldValue fieldValue) {
    return isRecord(fieldValue.getField().getDataType(), fieldValue.getValue());
}
/**
 * Converts the record's values to their ORC form and adds them to the ORC writer as one row.
 *
 * <p>For each of the first {@code numRecordFields} entries of {@code recordFields},
 * the record's value is converted with {@code NiFiOrcUtils.convertToORCObject} and
 * placed in the matching slot of {@code workingRow}. Nothing is written when
 * {@code recordFields} is null.</p>
 *
 * @param record the record to write
 * @throws IOException if a conversion raises an {@code ArrayIndexOutOfBoundsException}
 *                     (wrapped as the cause), or if thrown by the underlying ORC writer
 */
@Override
public void write(final Record record) throws IOException {
    if (recordFields == null) {
        return;
    }

    for (int i = 0; i < numRecordFields; i++) {
        final RecordField field = recordFields.get(i);
        final DataType fieldType = field.getDataType();
        final Object value = record.getValue(field);
        try {
            workingRow[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldType, hiveFieldNames), value, hiveFieldNames);
        } catch (ArrayIndexOutOfBoundsException aioobe) {
            // Plain concatenation renders a null value as "null"; calling toString() on it
            // would throw a NullPointerException here and hide the original failure.
            final String errorMsg = "Index out of bounds for column " + i + ", field " + field.getFieldName()
                    + ", type " + fieldType + ", and object " + value;
            throw new IOException(errorMsg, aioobe);
        }
    }

    orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, workingRow));
}
/**
 * Converts each SolrDocument into a record of the given schema.
 *
 * <p>For every schema field the document's value is looked up by field name; null
 * values are skipped. When the schema declares the field as ARRAY and the document
 * supplies a collection, the collection is converted to an {@code Object[]}. Any
 * other value is stored unchanged.</p>
 *
 * @param docs   the Solr documents to convert
 * @param schema the schema that decides which document fields are copied
 * @return a record set with one record per document, in the order of {@code docs}
 */
public static RecordSet solrDocumentsToRecordSet(final List<SolrDocument> docs, final RecordSchema schema) {
    final List<Record> records = new ArrayList<>(docs.size());

    for (final SolrDocument doc : docs) {
        final Map<String, Object> recordValues = new LinkedHashMap<>();

        for (final RecordField field : schema.getFields()) {
            final String fieldName = field.getFieldName();
            final Object fieldValue = doc.getFieldValue(fieldName);
            if (fieldValue == null) {
                continue;
            }

            final boolean arrayField = field.getDataType().getFieldType().equals(RecordFieldType.ARRAY);
            // getFieldValue may hand back a single value or any collection, so only
            // convert when a collection is actually present instead of casting blindly to List.
            // Collection is fully qualified because this file's import block is not visible here.
            if (arrayField && fieldValue instanceof java.util.Collection) {
                recordValues.put(fieldName, ((java.util.Collection<?>) fieldValue).toArray());
            } else {
                recordValues.put(fieldName, fieldValue);
            }
        }

        records.add(new MapRecord(schema, recordValues));
    }

    return new ListRecordSet(schema, records);
}
/**
 * Evaluates the parent path and, for every MAP-typed result that carries a value,
 * emits one map-entry field value per key in {@code mapKeys}.
 *
 * <p>Each emitted entry holds the map's value for that key (null when the key is
 * absent) and a field named after the parent field, typed with the map's value type.
 * Parent results whose value is null are skipped, because there is no map to read from.</p>
 *
 * @param context the evaluation context passed on to the parent path
 * @return a stream of map-entry field values
 */
@Override
@SuppressWarnings("unchecked")
public Stream<FieldValue> evaluate(final RecordPathEvaluationContext context) {
    final Stream<FieldValue> parentResult = getParentPath().evaluate(context);

    return parentResult
        .filter(Filters.fieldTypeFilter(RecordFieldType.MAP))
        // Without this guard a null map would fail with a NullPointerException on map.get(key).
        .filter(fieldValue -> fieldValue.getValue() != null)
        .flatMap(fieldValue -> {
            final Map<String, ?> map = (Map<String, ?>) fieldValue.getValue();

            return mapKeys.stream().map(key -> {
                final DataType valueType = ((MapDataType) fieldValue.getField().getDataType()).getValueType();
                final RecordField elementField = new RecordField(fieldValue.getField().getFieldName(), valueType);
                return new MapEntryFieldValue(map.get(key), elementField, fieldValue, key);
            });
        });
}
}
/**
 * Validates the given values against every field of the schema.
 *
 * <p>A missing (null) value is accepted only when the field is nullable or has a
 * default value. A present value must pass {@code DataTypeUtils.isCompatibleDataType}
 * for the field's data type.</p>
 *
 * @param values the values to validate
 * @param schema the schema whose fields are checked
 * @return the same {@code values} map, unmodified, once every field has passed
 * @throws SchemaValidationException if a required value is missing or a value is
 *                                   incompatible with its field's data type
 */
private Map<String, Object> checkTypes(final Map<String, Object> values, final RecordSchema schema) {
    for (final RecordField field : schema.getFields()) {
        final Object explicitValue = getExplicitValue(field, values);

        if (explicitValue != null) {
            if (!DataTypeUtils.isCompatibleDataType(explicitValue, field.getDataType())) {
                throw new SchemaValidationException("Field " + field.getFieldName() + " has a value of " + explicitValue
                    + ", which cannot be coerced into the appropriate data type of " + field.getDataType());
            }
            continue;
        }

        final boolean absenceAllowed = field.isNullable() || field.getDefaultValue() != null;
        if (!absenceAllowed) {
            throw new SchemaValidationException("Field " + field.getFieldName() + " cannot be null");
        }
    }

    return values;
}
/**
 * Evaluates the parent path and turns every MAP-typed result into a single
 * map-entry field value for {@code mapKey}.
 *
 * <p>The entry's value comes from {@code getMapValue}; its field is named after the
 * parent field and typed with the map's value type.</p>
 *
 * @param context the evaluation context passed on to the parent path
 * @return a stream with one map-entry field value per MAP-typed parent result
 */
@Override
public Stream<FieldValue> evaluate(final RecordPathEvaluationContext context) {
    return getParentPath().evaluate(context)
        .filter(Filters.fieldTypeFilter(RecordFieldType.MAP))
        .map(parentValue -> {
            final RecordField mapField = parentValue.getField();
            final DataType entryType = ((MapDataType) mapField.getDataType()).getValueType();
            final RecordField entryField = new RecordField(mapField.getFieldName(), entryType);
            return new MapEntryFieldValue(getMapValue(parentValue), entryField, parentValue, mapKey);
        });
}
/**
 * Creates the Avro field that corresponds to the given record field.
 *
 * <p>The Avro schema is obtained from {@code buildAvroSchema} using the record
 * field's data type, name and nullability. The Avro field carries the record
 * field's default value, no doc string, and every alias of the record field.</p>
 *
 * @param recordField the record field to translate
 * @return the equivalent Avro field
 */
private static Field buildAvroField(final RecordField recordField) {
    final String name = recordField.getFieldName();
    final Schema avroSchema = buildAvroSchema(recordField.getDataType(), name, recordField.isNullable());

    final Field avroField = new Field(name, avroSchema, null, recordField.getDefaultValue());
    recordField.getAliases().forEach(avroField::addAlias);
    return avroField;
}
/**
 * Evaluates the parent path and, for every ARRAY-typed result, emits the element
 * at the position given by {@code getArrayIndex}.
 *
 * <p>Parent results are skipped when their value is null or when the resolved index
 * falls outside the array, so a position that does not exist yields no result
 * rather than an exception. The emitted element's field is named after the parent
 * field and typed with the array's element type.</p>
 *
 * @param context the evaluation context passed on to the parent path
 * @return a stream of array-index field values, at most one per parent result
 */
@Override
public Stream<FieldValue> evaluate(final RecordPathEvaluationContext context) {
    final Stream<FieldValue> parentResult = getParentPath().evaluate(context);

    return parentResult
        .filter(Filters.fieldTypeFilter(RecordFieldType.ARRAY))
        .filter(fieldValue -> {
            final Object[] values = (Object[]) fieldValue.getValue();
            if (values == null) {
                return false;
            }
            // getArrayIndex is defined outside this block; should it ever resolve to a
            // negative position, the lower-bound check keeps values[arrayIndex] below from throwing.
            final int arrayIndex = getArrayIndex(values.length);
            return arrayIndex >= 0 && arrayIndex < values.length;
        })
        .map(fieldValue -> {
            final ArrayDataType arrayDataType = (ArrayDataType) fieldValue.getField().getDataType();
            final DataType elementDataType = arrayDataType.getElementType();

            final Object[] values = (Object[]) fieldValue.getValue();
            final int arrayIndex = getArrayIndex(values.length);

            final RecordField elementField = new RecordField(fieldValue.getField().getFieldName(), elementDataType);
            final FieldValue result = new ArrayIndexFieldValue(values[arrayIndex], elementField, fieldValue, arrayIndex);
            return result;
        });
}