// NOTE(review): fragment of a schema-discovery routine -- braces are unbalanced in this
// excerpt, the local 's' is declared outside it, and the statements after the first
// return are unreachable as shown. The enclosing method is not visible from here;
// verify against the full source before editing.
if(AvroSerdeUtils.insideMRJob(job)) { MapWork mapWork = Utilities.getMapWork(job); return AvroSerdeUtils.determineSchemaOrThrowException(job, props); if(s != null) { LOG.info("Found the avro schema in the job: " + s); return AvroSerdeUtils.getSchemaFor(s);
/**
 * Deserializes every requested column of an Avro record, appending one Hive value
 * per column to {@code objectRow} (in column order) and returning that same list.
 *
 * @param objectRow   accumulator the converted values are appended to
 * @param fileSchema  writer schema of the file; may be a nullable union wrapper
 * @param columnNames names of the columns to materialize
 * @param columnTypes Hive type of each column, parallel to {@code columnNames}
 * @param record      the Avro record being deserialized
 * @return {@code objectRow}, for call chaining
 * @throws AvroSerdeException if a column value cannot be converted
 */
private List<Object> workerBase(List<Object> objectRow, Schema fileSchema, List<String> columnNames, List<TypeInfo> columnTypes, GenericRecord record) throws AvroSerdeException {
  for (int idx = 0; idx < columnNames.size(); idx++) {
    String name = columnNames.get(idx);
    TypeInfo hiveType = columnTypes.get(idx);
    Object datum = record.get(name);
    Schema datumSchema = record.getSchema().getField(name).schema();
    // A nullable file schema is the union [T, null]; unwrap it so the field
    // lookup runs against the concrete record type.
    Schema lookupSchema = AvroSerdeUtils.isNullableType(fileSchema)
        ? AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema)
        : fileSchema;
    Schema.Field fileField = lookupSchema.getField(name);
    Schema fileFieldSchema = (fileField == null) ? null : fileField.schema();
    objectRow.add(worker(datum, fileFieldSchema, datumSchema, hiveType));
  }
  return objectRow;
}
// NOTE(review): excerpt of a serializer's primitive-category switch (BINARY and CHAR
// cases). The branch that defines 'dec' (presumably the DECIMAL case) and the
// enclosing switch lie outside this excerpt, so braces do not balance here and the
// jump from the FIXED branch into getBufferFromDecimal cannot be verified from this view.
case BINARY: if (schema.getType() == Type.BYTES){ return AvroSerdeUtils.getBufferFromBytes((byte[])fieldOI.getPrimitiveJavaObject(structFieldData)); } else if (schema.getType() == Type.FIXED){ Fixed fixed = new GenericData.Fixed(schema, (byte[])fieldOI.getPrimitiveJavaObject(structFieldData)); return AvroSerdeUtils.getBufferFromDecimal(dec, ((DecimalTypeInfo)typeInfo).scale()); case CHAR: HiveChar ch = (HiveChar)fieldOI.getPrimitiveJavaObject(structFieldData);
// NOTE(review): fragment of a schema-determination method. As shown, 'schemaString'
// is only ever the avro.schema.literal property, yet it is later treated as a URL --
// the re-read of the avro.schema.url property appears to be missing from this excerpt.
// The surrounding method and its braces are not visible; confirm against full source.
String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); if(schemaString != null && !schemaString.equals(SCHEMA_NONE)) return AvroSerdeUtils.getSchemaFor(schemaString); Schema s = getSchemaFromFS(schemaString, conf); if (s == null) { return AvroSerdeUtils.getSchemaFor(new URL(schemaString));
@Override public Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) { // AvroSerDe does not propagate initialization exceptions. Instead, it stores just an exception's message in // this.configErrors (see https://issues.apache.org/jira/browse/HIVE-7868). In Presto, such behavior is not // at all useful, as silenced exception usually carries important information which may be otherwise unavailable. try { return AvroSerdeUtils.determineSchemaOrThrowException(conf, props); } catch (IOException | AvroSerdeException e) { throw new RuntimeException(e); } } }
public static Schema getSchemaFor(URL url) { InputStream in = null; try { in = url.openStream(); return getSchemaFor(in); } catch (Exception e) { throw new RuntimeException("Failed to parse Avro schema", e); } finally { if (in != null) { try { in.close(); } catch (IOException e) { // Ignore } } } }
// NOTE(review): fragment of a record reader's schema discovery (older MapredWork
// variant). 'fs' and 'mapRedWork' are unused within this excerpt and braces do not
// balance -- the full method is not visible from here.
FileSystem fs = split.getPath().getFileSystem(job); if(AvroSerdeUtils.insideMRJob(job)) { MapredWork mapRedWork = Utilities.getMapRedWork(job); return AvroSerdeUtils.determineSchemaOrThrowException(props); } else return null; // If it's not in this property, it won't be in any others
// NOTE(review): excerpt of a deserializer's BINARY / DECIMAL primitive handling.
// The FIXED branch that defines 'fixed', the declaration of 'scale', and the
// enclosing switch lie outside this excerpt, so braces do not balance here.
return fixed.bytes(); } else if (recordSchema.getType() == Type.BYTES){ return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum); } else { throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType()); HiveDecimal dec = AvroSerdeUtils.getHiveDecimalFromByteBuffer((ByteBuffer) datum, scale); JavaHiveDecimalObjectInspector oi = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((DecimalTypeInfo)columnType);
private Object worker(Object datum, Schema recordSchema, TypeInfo columnType) throws AvroSerdeException { // Klaxon! Klaxon! Klaxon! // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if(AvroSerdeUtils.isNullableType(recordSchema)) return deserializeNullableUnion(datum, recordSchema, columnType); if(columnType == TypeInfoFactory.stringTypeInfo) return datum.toString(); // To workaround AvroUTF8 // This also gets us around the Enum issue since we just take the value // and convert it to a string. Yay! switch(columnType.getCategory()) { case STRUCT: return deserializeStruct((GenericData.Record) datum, (StructTypeInfo) columnType); case UNION: return deserializeUnion(datum, recordSchema, (UnionTypeInfo) columnType); case LIST: return deserializeList(datum, recordSchema, (ListTypeInfo) columnType); case MAP: return deserializeMap(datum, recordSchema, (MapTypeInfo) columnType); default: return datum; // Simple type. } }
public static Buffer getBufferFromDecimal(HiveDecimal dec, int scale) { if (dec == null) { return null; } // NOTE: Previously, we did OldHiveDecimal.setScale(scale), called OldHiveDecimal // unscaledValue().toByteArray(). return AvroSerdeUtils.getBufferFromBytes(dec.bigIntegerBytesScaled(scale)); }
/**
 * Decodes an Avro decimal payload back into a HiveDecimal.
 *
 * @param byteBuffer two's-complement unscaled value as written by Avro
 * @param scale      scale recorded alongside the value in the Avro schema
 * @return the reconstructed decimal
 */
public static HiveDecimal getHiveDecimalFromByteBuffer(ByteBuffer byteBuffer, int scale) {
  byte[] unscaled = getBytesFromByteBuffer(byteBuffer);
  return HiveDecimal.create(new BigInteger(unscaled), scale);
}
/**
 * Attempt to determine the schema via the usual means, but do not throw
 * an exception if we fail. Instead, signal failure via a special
 * schema. This is used because Hive calls init on the serde during
 * any call, including calls to update the serde properties, meaning
 * if the serde is in a bad state, there is no way to update that state.
 */
public Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) {
  try {
    configErrors = "";
    return AvroSerdeUtils.determineSchemaOrThrowException(conf, props);
  } catch (AvroSerdeException he) {
    LOG.warn("Encountered AvroSerdeException determining schema. Returning " +
        "signal schema to indicate problem", he);
    // String concatenation already yields a fresh String; the original's
    // new String(...) wrapper was redundant.
    configErrors = "Encountered AvroSerdeException determining schema. Returning " +
        "signal schema to indicate problem: " + he.getMessage();
    return schema = SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
  } catch (Exception e) {
    LOG.warn("Encountered exception determining schema. Returning signal " +
        "schema to indicate problem", e);
    configErrors = "Encountered exception determining schema. Returning signal " +
        "schema to indicate problem: " + e.getMessage();
    // NOTE(review): unlike the AvroSerdeException branch above, this branch does
    // NOT cache the signal schema in this.schema -- preserved as-is, but confirm
    // whether the asymmetry is intentional.
    return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
  }
}
protected static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) throws IOException, URISyntaxException { FSDataInputStream in = null; FileSystem fs = null; try { fs = FileSystem.get(new URI(schemaFSUrl), conf); } catch (IOException ioe) { //return null only if the file system in schema is not recognized if (LOG.isDebugEnabled()) { String msg = "Failed to open file system for uri " + schemaFSUrl + " assuming it is not a FileSystem url"; LOG.debug(msg, ioe); } return null; } try { in = fs.open(new Path(schemaFSUrl)); Schema s = AvroSerdeUtils.getSchemaFor(in); return s; } finally { if(in != null) in.close(); } }
/** * Determine the schema to that's been provided for Avro serde work. * @param properties containing a key pointing to the schema, one way or another * @return schema to use while serdeing the avro file * @throws IOException if error while trying to read the schema from another location * @throws AvroSerdeException if unable to find a schema or pointer to it in the properties */ public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties) throws IOException, AvroSerdeException { String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); if(schemaString != null && !schemaString.equals(SCHEMA_NONE)) return AvroSerdeUtils.getSchemaFor(schemaString); // Try pulling directly from URL schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()); if(schemaString == null || schemaString.equals(SCHEMA_NONE)) throw new AvroSerdeException(EXCEPTION_MESSAGE); try { Schema s = getSchemaFromFS(schemaString, conf); if (s == null) { //in case schema is not a file system return AvroSerdeUtils.getSchemaFor(new URL(schemaString).openStream()); } return s; } catch (IOException ioe) { throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe); } catch (URISyntaxException urie) { throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie); } }
// NOTE(review): duplicate excerpt of a deserializer's BINARY / DECIMAL primitive
// handling. The FIXED branch that defines 'fixed', the declaration of 'scale', and
// the enclosing switch lie outside this excerpt, so braces do not balance here.
return fixed.bytes(); } else if (recordSchema.getType() == Type.BYTES){ return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum); } else { throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType()); HiveDecimal dec = AvroSerdeUtils.getHiveDecimalFromByteBuffer((ByteBuffer) datum, scale); JavaHiveDecimalObjectInspector oi = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((DecimalTypeInfo)columnType);
private Object worker(Object datum, Schema fileSchema, Schema recordSchema, TypeInfo columnType) throws AvroSerdeException { // Klaxon! Klaxon! Klaxon! // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if(AvroSerdeUtils.isNullableType(recordSchema)) { return deserializeNullableUnion(datum, fileSchema, recordSchema); } switch(columnType.getCategory()) { case STRUCT: return deserializeStruct((GenericData.Record) datum, fileSchema, (StructTypeInfo) columnType); case UNION: return deserializeUnion(datum, fileSchema, recordSchema, (UnionTypeInfo) columnType); case LIST: return deserializeList(datum, fileSchema, recordSchema, (ListTypeInfo) columnType); case MAP: return deserializeMap(datum, fileSchema, recordSchema, (MapTypeInfo) columnType); case PRIMITIVE: return deserializePrimitive(datum, fileSchema, recordSchema, (PrimitiveTypeInfo) columnType); default: throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory()); } }
public static Buffer getBufferFromDecimal(HiveDecimal dec, int scale) { if (dec == null) { return null; } // NOTE: Previously, we did OldHiveDecimal.setScale(scale), called OldHiveDecimal // unscaledValue().toByteArray(). return AvroSerdeUtils.getBufferFromBytes(dec.bigIntegerBytesScaled(scale)); }
/**
 * Decodes an Avro decimal payload back into a HiveDecimal.
 *
 * @param byteBuffer two's-complement unscaled value as written by Avro
 * @param scale      scale recorded alongside the value in the Avro schema
 * @return the reconstructed decimal
 */
public static HiveDecimal getHiveDecimalFromByteBuffer(ByteBuffer byteBuffer, int scale) {
  BigInteger unscaled = new BigInteger(getBytesFromByteBuffer(byteBuffer));
  return HiveDecimal.create(unscaled, scale);
}
// NOTE(review): fragment -- unwraps a nullable (union-with-null) schema before a
// recursive field-schema lookup. The enclosing method, its other branches, and the
// closing braces are not visible from this excerpt.
if (AvroSerdeUtils.isNullableType(schema)) { return AvroUtils.getFieldSchemaHelper(AvroSerdeUtils.getOtherTypeFromNullableType(schema), pathList, field);
// NOTE(review): duplicate fragment of a schema-discovery routine -- braces are
// unbalanced in this excerpt, the local 's' is declared outside it, and the statements
// after the first return are unreachable as shown. The enclosing method is not visible
// from here; verify against the full source before editing.
if(AvroSerdeUtils.insideMRJob(job)) { MapWork mapWork = Utilities.getMapWork(job); return AvroSerdeUtils.determineSchemaOrThrowException(job, props); if(s != null) { LOG.info("Found the avro schema in the job: " + s); return AvroSerdeUtils.getSchemaFor(s);