/**
 * Initializes this converter by constructing the Hive serializer and deserializer configured in
 * the given {@link WorkUnitState}, then initializing both with the Hadoop {@link Configuration}
 * derived from the state.
 *
 * <p>The deserializer is initialized first so that {@code setColumnsIfPossible(state)} can run
 * before the serializer is initialized, matching the original ordering.</p>
 *
 * @param state the work unit state carrying SerDe configuration properties
 * @return this converter, to allow call chaining
 * @throws RuntimeException wrapping any {@link IOException} thrown while instantiating the
 *         SerDes, or any {@link SerDeException} thrown while initializing them
 */
@Override
public HiveSerDeConverter init(WorkUnitState state) {
  super.init(state);
  Configuration conf = HadoopUtils.getConfFromState(state);
  try {
    this.serializer = HiveSerDeWrapper.getSerializer(state).getSerDe();
    this.deserializer = HiveSerDeWrapper.getDeserializer(state).getSerDe();
    this.deserializer.initialize(conf, state.getProperties());
    setColumnsIfPossible(state);
    this.serializer.initialize(conf, state.getProperties());
  } catch (IOException e) {
    log.error("Failed to instantiate serializer and deserializer", e);
    // Throwables.propagate is deprecated; for a checked cause it is equivalent to this wrap.
    throw new RuntimeException(e);
  } catch (SerDeException e) {
    log.error("Failed to initialize serializer and deserializer", e);
    throw new RuntimeException(e);
  }
  return this;
}
/**
 * Builds a {@link HiveWritableHdfsDataWriter} for this builder's destination.
 *
 * <p>If the destination properties do not already specify both {@code WRITER_WRITABLE_CLASS}
 * and {@code WRITER_OUTPUT_FORMAT_CLASS}, they are derived from the configured Hive serializer
 * and stored back into the properties before the writer is created.</p>
 *
 * @return a new {@link HiveWritableHdfsDataWriter} backed by the destination properties
 * @throws IOException if the serializer cannot be obtained
 */
@SuppressWarnings("deprecation")
@Override
public DataWriter<Writable> build() throws IOException {
  Preconditions.checkNotNull(this.destination);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(this.writerId));

  State destProps = this.destination.getProperties();
  boolean hasWritableClass = destProps.contains(WRITER_WRITABLE_CLASS);
  boolean hasOutputFormatClass = destProps.contains(WRITER_OUTPUT_FORMAT_CLASS);

  // Fill in any missing writer class properties from the configured serializer.
  if (!(hasWritableClass && hasOutputFormatClass)) {
    HiveSerDeWrapper serDeWrapper = HiveSerDeWrapper.getSerializer(destProps);
    destProps.setProp(WRITER_WRITABLE_CLASS, serDeWrapper.getSerDe().getSerializedClass().getName());
    destProps.setProp(WRITER_OUTPUT_FORMAT_CLASS, serDeWrapper.getOutputFormatClassName());
  }

  return new HiveWritableHdfsDataWriter(this, destProps);
}
/**
 * Adds Avro SerDe properties to the given {@link HiveRegistrationUnit} for the data under
 * {@code path}.
 *
 * <p>
 * If {@link #USE_SCHEMA_FILE} is true, the schema is registered via {@link #SCHEMA_URL} pointing
 * to the schema file named {@link #SCHEMA_FILE_NAME}.
 * </p>
 *
 * <p>
 * Otherwise the schema is obtained from {@link #getDirectorySchema(Path)}; if its length is below
 * {@link #SCHEMA_LITERAL_LENGTH_LIMIT} it is registered via {@link #SCHEMA_LITERAL}, else it is
 * written to {@link #SCHEMA_FILE_NAME} and registered via {@link #SCHEMA_URL}.
 * </p>
 *
 * @param path a directory containing the data whose schema should be registered
 * @param hiveUnit the registration unit to populate with SerDe, input/output format, and schema
 * @throws IOException if the path cannot be inspected or the schema cannot be read or written
 */
@Override
public void addSerDeProperties(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(), path + " is not a directory.");

  // Time how long it takes to read the schema out of the directory.
  Schema directorySchema;
  try (Timer.Context timerContext = metricContext.timer(HIVE_SPEC_SCHEMA_READING_TIMER).time()) {
    directorySchema = getDirectorySchema(path);
  }

  // Nothing to register when the directory yields no schema.
  if (directorySchema == null) {
    return;
  }

  hiveUnit.setSerDeType(this.serDeWrapper.getSerDe().getClass().getName());
  hiveUnit.setInputFormat(this.serDeWrapper.getInputFormatClassName());
  hiveUnit.setOutputFormat(this.serDeWrapper.getOutputFormatClassName());

  addSchemaProperties(path, hiveUnit, directorySchema);
}
/**
 * Initializes this converter by constructing the Hive serializer and deserializer configured in
 * the given {@link WorkUnitState}, then initializing both with the Hadoop {@link Configuration}
 * derived from the state.
 *
 * <p>The deserializer is initialized first so that {@code setColumnsIfPossible(state)} can run
 * before the serializer is initialized, matching the original ordering.</p>
 *
 * @param state the work unit state carrying SerDe configuration properties
 * @return this converter, to allow call chaining
 * @throws RuntimeException wrapping any {@link IOException} thrown while instantiating the
 *         SerDes, or any {@link SerDeException} thrown while initializing them
 */
@Override
public HiveSerDeConverter init(WorkUnitState state) {
  super.init(state);
  Configuration conf = HadoopUtils.getConfFromState(state);
  try {
    this.serializer = HiveSerDeWrapper.getSerializer(state).getSerDe();
    this.deserializer = HiveSerDeWrapper.getDeserializer(state).getSerDe();
    this.deserializer.initialize(conf, state.getProperties());
    setColumnsIfPossible(state);
    this.serializer.initialize(conf, state.getProperties());
  } catch (IOException e) {
    log.error("Failed to instantiate serializer and deserializer", e);
    // Throwables.propagate is deprecated; for a checked cause it is equivalent to this wrap.
    throw new RuntimeException(e);
  } catch (SerDeException e) {
    log.error("Failed to initialize serializer and deserializer", e);
    throw new RuntimeException(e);
  }
  return this;
}
/**
 * Builds a {@link HiveWritableHdfsDataWriter} for this builder's destination.
 *
 * <p>If the destination properties do not already specify both {@code WRITER_WRITABLE_CLASS}
 * and {@code WRITER_OUTPUT_FORMAT_CLASS}, they are derived from the configured Hive serializer
 * and stored back into the properties before the writer is created.</p>
 *
 * @return a new {@link HiveWritableHdfsDataWriter} backed by the destination properties
 * @throws IOException if the serializer cannot be obtained
 */
@SuppressWarnings("deprecation")
@Override
public DataWriter<Writable> build() throws IOException {
  Preconditions.checkNotNull(this.destination);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(this.writerId));

  State destProps = this.destination.getProperties();
  boolean hasWritableClass = destProps.contains(WRITER_WRITABLE_CLASS);
  boolean hasOutputFormatClass = destProps.contains(WRITER_OUTPUT_FORMAT_CLASS);

  // Fill in any missing writer class properties from the configured serializer.
  if (!(hasWritableClass && hasOutputFormatClass)) {
    HiveSerDeWrapper serDeWrapper = HiveSerDeWrapper.getSerializer(destProps);
    destProps.setProp(WRITER_WRITABLE_CLASS, serDeWrapper.getSerDe().getSerializedClass().getName());
    destProps.setProp(WRITER_OUTPUT_FORMAT_CLASS, serDeWrapper.getOutputFormatClassName());
  }

  return new HiveWritableHdfsDataWriter(this, destProps);
}
/**
 * Adds Avro SerDe properties to the given {@link HiveRegistrationUnit} for the data under
 * {@code path}.
 *
 * <p>
 * If {@link #USE_SCHEMA_FILE} is true, the schema is registered via {@link #SCHEMA_URL} pointing
 * to the schema file named {@link #SCHEMA_FILE_NAME}.
 * </p>
 *
 * <p>
 * Otherwise the schema is obtained from {@link #getDirectorySchema(Path)}; if its length is below
 * {@link #SCHEMA_LITERAL_LENGTH_LIMIT} it is registered via {@link #SCHEMA_LITERAL}, else it is
 * written to {@link #SCHEMA_FILE_NAME} and registered via {@link #SCHEMA_URL}.
 * </p>
 *
 * @param path a directory containing the data whose schema should be registered
 * @param hiveUnit the registration unit to populate with SerDe, input/output format, and schema
 * @throws IOException if the path cannot be inspected or the schema cannot be read or written
 */
@Override
public void addSerDeProperties(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(), path + " is not a directory.");

  // Time how long it takes to read the schema out of the directory.
  Schema directorySchema;
  try (Timer.Context timerContext = metricContext.timer(HIVE_SPEC_SCHEMA_READING_TIMER).time()) {
    directorySchema = getDirectorySchema(path);
  }

  // Nothing to register when the directory yields no schema.
  if (directorySchema == null) {
    return;
  }

  hiveUnit.setSerDeType(this.serDeWrapper.getSerDe().getClass().getName());
  hiveUnit.setInputFormat(this.serDeWrapper.getInputFormatClassName());
  hiveUnit.setOutputFormat(this.serDeWrapper.getOutputFormatClassName());

  addSchemaProperties(path, hiveUnit, directorySchema);
}