@Override
public RecordReader<NullWritable, AvroGenericRecordWritable>
    getRecordReader(InputSplit inputSplit, JobConf jc, Reporter reporter) throws IOException {
  return new AvroGenericRecordReader(jc, (FileSplit) inputSplit, reporter);
}
@Override
public RecordWriter<WritableComparable, AvroGenericRecordWritable>
    getRecordWriter(FileSystem ignored, JobConf job, String fileName, Progressable progress) throws IOException {
  return new WrapperRecordWriter<WritableComparable, AvroGenericRecordWritable>(job, progress, fileName);
}
@Override
public float getProgress() throws IOException {
  return stop == start
      ? 0.0f
      : Math.min(1.0f, (getPos() - start) / (float) (stop - start));
}
@Test
public void emptyFile() throws IOException {
  AvroGenericRecordReader reader = new AvroGenericRecordReader(jobConf, emptyFileSplit, reporter);

  // next() should always return false
  Assert.assertEquals(false, reader.next(null, null));
  // getPos() should always return 0
  Assert.assertEquals(0, reader.getPos());
  // close() should just do nothing
  reader.close();
}
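// Illustrative sketch (not from the original test): one way the emptyFileSplit
// fixture used above could be built -- a zero-length FileSplit over a freshly
// created empty file. The path below is a placeholder.
Path emptyFile = new Path("/tmp/empty.avro");
FileSystem fs = emptyFile.getFileSystem(jobConf);
fs.create(emptyFile).close();                                   // create the file, write nothing
FileSplit emptyFileSplit = new FileSplit(emptyFile, 0L, 0L, (String[]) null);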
@Override
public void write(K key, V value) throws IOException {
  getHiveWriter().write(value);
}
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
  this.jobConf = job;
  Schema latest;

  try {
    latest = getSchema(job, split);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }

  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
  if (latest != null) {
    gdr.setExpected(latest);
  }

  if (split.getLength() == 0) {
    this.isEmptyInput = true;
    this.start = 0;
    this.reader = null;
  } else {
    this.isEmptyInput = false;
    this.reader = new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
    this.reader.sync(split.getStart());
    this.start = reader.tell();
  }
  this.stop = split.getStart() + split.getLength();
  this.recordReaderID = new UID();
}
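// Illustrative usage sketch (assumptions, not from the original source): reading an
// Avro container file through AvroGenericRecordReader with a split that covers the
// whole file. The file path and JobConf are placeholders.
Path avroFile = new Path("/tmp/example.avro");
JobConf conf = new JobConf();
long length = avroFile.getFileSystem(conf).getFileStatus(avroFile).getLen();
FileSplit wholeFileSplit = new FileSplit(avroFile, 0L, length, (String[]) null);

AvroGenericRecordReader reader = new AvroGenericRecordReader(conf, wholeFileSplit, Reporter.NULL);
NullWritable key = reader.createKey();
AvroGenericRecordWritable value = reader.createValue();
while (reader.next(key, value)) {
  GenericRecord record = value.getRecord();   // the deserialized Avro record
  // ... process record ...
}
reader.close();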
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed,
    Properties properties, Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);

  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }

  dfw.create(schema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(dfw);
}
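// Illustrative sketch (assumption, not part of the original class): configuring the
// JobConf so that the codec branch above takes effect. The keys "avro.output.codec"
// and "avro.mapred.deflate.level" are the standard Avro mapred property names that
// OUTPUT_CODEC and DEFLATE_LEVEL_KEY are assumed to resolve to.
JobConf conf = new JobConf();
conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);  // enable compression
conf.set("avro.output.codec", "deflate");                             // or e.g. "snappy"
conf.setInt("avro.mapred.deflate.level", 6);                          // only read for deflate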
if (pathIsInPartition(split.getPath(), partitionPath)) {
  if (LOG.isInfoEnabled()) {
    LOG.info("Matching partition " + partitionPath +
private FileSinkOperator.RecordWriter getHiveWriter() throws IOException {
  if (this.hiveWriter == null) {
    Properties properties = new Properties();
    for (AvroSerdeUtils.AvroTableProperties tableProperty : AvroSerdeUtils.AvroTableProperties.values()) {
      String propVal;
      if ((propVal = jobConf.get(tableProperty.getPropName())) != null) {
        properties.put(tableProperty.getPropName(), propVal);
      }
    }

    Boolean isCompressed = jobConf.getBoolean("mapreduce.output.fileoutputformat.compress", false);
    Path path = new Path(this.fileName);
    if (path.getFileSystem(jobConf).isDirectory(path)) {
      // This path is only potentially encountered during setup
      // Otherwise, a specific part_xxxx file name is generated and passed in.
      path = new Path(path, "_dummy");
    }

    this.hiveWriter = getHiveRecordWriter(jobConf, path, null, isCompressed, properties, progressable);
  }
  return this.hiveWriter;
}
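// Illustrative sketch (assumption, not from the original source): since getHiveWriter()
// copies every Avro table property it finds in the JobConf, the writer schema can be
// supplied through the conf. "avro.schema.literal" is the property name assumed to back
// AvroTableProperties.SCHEMA_LITERAL.
JobConf conf = new JobConf();
conf.set("avro.schema.literal",
    "{\"type\":\"record\",\"name\":\"Example\","
    + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");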
@Override
public void close(Reporter reporter) throws IOException {
  // Normally, I'd worry about the blanket false being passed in here, and that
  // it'd need to be integrated into an abort call for an OutputCommitter, but the
  // underlying recordwriter ignores it and throws it away, so it's irrelevant.
  getHiveWriter().close(false);
}