/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        CodecFactory.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
CodecFactory factory = null; if (FileOutputFormat.getCompressOutput(job)) { int deflateLevel = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); int xzLevel = job.getInt(XZ_LEVEL_KEY, DEFAULT_XZ_LEVEL);
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc));
}
/**
 * Returns path to a file in which the final cell will be written.
 * The name is derived from the cell index plus the compression codec's
 * default extension (when output compression is on), and is retried with an
 * increasing counter suffix until a non-existing path is found.
 * @param cellIndex The index of the cell to retrieve its output path.
 * @return a path that does not currently exist on the file system
 * @throws IOException if the existence check against the file system fails
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
  Path path;
  do {
    // First attempt uses the bare cell index; later attempts append the
    // retry counter so an existing file is never clobbered.
    String filename = counter == 0 ? String.format("data_%05d", cellIndex)
        : String.format("data_%05d_%d", cellIndex, counter);
    // jobConf may be null (e.g. when used outside a MapReduce task); treat
    // that as "no compression".
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (isCompressed) {
      Class<? extends CompressionCodec> codecClass =
          FileOutputFormat.getOutputCompressorClass(jobConf, GzipCodec.class);
      // create the named codec
      CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
      // Append the codec's extension (e.g. ".gz") so readers can detect it.
      filename += codec.getDefaultExtension();
    }
    path = getFilePath(filename);
    // NOTE: counter is a field, so the suffix keeps growing across calls.
    counter++;
  } while (fileSystem.exists(path));
  return path;
}
}
throws IOException { OutputStream cellStream; boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer, JobConf job) throws UnsupportedEncodingException { if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC); CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName); writer.setCodec(factory); } // Do max as core-default.xml has io.file.buffer.size as 4K writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max( job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL))); // copy metadata from job for (Map.Entry<String,String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema)); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } // copy metadata from job for (Map.Entry<String, String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1")); } Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new AvroRecordWriter(writer, schema); }
/**
 * Opens a WARC file writer for the given task output file, gzip-compressing
 * the output when the job requests compressed output.
 *
 * @param job job configuration
 * @param filename task-relative name of the output file
 * @param progress progress reporter passed through to the file writer
 * @throws IOException if the underlying file cannot be created
 */
public WARCWriter(JobConf job, String filename, Progressable progress) throws IOException {
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    codec = WARCFileWriter.getGzipCodec(job);
  }
  Path workFile = FileOutputFormat.getTaskOutputPath(job, filename);
  this.writer = new WARCFileWriter(job, codec, workFile, progress);
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        CodecFactory.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}