/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        CodecFactory.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
CodecFactory factory = null; if (FileOutputFormat.getCompressOutput(job)) { int deflateLevel = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); int xzLevel = job.getInt(XZ_LEVEL_KEY, DEFAULT_XZ_LEVEL);
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/** Reports whether Hadoop output compression is enabled in this configuration. */
@Override
public boolean isHadoopCompressionSet() {
  // Fully qualified to disambiguate from other FileOutputFormat classes in scope.
  final boolean compressionEnabled =
      org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(conf);
  return compressionEnabled;
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Wraps the given output stream in a compressed stream when the job
 * configuration requests compressed output; otherwise returns it unchanged.
 *
 * @param jc Job Configuration holding the compression codec and options
 * @param out Output Stream to be converted into compressed output stream
 * @return compressed output stream
 * @throws IOException if the compressed stream cannot be created
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
    throws IOException {
  // Delegate to the three-argument overload, letting the job config decide.
  return createCompressedStream(jc, out, FileOutputFormat.getCompressOutput(jc));
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @param progressable progress reporter for the write
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc), progressable);
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, honoring the
 * compression setting stored in the job configuration.
 *
 * @param jc Job configuration
 * @param fs File System to create file in
 * @param file Path to be created
 * @param keyClass Java Class for key
 * @param valClass Java Class for value
 * @return output stream over the created sequencefile
 * @throws IOException if the writer cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass) throws IOException {
  // The compression flag comes straight from the job configuration.
  return createSequenceWriter(jc, fs, file, keyClass, valClass,
      FileOutputFormat.getCompressOutput(jc));
}
/**
 * Returns path to a file in which the final cell will be written.
 * The name is derived from the cell index plus the compression codec's
 * default extension (when output compression is on), and is retried with an
 * increasing counter suffix until a non-existing path is found.
 * @param cellIndex The index of the cell to retrieve its output path.
 * @return a path that does not currently exist on the file system
 * @throws IOException if the existence check against the file system fails
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
  Path path;
  do {
    // First attempt uses the bare cell index; later attempts append the
    // retry counter so an existing file is never clobbered.
    String filename = counter == 0 ? String.format("data_%05d", cellIndex)
        : String.format("data_%05d_%d", cellIndex, counter);
    // jobConf may be null (e.g. when used outside a MapReduce task); treat
    // that as "no compression".
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (isCompressed) {
      Class<? extends CompressionCodec> codecClass =
          FileOutputFormat.getOutputCompressorClass(jobConf, GzipCodec.class);
      // create the named codec
      CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
      // Append the codec's extension (e.g. ".gz") so readers can detect it.
      filename += codec.getDefaultExtension();
    }
    path = getFilePath(filename);
    // NOTE: counter is a field, so the suffix keeps growing across calls.
    counter++;
  } while (fileSystem.exists(path));
  return path;
}
}
throws IOException { OutputStream cellStream; boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer, JobConf job) throws UnsupportedEncodingException { if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC); CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName); writer.setCodec(factory); } // Do max as core-default.xml has io.file.buffer.size as 4K writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max( job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL))); // copy metadata from job for (Map.Entry<String,String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema)); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } // copy metadata from job for (Map.Entry<String, String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1")); } Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new AvroRecordWriter(writer, schema); }
/**
 * Opens a WARC file writer for the given task output file, gzip-compressing
 * the output when the job requests compressed output.
 *
 * @param job job configuration
 * @param filename task-relative name of the output file
 * @param progress progress reporter passed through to the file writer
 * @throws IOException if the underlying file cannot be created
 */
public WARCWriter(JobConf job, String filename, Progressable progress) throws IOException {
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    codec = WARCFileWriter.getGzipCodec(job);
  }
  Path workFile = FileOutputFormat.getTaskOutputPath(job, filename);
  this.writer = new WARCFileWriter(job, codec, workFile, progress);
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
/**
 * Creates a {@link RecordWriter} that appends the pre-encoded Avro datums
 * carried by {@link TetherData} buffers to an Avro data file in the task
 * output directory.
 *
 * @param ignore file system argument, unused (the path supplies its own)
 * @param job job configuration; supplies the output schema and, optionally,
 *        the deflate compression level
 * @param name base name of the task output file
 * @param prog progress reporter, unused
 * @return a writer that appends encoded buffers and closes the file on close
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the raw DataFileWriter/GenericDatumWriter,
  // eliminating the unchecked warnings (and the @SuppressWarnings that hid them).
  // Only appendEncoded(ByteBuffer) is used, so the type parameter is never
  // bound to a real datum.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        CodecFactory.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}