@Test
public void testNonAvroMapOnly() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  // configure input for non-Avro sequence file
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // use a hadoop mapper that emits Avro output
  job.setMapperClass(NonAvroOnlyMapper.class);

  // configure output for avro
  job.setNumReduceTasks(0); // map-only
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroJob.setOutputSchema(job, SCHEMA);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(new File(outputPath.toString() + "/part-00000.avro"),
                                 new SpecificDatumReader<>()));
}
this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType);
this.position[chunkId] = 0;
this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(conf),
                                           getStoreName() + "." + Integer.toString(chunkId)
                                           + "_" + this.taskId + INDEX_FILE_EXTENSION + fileExtension);
this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(conf),
                                           getStoreName() + "." + Integer.toString(chunkId)
                                           + "_" + fileExtension);
if(this.fs == null)
    this.fs = this.taskIndexFileName[chunkId].getFileSystem(conf);
if(isValidCompressionEnabled) {
    this.indexFileStream[chunkId] = new DataOutputStream(new BufferedOutputStream(
            new GZIPOutputStream(fs.create(this.taskIndexFileName[chunkId]), DEFAULT_BUFFER_SIZE)));
    this.valueFileStream[chunkId] = new DataOutputStream(new BufferedOutputStream(
            new GZIPOutputStream(fs.create(this.taskValueFileName[chunkId]), DEFAULT_BUFFER_SIZE)));
} else {
    this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]);
    this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]);
}
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
conf.setJobName(getId());
conf.setNumReduceTasks(0);
conf.set("hadoop.job.ugi", hadoop_ugi);

FileSystem fs = FileSystem.get(conf);
FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter);
path = statuses[statuses.length - 1].getPath().toString();
System.out.println("Using latest folder: " + path);
HadoopUtils.addAllSubPaths(conf, new Path(path));

FileOutputFormat.setOutputPath(conf, new Path(location));
// a second FileSystem variable is needed here; re-declaring "fs" would not compile
FileSystem outputFs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
outputFs.delete(FileOutputFormat.getOutputPath(conf), true);
public void testOutputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputSchema(job, STRING);
  AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);

  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(job);
  wordCountUtil.validateCountsFile();
}
JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if(fs.exists(outputDir)) {
    info("Deleting previous output in " + outputDir + " for building store "
         + this.storeDef.getName());
    fs.delete(outputDir, true);
}

conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);

FileSystem outputFs = outputDir.getFileSystem(conf);
if(outputFs.exists(outputDir)) {
    throw new IOException("Final output directory already exists.");
}

JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
if (!jc.monitorAndPrintJob(conf, runningJob)) {
    // ... (failure handling elided in this excerpt)
}
counters = runningJob.getCounters();
        // ... (enclosing record-writer construction elided in this excerpt)
        (org.apache.hadoop.mapred.RecordWriter) null, context);
} else {
    Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
    Path childPath = new Path(parentDir,
            FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
                    context.getConfiguration().get("mapreduce.output.basename", "part")));
    // ... (these are the arguments to the elided getRecordWriter call)
            parentDir.getFileSystem(context.getConfiguration()),
            new JobConf(context.getConfiguration()),
            childPath.toString(),
            InternalUtil.createReporter(context)),
    // ...
}
public int run(String[] args) throws Exception {
    if(args.length != 3)
        Utils.croak("USAGE: GenerateData input-file output-dir value-size");

    JobConf conf = new JobConf(getConf(), GenerateData.class);
    conf.setJobName("generate-data");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(GenerateDataMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);

    Path inputPath = new Path(args[0]);
    FileInputFormat.setInputPaths(conf, inputPath);
    Path outputPath = new Path(args[1]);

    // delete output path if it already exists
    FileSystem fs = outputPath.getFileSystem(conf);
    if(fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInt("value.size", Integer.parseInt(args[2]));

    JobClient.runJob(conf);
    return 0;
}
public void setupJob(JobConf conf) throws IOException {
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  if (outputPath != null) {
    Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
    FileSystem fileSys = tmpDir.getFileSystem(conf);
    if (!fileSys.mkdirs(tmpDir)) {
      LOG.error("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
JobConf job = new JobConf(PagerankData.class);
String jobname = "Create pagerank links";
Path fout = new Path(options.getResultPath(), EDGES_DIR_NAME);

job.setJobName(jobname);
setPageRankLinksOptions(job);

job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(0);

FileInputFormat.setInputPaths(job, dummy.getPath());
job.setInputFormat(NLineInputFormat.class);

// set both the deprecated and the current property name for compatibility
job.set("mapred.output.compression.type", "BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
FileOutputFormat.setOutputPath(job, fout);

JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
  JobConf job = new JobConf();
  String inputPath = INPUT_DIR.getRoot().getPath();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  WordCountUtil.writeLinesBytesFile(inputPath);

  job.setInputFormat(AvroAsTextInputFormat.class);
  job.setOutputFormat(AvroTextOutputFormat.class);
  job.setOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
public static void makeTempPath( Configuration conf ) throws IOException {
  // create job specific temporary directory in output path
  Path outputPath = FileOutputFormat.getOutputPath( asJobConfInstance( conf ) );

  if( outputPath != null ) {
    Path tmpDir = new Path( outputPath, TEMPORARY_PATH );
    FileSystem fileSys = tmpDir.getFileSystem( conf );

    if( !fileSys.exists( tmpDir ) && !fileSys.mkdirs( tmpDir ) )
      LOG.error( "mkdirs failed to create {}", tmpDir );
  }
}
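A quick, hedged sketch of the call pattern for a utility like makeTempPath: the output path below is a placeholder, and the configuration must point at an output directory first, since the method derives the temporary directory from FileOutputFormat.getOutputPath.

// Hypothetical usage: the temporary dir is created under the job output path.
JobConf conf = new JobConf();
FileOutputFormat.setOutputPath(conf, new Path("/tmp/example-out")); // placeholder path
makeTempPath(conf); // creates <output>/<TEMPORARY_PATH> if it does not already exist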
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path inputPath1 = new Path(INPUT_DIR_1.getRoot().getPath());
  Path inputPath2 = new Path(INPUT_DIR_2.getRoot().getPath());
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  writeNamesFiles(new File(inputPath1.toUri().getPath()));
  writeBalancesFiles(new File(inputPath2.toUri().getPath()));

  job.setJobName("multiple-inputs-join");
  AvroMultipleInputs.addInputPath(job, inputPath1, NamesMapImpl.class,
                                  ReflectData.get().getSchema(NamesRecord.class));
  AvroMultipleInputs.addInputPath(job, inputPath2, BalancesMapImpl.class,
                                  ReflectData.get().getSchema(BalancesRecord.class));

  Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
  Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
  AvroJob.setMapOutputSchema(job, Pair.getPairSchema(keySchema, valueSchema));
  AvroJob.setOutputSchema(job, ReflectData.get().getSchema(CompleteRecord.class));
  AvroJob.setReducerClass(job, ReduceImpl.class);
  job.setNumReduceTasks(1);

  FileOutputFormat.setOutputPath(job, outputPath);
  AvroJob.setReflect(job);

  JobClient.runJob(job);
  validateCompleteFile(new File(OUTPUT_DIR.getRoot(), "part-00000.avro"));
}
/**
 * Helper function to generate a {@link Path} for a file that is unique for
 * the task within the job output directory.
 *
 * <p>The path can be used to create custom files from within the map and
 * reduce tasks. The path name will be unique for each task. The path parent
 * will be the job output directory.</p>
 *
 * <p>This method uses the {@link #getUniqueName} method to make the file name
 * unique for the task.</p>
 *
 * @param conf the configuration for the job.
 * @param name the name for the file.
 * @return a unique path across all tasks of the job.
 */
public static Path getPathForCustomFile(JobConf conf, String name) {
  return new Path(getWorkOutputPath(conf), getUniqueName(conf, name));
}
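Since the Javadoc above describes how per-task custom files work, here is a minimal sketch of using getPathForCustomFile from a mapper. The SideFileMapper class, the "debug-log" file name, and the record format are all invented for illustration; only the FileOutputFormat call itself comes from the API above.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

// Hypothetical mapper that writes a per-task side file alongside its normal output.
public class SideFileMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private FSDataOutputStream sideFile;

  @Override
  public void configure(JobConf conf) {
    try {
      // Resolves to something like <output dir>/debug-log-m-00000, unique per task.
      Path sidePath = FileOutputFormat.getPathForCustomFile(conf, "debug-log");
      sideFile = sidePath.getFileSystem(conf).create(sidePath);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public void map(LongWritable key, Text value,
                  OutputCollector<Text, IntWritable> out, Reporter reporter)
      throws IOException {
    sideFile.writeBytes("record at offset " + key.get() + "\n");
    out.collect(value, new IntWritable(1));
  }

  @Override
  public void close() throws IOException {
    sideFile.close();
  }
}

Because the parent is the job output directory rather than the task work directory, no rename on commit is needed for the side file, but a failed-and-retried task may leave partial files behind.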
File inputPath = new File(INPUT_DIR.getRoot(), "lines.avro");
JobConf job = new JobConf();
Path outputPath = new Path(outputPathStr);
outputPath.getFileSystem(job).delete(outputPath);

execargs.add("org.apache.avro.mapred.tether.WordCountTask");
FileInputFormat.addInputPaths(job, inputPath.toString());
FileOutputFormat.setOutputPath(job, outputPath);
TetherJob.setExecutable(job, exec, execargs, false);
job.set(AvroJob.OUTPUT_SCHEMA, outscheme.toString());
Path rankings = new Path(options.getResultPath(), RANKINGS);
Path fout = new Path(options.getResultPath(), USERVISITS);

JobConf job = new JobConf(HiveData.class);
String jobname = "Create uservisits";
job.setJobName(jobname);
setVisitsOptions(job);

Path uagentPath = new Path(options.getWorkPath(), uagentf);
DistributedCache.addCacheFile(uagentPath.toUri(), job);
DistributedCache.addCacheFile(searchkeyPath.toUri(), job);

job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);

job.set("mapred.output.compression.type", "BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
FileOutputFormat.setOutputPath(job, fout);

log.info("Rankings file " + rankings + " as input");
log.info("Output file " + fout);
JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
public long produceSamples(Path samplePath, boolean textOutput) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " samples");

  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));

  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);

  // the branches differ only in the output format class
  if (textOutput) {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  } else {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  }
  jobConf.setNumReduceTasks(0);

  JobClient.runJob(jobConf);
  return this.numSamples;
}
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if ((out == null) && (job.getNumReduceTasks() != 0)) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (fs == null) {
    fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
    throw new IOException("Segment already parsed!");
}
/**
 * Gets a fully configured JobConf instance.
 *
 * @param input input file name.
 * @param output output directory name.
 * @return Job configuration.
 */
public static JobConf getJob(String input, String output) {
  JobConf conf = new JobConf(HadoopWordCount1.class);
  conf.setJobName("wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  setTasksClasses(conf, true, true, true);

  FileInputFormat.setInputPaths(conf, new Path(input));
  FileOutputFormat.setOutputPath(conf, new Path(output));

  return conf;
}
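A hedged usage sketch for getJob: the input and output paths are placeholders, and the driver simply hands the returned JobConf to the classic JobClient.runJob call used by the other snippets on this page.

// Hypothetical driver; paths are placeholders.
public static void main(String[] args) throws IOException {
  JobConf conf = HadoopWordCount1.getJob("/data/books", "/data/wc-out");
  RunningJob job = JobClient.runJob(conf); // blocks until the job finishes
  System.out.println("Succeeded: " + job.isSuccessful());
}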
/**
 * @param args command-line arguments: the output directory, the table name,
 *             and then the space-delimited column names.
 * @return the JobConf
 * @throws IOException
 */
public JobConf createSubmittableJob(String[] args) throws IOException {
  JobConf c = new JobConf(getConf(), getClass());
  c.setJobName(NAME);

  // Columns are space delimited
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 2;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }

  // Second argument is the table name.
  TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, c);
  c.setNumReduceTasks(0);

  // First arg is the output directory.
  FileOutputFormat.setOutputPath(c, new Path(args[0]));
  return c;
}
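To make the argument layout concrete, a hypothetical invocation (the table and column names are invented, and RowCounter is assumed to be the enclosing Tool implementation): args[0] is the output directory, args[1] the table, and everything after that the columns to scan.

// Hypothetical: count rows of "mytable" over columns cf:a and cf:b,
// running through ToolRunner so the tool's Configuration is populated.
ToolRunner.run(HBaseConfiguration.create(), new RowCounter(),
    new String[] { "/tmp/rowcounter-out", "mytable", "cf:a", "cf:b" });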