private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
@Override
public Void call() throws IOException, InterruptedException {
  // Set up, write, and commit a single task attempt's output.
  final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
  outputCommitter.setupTask(taCtx[taskIdx]);
  final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
  writeOutput(rw, taCtx[taskIdx]);
  outputCommitter.commitTask(taCtx[taskIdx]);
  return null;
}
}); // closes the enclosing executor.submit(new Callable<Void>() { ... })
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }
  final String inputPath = args[0];
  final String outputPath = args[1];

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop Input Format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the line and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert String back to Writable "Text" for use with Hadoop Output Format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up Hadoop Output Format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  // Set the separator under both property names, since the key differs across Hadoop versions
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & Execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
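The Tokenizer used by the driver above is not shown in this snippet. A minimal sketch of what it plausibly looks like, assuming Flink's standard FlatMapFunction API; the splitting rule here is an assumption, not the original code:

// Hypothetical Tokenizer for the driver above: splits each Hadoop Text line
// into lowercase words, emitting (word, 1) pairs. The regex is an assumption.
public static final class Tokenizer
    implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
  @Override
  public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
    for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
      if (!token.isEmpty()) {
        out.collect(new Tuple2<String, Integer>(token, 1));
      }
    }
  }
}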
private void testAbortInternal(int version) throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // abort the task; its temporary output directory must be cleaned up
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "QuestionAnswerBuilder");
  job.setJarByClass(QuestionAnswerBuilder.class);
  job.setMapperClass(HierarchyMapper.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setReducerClass(QuestionAnswerReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  return job.waitForCompletion(true) ? 0 : 2;
}
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    System.out.println("Usage: CountRowKeys tableName outputPath");
    return 1;
  }
  Job job = new Job(getConf(), this.getClass().getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.addInputPath(job,
      new Path(ServerConstants.getTablesDir() + "/" + args[0] + "/*/*/data"));
  job.setMapperClass(MyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(MyReducer.class);
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
private boolean runJob(float badRecordThreshold) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  HCatInputFormat.setInput(job, "default", "test_bad_records");
  job.setMapOutputKeyClass(HCatRecord.class);
  job.setMapOutputValueClass(HCatRecord.class);
  job.setNumReduceTasks(0);
  Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
  if (path.getFileSystem(conf).exists(path)) {
    path.getFileSystem(conf).delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);
  return job.waitForCompletion(true);
}
FileSystem.get(getConf()).delete(new Path(args[1]), true);
Job job1 = Job.getInstance(getConf());
job1.setJobName("sequence collection conversion (phase 1)");
job1.setJarByClass(this.getClass());
job1.getConfiguration().setStrings("de.mpii.tools.itemSeparator", itemSeparator);
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));
job1.getConfiguration().set("mapreduce.cluster.mapmemory.mb", "4096");
job1.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(AccumuloInputFormat.class);
  AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
  AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(args[6]));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
@SuppressWarnings("unchecked") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Path inputPath = new Path(args[0]); Path partitionFile = new Path(args[1] + "_partitions.lst"); Path outputStage = new Path(args[1] + "_staging"); Path outputOrder = new Path(args[1]); Job sampleJob = new Job(conf, "TotalOrderSortingStage"); sampleJob.setJarByClass(TotalOrderSortingStage.class); sampleJob.setMapperClass(LastAccessMapper.class); sampleJob.setNumReduceTasks(0); sampleJob.setOutputKeyClass(Text.class); SequenceFileInputFormat.setInputPaths(orderJob, outputStage); TextOutputFormat.setOutputPath(orderJob, outputOrder); orderJob.getConfiguration().set( "mapred.textoutputformat.separator", ""); FileSystem.get(new Configuration()).delete(partitionFile, false); FileSystem.get(new Configuration()).delete(outputStage, true); return code;
FileSystem.get(getConf()).delete(new Path(args[1]), true);

// Phase 1
Job job1 = new Job(getConf());
job1.setJobName("document collection conversion (phase 1)");
job1.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job1.setJarByClass(this.getClass());
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));

// Phase 2 (construction of job2 is elided in this excerpt)
job2.setJobName("document collection conversion (phase 2)");
job2.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job2.setJarByClass(this.getClass());
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Path postInput = new Path(otherArgs[0]);
  Path userInput = new Path(otherArgs[1]);
  Path outputDirIntermediate = new Path(otherArgs[2] + "_int");
  Path outputDir = new Path(otherArgs[2]);

  // Job 1: count per-user records into an intermediate directory
  Job countingJob = new Job(conf, "JobChaining-Counting");
  countingJob.setJarByClass(BasicJobChaining.class);
  countingJob.setMapperClass(UserIdCountMapper.class);
  countingJob.setCombinerClass(LongSumReducer.class);
  countingJob.setReducerClass(UserIdSumReducer.class);
  countingJob.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(countingJob, outputDirIntermediate);

  // Job 2: bin the intermediate output into the final directory
  // (input setup and remaining configuration elided in this excerpt)
  Job binningJob = new Job(new Configuration(), "JobChaining-Binning");
  binningJob.setJarByClass(BasicJobChaining.class);
  MultipleOutputs.setCountersEnabled(binningJob, true);
  TextOutputFormat.setOutputPath(binningJob, outputDir);
private static Job ultimateUnionMapReduce(Path input, Path output, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "UltimateUnion");
  job.setJarByClass(UltimateUnion.class);
  // Set map and reduce
  job.setMapperClass(UltimateUnionMap.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(OGCJTSShape.class);
  job.setNumReduceTasks(0);
  // Set input and output
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.addInputPath(job, input);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, output);
  // Submit the job
  if (!params.getBoolean("background", false)) {
    job.waitForCompletion(false);
    if (!job.isSuccessful())
      throw new RuntimeException("Job failed!");
  } else {
    job.submit();
  }
  return job;
}
Job job = Job.getInstance(conf);
job.setJarByClass(TextOutputExample.class);
job.setMapperClass(RyaToRdfMapper.class);
job.setNumReduceTasks(0);
Path outputPath = new Path(tempDir, "rdf-output");
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputPath);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
opts.parseArgs(this.getClass().getName(), args);
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
job.setJarByClass(this.getClass());
job.setInputFormatClass(AccumuloInputFormat.class);
opts.setAccumuloConfigs(job);
job.setOutputFormatClass(TextOutputFormat.class);
job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", opts.scanOffline);
TextOutputFormat.setOutputPath(job, new Path(opts.outputDir));
job.waitForCompletion(true);
JobConf conf = new JobConf();
conf.set("mapreduce.framework.name", "local");
Job job = new Job(conf);
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.getConfiguration().setInt("min.num.spills.for.combine", 0);
job.submit();
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
  final Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  final Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
  FileSystem.closeAll();

  // (Construction of the output formats tof[], the task attempt contexts
  // taCtx[], the executor, and the surrounding loop is elided in this excerpt.)
  // Each task attempt sets up, writes, and commits its output concurrently:
  final int taskIdx = i;
  executor.submit((Callable<Void>) () -> {
    final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
    outputCommitter.setupTask(taCtx[taskIdx]);
    final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
    writeOutput(rw, taCtx[taskIdx]);
    outputCommitter.commitTask(taCtx[taskIdx]);
    return null;
  });

  // After the commits, verify no nested sub_dir/sub_dir was created.
  final RawLocalFileSystem lfs = new RawLocalFileSystem();
  lfs.setConf(conf);
  assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
  FileUtil.fullyDelete(new File(outDir.toString()));
}
public int run(Path outputDir, int numReducers) throws Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);
  job = Job.getInstance(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  // ship the tool's dependency jars with the job
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  String keyValueSeparator = conf.get(SEPERATOR, "\t");
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  Path file = getDefaultWorkFile(job, extension);
  FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    FSDataOutputStream fileOut = fs.create(file, false);
    return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
  } else {
    FSDataOutputStream fileOut = fs.create(file, false);
    return new LineRecordWriter<K, V>(
        new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator);
  }
}
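The record writer above reads its separator and compression settings from the job configuration. A minimal driver-side sketch that exercises both branches, assuming Hadoop 2.x property names and the standard FileOutputFormat helpers; the output path is a placeholder:

// Hypothetical driver fragment: sets the key/value separator and enables
// gzip compression, both of which getRecordWriter() above picks up.
Job job = Job.getInstance(new Configuration());
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ";");
TextOutputFormat.setCompressOutput(job, true);
TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
TextOutputFormat.setOutputPath(job, new Path("/tmp/text-output")); // placeholder path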
private int createParitionFile(String sequenceFileInput, String outputPath, float frequency)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration config = getConf();
  config.setFloat(SAMPLE_FREQUENCY, frequency);
  Job sampler = new Job(config);
  sampler.setInputFormatClass(TextInputFormat.class);
  sampler.setOutputFormatClass(TextOutputFormat.class);
  sampler.setOutputKeyClass(Text.class);
  sampler.setOutputValueClass(NullWritable.class);
  sampler.setNumReduceTasks(0);
  sampler.setMapperClass(Map.class);
  TextInputFormat.addInputPath(sampler, new Path(sequenceFileInput));
  TextOutputFormat.setOutputPath(sampler, new Path(outputPath));
  sampler.waitForCompletion(true);
  return 0;
}