/**
 * Add an OutputFormat configuration to the Job under an alias name.
 *
 * @param alias the name to be given to the OutputFormat configuration
 * @param outputFormatClass OutputFormat class
 * @param keyClass the key class for the output data
 * @param valueClass the value class for the output data
 * @throws IOException
 */
public void addOutputFormat(String alias, Class<? extends OutputFormat> outputFormatClass,
    Class<?> keyClass, Class<?> valueClass) throws IOException {
  Job copy = new Job(this.job.getConfiguration());
  outputConfigs.put(alias, copy);
  copy.setOutputFormatClass(outputFormatClass);
  copy.setOutputKeyClass(keyClass);
  copy.setOutputValueClass(valueClass);
}
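// Usage sketch for addOutputFormat(...) above. The enclosing wrapper instance ("multiOut" here)
// and its job/outputConfigs fields are assumptions inferred from the method body, not part of the
// original snippet: each alias gets its own OutputFormat plus key/value classes.
multiOut.addOutputFormat("text", TextOutputFormat.class, LongWritable.class, Text.class);
multiOut.addOutputFormat("seq", SequenceFileOutputFormat.class, LongWritable.class, Text.class);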
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  // increase if you need sorting or a special number of files
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
  // submit and wait for completion
  job.waitForCompletion(true);
}
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(c, jobName);
  initJob(scans, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  LOG.info("Started " + job.getJobName());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(getClass());
    job, true, new Path(restoreDir));
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.waitForCompletion(true);
scanTimer.stop();
Job job = new Job();
FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
job.setInputFormatClass(AvroKeyValueInputFormat.class);
AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));
job.setOutputKeyClass(AvroKey.class);
job.setOutputValueClass(AvroValue.class);
Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
FileOutputFormat.setOutputPath(job, outputPath);
assertTrue(job.waitForCompletion(true));
Configuration conf = new Configuration();
conf.set("hive.metastore.uris", "thrift://no.such.machine:10888");
Job job = new Job(conf, "Write-hcat-seq-table");
job.setJarByClass(TestPassProperties.class);
job.setMapperClass(Map.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DefaultHCatRecord.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);
HCatOutputFormat.setSchema(job, getSchema());
job.setNumReduceTasks(0);
assertTrue(job.waitForCompletion(true));
new FileOutputCommitterContainer(job, null).cleanupJob(job);
} catch (Exception e) {
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);
  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));
  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
public int runCopier(String outputDir) throws Exception {
  Job job = null;
  Scan scan = null;
  job = new Job(getConf());
  job.setJobName("Data copier");
  job.getConfiguration().setInt("INDEX", labelIndex);
  job.getConfiguration().set("LABELS", labels);
  job.setJarByClass(getClass());
  scan = new Scan();
  scan.setCacheBlocks(false);
  TableMapReduceUtil.initCredentials(job);
  job.setNumReduceTasks(0);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo,
    Path outDir, boolean putSortReducer)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  setupRandomGeneratorMapper(job, putSortReducer);
  if (tableInfo.size() > 1) {
    MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
    int sum = 0;
    for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
      sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
    }
    assertEquals(sum, job.getNumReduceTasks());
  } else {
    RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
    HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
        regionLocator);
    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  }
  FileOutputFormat.setOutputPath(job, outDir);
  assertFalse(util.getTestFileSystem().exists(outDir));
  assertTrue(job.waitForCompletion(true));
}
private void runTestOnTable() throws IOException, InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    job = new Job(UTIL.getConfiguration(), "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ProcessTimeRangeMapper.class,
        Text.class, Text.class, job);
    job.waitForCompletion(true);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  System.out.println("****" + tableName);
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  conf.set("index.tablename", tableName);
  conf.set("index.familyname", columnFamily);
  String[] fields = new String[args.length - 2];
  System.arraycopy(args, 2, fields, 0, fields.length);
  conf.setStrings("index.fields", fields);
  Job job = new Job(conf, tableName);
  job.setJarByClass(IndexBuilder.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  return job;
}
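// A possible driver for the IndexBuilder configureJob(...) above. This main(...) is an assumed
// sketch, not part of the original snippet; args are expected as <tableName> <columnFamily> <field>...
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}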
Job job = new Job(conf, "testWritingPEData"); setupRandomGeneratorMapper(job, false); job.setReducerClass(CellSortReducer.class); job.setOutputFormatClass(HFileOutputFormat2.class); job.setNumReduceTasks(4); CellSerialization.class.getName()); FileOutputFormat.setOutputPath(job, testDir); assertTrue(job.waitForCompletion(false)); FileStatus [] files = fs.listStatus(testDir); assertTrue(files.length > 0);
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(getClass());
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.waitForCompletion(true);
scanTimer.stop();
Job job = new Job(util.getConfiguration());
Scan scan = new Scan(startRow, endRow); // limit the scan
job.setJarByClass(util.getClass());
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
    TestTableSnapshotInputFormat.class);
job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(NullOutputFormat.class);
Assert.assertTrue(job.waitForCompletion(true));
} finally {
  if (!shutdownCluster) {
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
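// Likewise, an assumed driver sketch for the Uploader configureJob(...) above (not part of the
// original snippet); here args are the input sequence-file path and the target table name.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}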
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
Job job = new Job(conf);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
fs.delete(out, true);
job.waitForCompletion(true);
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName().getNameAsString()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
Job job = new Job();
job.setMapperClass(MyMapper.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
return 0;