public LogRetriever(String statusDir, JobType jobType, Configuration conf)
    throws IOException {
  this.statusDir = statusDir;
  this.jobType = jobType;

  attemptDetailPattern = Pattern.compile(attemptDetailPatternInString);
  attemptLogPattern = Pattern.compile(attemptLogPatternInString);
  attemptIDPattern = Pattern.compile(attemptIDPatternInString);
  attemptStartTimePattern = Pattern.compile(attemptStartTimePatternInString);
  attemptEndTimePattern = Pattern.compile(attemptEndTimePatternInString);

  Path statusPath = new Path(statusDir);
  fs = statusPath.getFileSystem(conf);
  jobClient = new JobClient(new JobConf(conf));
  this.conf = conf;
}
public MiniMrShim(Configuration conf, int numberOfTaskTrackers, String nameNode, int numDir)
    throws IOException {
  this.conf = conf;

  JobConf jConf = new JobConf(conf);
  jConf.set("yarn.scheduler.capacity.root.queues", "default");
  jConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
  jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128);
  jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512);

  mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir, null, null, jConf);
}
protected JobConf configStage4() throws Exception {
  final JobConf conf = new JobConf(getConf(), Hadi.class);
  conf.setJobName("HADI_Stage4");

  conf.setMapperClass(MapStage4.class);

  FileInputFormat.setInputPaths(conf, curbm_path);
  FileOutputFormat.setOutputPath(conf, radius_path);

  conf.setNumReduceTasks(0); // This is essential for map-only tasks.

  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
@Test
public void testNonAvroReducer() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  AvroJob.setInputSchema(job, SCHEMA);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // mapper is default, identity

  // use a hadoop reducer that consumes Avro input
  AvroJob.setMapOutputSchema(job, SCHEMA);
  job.setReducerClass(NonAvroReducer.class);

  // configure output for non-Avro SequenceFile
  job.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  // output key/value classes are default, LongWritable/Text

  JobClient.runJob(job);

  checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
}
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
    CompressionKind compression, ObjectInspector columnObjectInspector)
    throws IOException {
  JobConf jobConf = new JobConf();
  jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
  jobConf.set("hive.exec.orc.default.compress", compression.name());

  Properties tableProperties = new Properties();
  tableProperties.setProperty("columns", "test");
  tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
  tableProperties.setProperty("orc.stripe.size", "1200000");

  return new OrcOutputFormat().getHiveRecordWriter(
      jobConf,
      new Path(outputFile.toURI()),
      Text.class,
      compression != NONE,
      tableProperties,
      () -> {});
}
@Test
public void testNonAvroMapOnly() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  // configure input for non-Avro sequence file
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // use a hadoop mapper that emits Avro output
  job.setMapperClass(NonAvroOnlyMapper.class);

  // configure output for avro
  job.setNumReduceTasks(0); // map-only
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroJob.setOutputSchema(job, SCHEMA);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(new File(outputPath.toString() + "/part-00000.avro"),
      new SpecificDatumReader<>()));
}
public int run(String[] args) throws Exception {
  if (args.length != 3)
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");

  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");

  conf.setMapperClass(GenerateDataMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0); // map-only job; the identity reducer is never invoked

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.setInputPaths(conf, inputPath);
  Path outputPath = new Path(args[1]);

  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if (fs.exists(outputPath))
    fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInt("value.size", Integer.parseInt(args[2]));

  JobClient.runJob(conf);
  return 0;
}
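// A minimal launcher sketch (not part of the original tool): run(String[]) plus getConf()
// is the standard Hadoop Tool pattern, so a driver like this can submit GenerateData
// through ToolRunner. The main method and argument values shown here are illustrative only.
public static void main(String[] args) throws Exception {
  // args: input-file output-dir value-size, e.g. {"/tmp/in.txt", "/tmp/generated", "1024"}
  int exitCode = ToolRunner.run(new Configuration(), new GenerateData(), args);
  System.exit(exitCode);
}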
private static void assertFileContentsDwrfHive(
    Type type,
    TempFile tempFile,
    Iterable<?> expectedValues)
    throws Exception {
  JobConf configuration = new JobConf(new Configuration(false));
  configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
  configuration.setBoolean(READ_ALL_COLUMNS, false);

  Path path = new Path(tempFile.getFile().getAbsolutePath());
  com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile.createReader(
      path.getFileSystem(configuration),
      path,
      configuration);

  boolean[] include = new boolean[reader.getTypes().size() + 100000];
  Arrays.fill(include, true);
  com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);

  StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
  StructField field = rowInspector.getStructFieldRef("test");

  Iterator<?> iterator = expectedValues.iterator();
  Object rowData = null;
  while (recordReader.hasNext()) {
    rowData = recordReader.next(rowData);
    Object expectedValue = iterator.next();

    Object actualValue = rowInspector.getStructFieldData(rowData, field);
    actualValue = decodeRecordReaderValue(type, actualValue);
    assertColumnValueEquals(type, actualValue, expectedValue);
  }
  assertFalse(iterator.hasNext());
}
/**
 * Gets fully configured JobConf instance.
 *
 * @param input Input file name.
 * @param output Output directory name.
 * @return Job configuration.
 */
public static JobConf getJob(String input, String output) {
  JobConf conf = new JobConf(HadoopWordCount1.class);
  conf.setJobName("wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  setTasksClasses(conf, true, true, true);

  FileInputFormat.setInputPaths(conf, new Path(input));
  FileOutputFormat.setOutputPath(conf, new Path(output));

  return conf;
}
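// A minimal usage sketch (not part of the original class): builds the word-count
// configuration and submits it synchronously with JobClient.runJob, the blocking
// submission call of the old mapred API. The input/output paths are hypothetical
// placeholders.
public static void main(String[] args) throws Exception {
  JobConf conf = getJob("/tmp/wordcount/input", "/tmp/wordcount/output"); // illustrative paths
  JobClient.runJob(conf); // blocks until the job completes and throws on failure
}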
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
  JobConf job = new JobConf();
  String inputPath = INPUT_DIR.getRoot().getPath();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  WordCountUtil.writeLinesBytesFile(inputPath);

  job.setInputFormat(AvroAsTextInputFormat.class);
  job.setOutputFormat(AvroTextOutputFormat.class);
  job.setOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf job = new JobConf(TEST_UTIL.getConfiguration());

  job.setJobName(jobName);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);

  TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);

  TableMapReduceUtil.addDependencyJars(job);

  job.setNumReduceTasks(1); // one to get final "first" and "last" key

  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  LOG.info("Started " + job.getJobName());

  RunningJob runningJob = JobClient.runJob(job);
  runningJob.waitForCompletion();
  assertTrue(runningJob.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
    ValidWriteIdList writeIds, CompactionInfo ci) {
  JobConf job = new JobConf(conf);
  job.setJobName(jobName);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setJarByClass(CompactorMR.class);
  LOG.debug("User jar set to " + job.getJar());
  job.setMapperClass(CompactorMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormat(CompactorInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setOutputCommitter(CompactorOutputCommitter.class);

  job.set(FINAL_LOCATION, sd.getLocation());
  job.set(TMP_LOCATION, generateTmpPath(sd));
  job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
  job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
  job.setBoolean(IS_COMPRESSED, sd.isCompressed());
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(),
      (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0, and the length is 100, the first call to sync should be with the value
  // 50 so return that for getPos()
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
    throws IOException {
  JobConf job = new JobConf(fsConfig, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
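// A minimal usage sketch (not part of the original benchmark code): the mapper class and
// output directory below are hypothetical placeholders for whichever IO mapper (write or
// read) and result directory the enclosing TestDFSIO-style driver defines.
private static void runWritePhase() throws IOException {
  runIOTest(WriteMapper.class, WRITE_DIR); // WriteMapper and WRITE_DIR are assumed to exist in the enclosing class
}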
private JobConf initializeVertexConf(JobConf baseConf, Context context, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);

  conf.set(Operator.CONTEXT_NAME_KEY, reduceWork.getName());

  // Is this required ?
  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  conf.setBoolean(org.apache.hadoop.mapreduce.MRJobConfig.REDUCE_SPECULATIVE,
      useSpeculativeExecReducers);

  return conf;
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
protected JobConf configStage2() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.setJobName("ConCmptBlock_pass2");

  conf.setMapperClass(MapStage2.class);
  conf.setReducerClass(RedStage2.class);

  FileInputFormat.setInputPaths(conf, tempbm_path);
  FileOutputFormat.setOutputPath(conf, nextbm_path);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
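// A minimal driver sketch (not part of the original code): a stage factory like
// configStage2() is typically called from the tool's run() method and submitted with
// the blocking JobClient.runJob call. runStage2 below is a hypothetical wrapper name.
protected void runStage2() throws Exception {
  JobClient.runJob(configStage2()); // submits the pass-2 job and waits for it to finish
}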
@Test
public void testSequenceFileInputFormat() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  FileInputFormat.setInputPaths(job, file().toURI().toString());
  AvroJob.setInputSchema(job, SCHEMA);

  // mapper is default, identity
  // reducer is default, identity

  // configure output for avro
  AvroJob.setOutputSchema(job, SCHEMA);
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(new File(outputPath.toString() + "/part-00000.avro"),
      new SpecificDatumReader<>()));
}
@Override
protected void initInternal() throws IOException {
  Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
  for (ReadEntryWithPath entry : entries) {
    Path path = new Path(entry.getPath());
    Configuration conf = new ProjectionPusher().pushProjectionsAndFilters(
        new JobConf(hiveStoragePlugin.getHiveConf()),
        path.getParent());
    FileSystem fs = path.getFileSystem(conf);
    fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
  }
  parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}