Configuration conf = new Configuration();
String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0];
conf.set(MRJobConfig.NUM_MAPS, numMaps);
createHdfsFilesystem(conf);

Job job = Job.getInstance(conf, "MapReduceIntegrationChecker");
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);
// ... (job submission and result evaluation elided in this excerpt) ...
      : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1);
} finally {
  if (mFileSystem.exists(mOutputFilePath)) {
    mFileSystem.delete(mOutputFilePath, true);
  }
  mFileSystem.close();
}
/**
 * Prints generic command-line arguments and usage information.
 *
 * @param out stream to write usage information to
 */
public static void printGenericCommandUsage(PrintStream out) {
  GenericOptionsParser.printGenericCommandUsage(out);
}
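For context, a minimal sketch of how such a helper is typically invoked from an argument-validation path; the printUsage wrapper and the tool name are illustrative, not part of the original:

// Hypothetical caller: print tool-specific usage followed by the generic options.
private static int printUsage(PrintStream out) {
  out.println("Usage: MyTool [generic options] <in> <out>");
  printGenericCommandUsage(out);
  return 2; // conventional non-zero exit status for bad arguments
}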
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  // Parse generic options
  String[] genericCmdLineOpts = new GenericOptionsParser(conf, args).getCommandLine().getArgs();
  Properties jobProperties = CliOptions.parseArgs(CliMRJobLauncher.class, genericCmdLineOpts);

  // Launch and run the job
  System.exit(ToolRunner.run(new CliMRJobLauncher(conf, jobProperties), args));
}
/**
 * Returns {@link ParameterTool} for the arguments parsed by {@link GenericOptionsParser}.
 *
 * @param args input array of arguments; it should be parsable by {@link GenericOptionsParser}
 * @return a {@link ParameterTool}
 * @throws IOException if the arguments cannot be parsed by {@link GenericOptionsParser}
 * @see GenericOptionsParser
 */
public static ParameterTool paramsFromGenericOptionsParser(String[] args) throws IOException {
  Option[] options = new GenericOptionsParser(args).getCommandLine().getOptions();
  Map<String, String> map = new HashMap<String, String>();
  for (Option option : options) {
    // Split on the first '=' only, so values that themselves contain '='
    // (e.g. -Dx=y=z) are not truncated.
    String[] split = option.getValue().split("=", 2);
    map.put(split[0], split[1]);
  }
  return ParameterTool.fromMap(map);
}
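A brief usage sketch for the method above, assuming Flink's ParameterTool accessors; the property names are invented for illustration:

// Fold -D key=value pairs through the Hadoop-style parser, then read them back.
String[] args = {"-Dinput=/data/in", "-Dparallelism=4"};
ParameterTool params = paramsFromGenericOptionsParser(args);
String input = params.get("input");                // "/data/in"
int parallelism = params.getInt("parallelism", 1); // 4; falls back to 1 if absent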
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (!doCommandLine(otherArgs)) {
    return 1;
  }

  Job job = createSubmittableJob(otherArgs);
  if (!job.waitForCompletion(true)) {
    LOG.info("Map-reduce job failed!");
    return 1;
  }
  counters = job.getCounters();
  return 0;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    System.exit(2);
  }
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
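The same job is often wired up through ToolRunner, which runs GenericOptionsParser on the caller's behalf; a minimal sketch under that assumption (the WordCountTool class name is illustrative, the mapper and reducer classes are the ones from the example above):

public class WordCountTool extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    // ToolRunner has already stripped the generic options; args holds the rest.
    Job job = Job.getInstance(getConf(), "word count");
    job.setJarByClass(WordCountTool.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new WordCountTool(), args));
  }
}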
public static void main(String[] args) throws Exception {
  long startTime = System.currentTimeMillis();
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    theLogger.warn("Usage: SortInMemoryProjectionDriver <input> <output>");
    System.exit(1);
  }

  Job job = new Job(conf, "SortInMemoryProjectionDriver");
  job.setMapperClass(SortInMemoryProjectionMapper.class);
  job.setReducerClass(SortInMemoryProjectionReducer.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  job.setInputFormatClass(TextInputFormat.class);
  // ... (remaining job setup and submission elided in this excerpt) ...
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  CommandLine cmd = parseArgs(otherArgs);
  // check debug flag and other options
  if (cmd.hasOption("d")) conf.set("conf.debug", "true");
  // get details
  String table = cmd.getOptionValue("t");
  String input = cmd.getOptionValue("i");
  // create job and set classes etc.
  Job job = Job.getInstance(conf, "Import from file " + input + " into table " + table);
  job.setJarByClass(ImportJsonFromFile.class);
  job.setMapperClass(ImportMapper.class);
  job.setOutputFormatClass(TableOutputFormat.class);
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  job.setNumReduceTasks(0);
  FileInputFormat.addInputPath(job, new Path(input));
  // run the job
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
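The parseArgs helper is not part of the excerpt; a plausible reconstruction using Apache Commons CLI is sketched below. The option letters mirror the ones used above; everything else is an assumption:

// Hypothetical stand-in for the elided parseArgs helper.
private static CommandLine parseArgs(String[] args) throws ParseException {
  Options options = new Options();
  options.addOption("t", "table", true, "table to import into (required)");
  options.addOption("i", "input", true, "input file to read (required)");
  options.addOption("d", "debug", false, "switch on DEBUG log level");
  try {
    return new PosixParser().parse(options, args);
  } catch (ParseException e) {
    System.err.println("ERROR: " + e.getMessage());
    new HelpFormatter().printHelp("ImportJsonFromFile", options, true);
    throw e;
  }
}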
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
CommandLine cmd = parseArgs(otherArgs);
if (cmd.hasOption("d")) conf.set("conf.debug", "true");
// ... (table, scan, and output are set up in the elided part of this excerpt) ...
Job job = Job.getInstance(conf, "Analyze data in " + table);
job.setJarByClass(AnalyzeData.class);
// co AnalyzeData-6-Util Set up the table mapper phase using the supplied utility.
TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class,
    Text.class, IntWritable.class, job);
// co AnalyzeData-7-Output Configure the reduce phase using the normal Hadoop syntax.
job.setReducerClass(AnalyzeReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(1);
FileOutputFormat.setOutputPath(job, new Path(output));

System.exit(job.waitForCompletion(true) ? 0 : 1);
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    return 1;
  }

  conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);
  conf.set("nl.basjes.parse.apachehttpdlogline.fields",
      "STRING:request.status.last");

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(Wordcount.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);

  // Remove any previous output so the job can write to the same path again.
  FileSystem fs = FileSystem.get(conf);
  Path outputPath = new Path(otherArgs[1]);
  fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setOutputKeyClass(Text.class);
  // ... (remaining job setup and submission elided in this excerpt) ...
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: invertedindex <inDir> [<in>...] <out>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "invert index");
  job.setJarByClass(InvertedIndex.class);
  job.setMapperClass(WordToFileMapper.class);
  job.setReducerClass(WordToFileCountReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  // All arguments but the last are inputs, matching the usage string above.
  for (int i = 0; i < otherArgs.length - 1; i++) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
  FileOutputFormat.setOutputPath(job, outputPath);
  // Remove any previous output; delete(Path, boolean) replaces the deprecated one-argument form.
  outputPath.getFileSystem(conf).delete(outputPath, true);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
private void addDistributedCacheFile(File file, Configuration conf) throws IOException {
  String HADOOP_TMP_FILES = "tmpfiles"; // see Hadoop's GenericOptionsParser
  String tmpFiles = conf.get(HADOOP_TMP_FILES, "");
  if (tmpFiles.length() > 0) { // already present?
    tmpFiles = tmpFiles + ",";
  }

  // Let GenericOptionsParser turn the --files option into the canonical
  // "tmpfiles" value, then append it to whatever was there before.
  GenericOptionsParser parser = new GenericOptionsParser(
      new Configuration(conf),
      new String[]{"--files", file.getCanonicalPath()});
  String additionalTmpFiles = parser.getConfiguration().get(HADOOP_TMP_FILES);
  assert additionalTmpFiles != null;
  assert additionalTmpFiles.length() > 0;

  tmpFiles += additionalTmpFiles;
  conf.set(HADOOP_TMP_FILES, tmpFiles);
}
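A hedged usage sketch for the helper above, assuming it is called from within the same class; the temp-file name is invented:

// Ship a locally generated properties file with the job.
File settings = File.createTempFile("job-settings", ".properties");
Configuration conf = new Configuration();
addDistributedCacheFile(settings, conf);
// "tmpfiles" now lists the file; the framework copies it to each task's
// working directory, where it is visible under its base name.
System.out.println(conf.get("tmpfiles"));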
@SuppressWarnings("deprecation") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: NumberOfUsersByState <in> <out>"); Job job = new Job(conf, "StackOverflow Number of Users by State"); job.setJarByClass(CountNumUsersByState.class); job.setMapperClass(CountNumUsersByStateMapper.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); Path outputDir = new Path(otherArgs[1]); FileOutputFormat.setOutputPath(job, outputDir); boolean success = job.waitForCompletion(true); FileSystem.get(conf).delete(outputDir);
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
FileSystem localFs = FileSystem.getLocal(conf);
// ... (the excerpt first points a -tokenCacheFile option at a file that does
// not yet exist and expects parsing to fail) ...
try {
  new GenericOptionsParser(conf, args);
} catch (Exception e) {
  th = e;
}
// ... (assertions on th elided) ...

// Create the file and parse again; this time the option must be accepted.
Path tmpPath = new Path(tmpFile.toString());
localFs.create(tmpPath);
new GenericOptionsParser(conf, args);
String fileName = conf.get("mapreduce.job.credentials.json");
assertNotNull("files is null", fileName);
assertEquals("files option does not match",
    localFs.makeQualified(tmpPath).toString(), fileName);
localFs.delete(new Path(testDir.getAbsolutePath()), true);
public static void main(String... argv) throws IOException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, argv).getRemainingArgs();
  CommandLine cli = InspectorOptions.parse(new PosixParser(), otherArgs);
  Path inputPath = new Path(InspectorOptions.INPUT.get(cli));
  int n = -1;
  if (InspectorOptions.NUM.has(cli)) {
    n = Integer.parseInt(InspectorOptions.NUM.get(cli));
  }
  SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
      SequenceFile.Reader.file(inputPath));
  // ... (reading and printing of records elided in this excerpt) ...
Configuration hadoopConf = new Configuration();
String[] otherArgs = null;
try {
  otherArgs = new GenericOptionsParser(hadoopConf, commandArgs).getRemainingArgs();
} catch (IOException e) {
  LOGGER.error("Failed to configure hadoop with provided options: {}", e.getMessage(), e);
  // ... (error handling elided in this excerpt) ...
}

// "Fixed" filter mode: parse the remaining arguments and record the output path.
try {
  config = fixedParser.parse(otherArgs);
  commonConfig = config;
  PcapOptions.FINAL_OUTPUT_PATH.put(commonConfig, new Path(execDir));
} catch (ParseException | java.text.ParseException e) {
  System.err.println(e.getMessage());
  // ...
}

config.getYarnQueue().ifPresent(s -> hadoopConf.set(MRJobConfig.QUEUE_NAME, s));
PcapOptions.HADOOP_CONF.put(commonConfig, hadoopConf);
try {
  PcapOptions.FILESYSTEM.put(commonConfig, FileSystem.get(hadoopConf));
  results = jobRunner.submit(PcapFinalizerStrategies.CLI, commonConfig).get();
} catch (IOException | InterruptedException | JobException e) {
  // ... (error handling elided) ...
}

// "Query" filter mode follows the same pattern with its own parser.
try {
  config = queryParser.parse(otherArgs);
  commonConfig = config;
  PcapOptions.FINAL_OUTPUT_PATH.put(commonConfig, new Path(execDir));
} catch (ParseException | java.text.ParseException e) {
  System.err.println(e.getMessage());
  // ...
}
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
CommandLine cmd = parseArgs(otherArgs);
if (cmd.hasOption("d")) conf.set("conf.debug", "true");
// co ParseJsonMulti-4-Conf Store table names in configuration for later use in the mapper.
conf.set("conf.infotable", cmd.getOptionValue("o"));
conf.set("conf.linktable", cmd.getOptionValue("l"));
// ... (input and scan are set up in the elided part of this excerpt) ...
Job job = Job.getInstance(conf, "Parse data in " + input + ", into two tables");
job.setJarByClass(ParseJsonMulti.class);
TableMapReduceUtil.initTableMapperJob(input, scan, ParseMapper.class,
    ImmutableBytesWritable.class, Put.class, job);
// co ParseJsonMulti-5-Null Set the output format to be ignored by the framework.
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
public void testGenericOptionsParser() throws Exception {
  GenericOptionsParser parser = new GenericOptionsParser(
      new Configuration(), new String[] {"-jt"});
  assertEquals(parser.getRemainingArgs().length, 0);

  // test if -D accepts -Dx=y=z
  parser = new GenericOptionsParser(new Configuration(),
      new String[] {"-Dx=y=z"});
  assertEquals(parser.getConfiguration().get("x"), "y=z");
}
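A companion sketch showing the split between generic options and leftover arguments; the queue name and paths are invented for illustration:

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] argv = {"-Dmapreduce.job.queuename=etl", "/data/in", "/data/out"};
  GenericOptionsParser parser = new GenericOptionsParser(conf, argv);
  // The -D pair is folded into the configuration; positional arguments remain.
  System.out.println(parser.getConfiguration().get("mapreduce.job.queuename")); // etl
  System.out.println(String.join(" ", parser.getRemainingArgs()));              // /data/in /data/out
}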
@Override
public int run(String[] real_args) throws Exception {
  GenericOptionsParser gop = new GenericOptionsParser(getConf(), real_args);
  Configuration conf = gop.getConfiguration();
  String[] args = gop.getRemainingArgs();

  Job validate = new Job(conf);
  validate.setJobName("Chukwa Test pattern validator");
  validate.setJarByClass(this.getClass());
  validate.setInputFormatClass(SequenceFileInputFormat.class);
  validate.setMapperClass(MapClass.class);
  validate.setMapOutputKeyClass(ByteRange.class);
  validate.setMapOutputValueClass(NullWritable.class);
  validate.setReducerClass(ReduceClass.class);
  validate.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setInputPaths(validate, new Path(args[0]));
  FileOutputFormat.setOutputPath(validate, new Path(args[1]));

  // Submit asynchronously; the caller does not wait for completion here.
  validate.submit();
  return 0;
}