Configuration conf = new Configuration();
String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0];
conf.set(MRJobConfig.NUM_MAPS, numMaps);
createHdfsFilesystem(conf);

Job job = Job.getInstance(conf, "MapReduceIntegrationChecker");
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);
// ... (job submission and result evaluation elided in this excerpt) ...
      : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1);
} finally {
  if (mFileSystem.exists(mOutputFilePath)) {
    mFileSystem.delete(mOutputFilePath, true);
  }
  mFileSystem.close();
}
/**
 * Prints generic command-line arguments and usage information.
 *
 * @param out stream to write usage information to
 */
public static void printGenericCommandUsage(PrintStream out) {
  GenericOptionsParser.printGenericCommandUsage(out);
}
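For context, a minimal sketch of how such a helper is typically invoked from an argument-validation path; the printUsage wrapper and the tool name are illustrative, not part of the original:

// Hypothetical caller: print tool-specific usage followed by the generic options.
private static int printUsage(PrintStream out) {
  out.println("Usage: MyTool [generic options] <in> <out>");
  printGenericCommandUsage(out);
  return 2; // conventional non-zero exit status for bad arguments
}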
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  // Parse generic options
  String[] genericCmdLineOpts = new GenericOptionsParser(conf, args).getCommandLine().getArgs();
  Properties jobProperties = CliOptions.parseArgs(CliMRJobLauncher.class, genericCmdLineOpts);

  // Launch and run the job
  System.exit(ToolRunner.run(new CliMRJobLauncher(conf, jobProperties), args));
}
/**
 * Returns {@link ParameterTool} for the arguments parsed by {@link GenericOptionsParser}.
 *
 * @param args input array of arguments; it should be parsable by {@link GenericOptionsParser}
 * @return a {@link ParameterTool}
 * @throws IOException if the arguments cannot be parsed by {@link GenericOptionsParser}
 * @see GenericOptionsParser
 */
public static ParameterTool paramsFromGenericOptionsParser(String[] args) throws IOException {
  Option[] options = new GenericOptionsParser(args).getCommandLine().getOptions();
  Map<String, String> map = new HashMap<String, String>();
  for (Option option : options) {
    // Split on the first '=' only, so values that themselves contain '='
    // (e.g. -Dx=y=z) are not truncated.
    String[] split = option.getValue().split("=", 2);
    map.put(split[0], split[1]);
  }
  return ParameterTool.fromMap(map);
}
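A brief usage sketch for the method above, assuming Flink's ParameterTool accessors; the property names are invented for illustration:

// Fold -D key=value pairs through the Hadoop-style parser, then read them back.
String[] args = {"-Dinput=/data/in", "-Dparallelism=4"};
ParameterTool params = paramsFromGenericOptionsParser(args);
String input = params.get("input");                // "/data/in"
int parallelism = params.getInt("parallelism", 1); // 4; falls back to 1 if absent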
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (!doCommandLine(otherArgs)) {
    return 1;
  }

  Job job = createSubmittableJob(otherArgs);
  if (!job.waitForCompletion(true)) {
    LOG.info("Map-reduce job failed!");
    return 1;
  }
  counters = job.getCounters();
  return 0;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    System.exit(2);
  }
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
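The same job is often wired up through ToolRunner, which runs GenericOptionsParser on the caller's behalf; a minimal sketch under that assumption (the WordCountTool class name is illustrative, the mapper and reducer classes are the ones from the example above):

public class WordCountTool extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    // ToolRunner has already stripped the generic options; args holds the rest.
    Job job = Job.getInstance(getConf(), "word count");
    job.setJarByClass(WordCountTool.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new WordCountTool(), args));
  }
}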
public static void main(String[] args) throws Exception {
  long startTime = System.currentTimeMillis();
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    theLogger.warn("Usage: SortInMemoryProjectionDriver <input> <output>");
    System.exit(1);
  }

  Job job = new Job(conf, "SortInMemoryProjectionDriver");
  job.setMapperClass(SortInMemoryProjectionMapper.class);
  job.setReducerClass(SortInMemoryProjectionReducer.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  job.setInputFormatClass(TextInputFormat.class);
  // ... (remaining job setup and submission elided in this excerpt) ...
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  CommandLine cmd = parseArgs(otherArgs);
  // check debug flag and other options
  if (cmd.hasOption("d")) conf.set("conf.debug", "true");
  // get details
  String table = cmd.getOptionValue("t");
  String input = cmd.getOptionValue("i");
  // create job and set classes etc.
  Job job = Job.getInstance(conf, "Import from file " + input + " into table " + table);
  job.setJarByClass(ImportJsonFromFile.class);
  job.setMapperClass(ImportMapper.class);
  job.setOutputFormatClass(TableOutputFormat.class);
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  job.setNumReduceTasks(0);
  FileInputFormat.addInputPath(job, new Path(input));
  // run the job
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
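The parseArgs helper is not part of the excerpt; a plausible reconstruction using Apache Commons CLI is sketched below. The option letters mirror the ones used above; everything else is an assumption:

// Hypothetical stand-in for the elided parseArgs helper.
private static CommandLine parseArgs(String[] args) throws ParseException {
  Options options = new Options();
  options.addOption("t", "table", true, "table to import into (required)");
  options.addOption("i", "input", true, "input file to read (required)");
  options.addOption("d", "debug", false, "switch on DEBUG log level");
  try {
    return new PosixParser().parse(options, args);
  } catch (ParseException e) {
    System.err.println("ERROR: " + e.getMessage());
    new HelpFormatter().printHelp("ImportJsonFromFile", options, true);
    throw e;
  }
}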
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
CommandLine cmd = parseArgs(otherArgs);
if (cmd.hasOption("d")) conf.set("conf.debug", "true");
// ... (table, scan, and output are set up in the elided part of this excerpt) ...
Job job = Job.getInstance(conf, "Analyze data in " + table);
job.setJarByClass(AnalyzeData.class);
// co AnalyzeData-6-Util Set up the table mapper phase using the supplied utility.
TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class,
    Text.class, IntWritable.class, job);
// co AnalyzeData-7-Output Configure the reduce phase using the normal Hadoop syntax.
job.setReducerClass(AnalyzeReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(1);
FileOutputFormat.setOutputPath(job, new Path(output));

System.exit(job.waitForCompletion(true) ? 0 : 1);
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    return 1;
  }

  conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);
  conf.set("nl.basjes.parse.apachehttpdlogline.fields",
      "STRING:request.status.last");

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(Wordcount.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);

  // Remove any previous output so the job can write to the same path again.
  FileSystem fs = FileSystem.get(conf);
  Path outputPath = new Path(otherArgs[1]);
  fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setOutputKeyClass(Text.class);
  // ... (remaining job setup and submission elided in this excerpt) ...
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: invertedindex <inDir> [<in>...] <out>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "invert index");
  job.setJarByClass(InvertedIndex.class);
  job.setMapperClass(WordToFileMapper.class);
  job.setReducerClass(WordToFileCountReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  // All arguments but the last are inputs, matching the usage string above.
  for (int i = 0; i < otherArgs.length - 1; i++) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
  FileOutputFormat.setOutputPath(job, outputPath);
  // Remove any previous output; delete(Path, boolean) replaces the deprecated one-argument form.
  outputPath.getFileSystem(conf).delete(outputPath, true);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
private void addDistributedCacheFile(File file, Configuration conf) throws IOException {
  String HADOOP_TMP_FILES = "tmpfiles"; // see Hadoop's GenericOptionsParser
  String tmpFiles = conf.get(HADOOP_TMP_FILES, "");
  if (tmpFiles.length() > 0) { // already present?
    tmpFiles = tmpFiles + ",";
  }

  // Let GenericOptionsParser turn the --files option into the canonical
  // "tmpfiles" value, then append it to whatever was there before.
  GenericOptionsParser parser = new GenericOptionsParser(
      new Configuration(conf),
      new String[]{"--files", file.getCanonicalPath()});
  String additionalTmpFiles = parser.getConfiguration().get(HADOOP_TMP_FILES);
  assert additionalTmpFiles != null;
  assert additionalTmpFiles.length() > 0;

  tmpFiles += additionalTmpFiles;
  conf.set(HADOOP_TMP_FILES, tmpFiles);
}
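A hedged usage sketch for the helper above, assuming it is called from within the same class; the temp-file name is invented:

// Ship a locally generated properties file with the job.
File settings = File.createTempFile("job-settings", ".properties");
Configuration conf = new Configuration();
addDistributedCacheFile(settings, conf);
// "tmpfiles" now lists the file; the framework copies it to each task's
// working directory, where it is visible under its base name.
System.out.println(conf.get("tmpfiles"));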
@SuppressWarnings("deprecation") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: NumberOfUsersByState <in> <out>"); Job job = new Job(conf, "StackOverflow Number of Users by State"); job.setJarByClass(CountNumUsersByState.class); job.setMapperClass(CountNumUsersByStateMapper.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); Path outputDir = new Path(otherArgs[1]); FileOutputFormat.setOutputPath(job, outputDir); boolean success = job.waitForCompletion(true); FileSystem.get(conf).delete(outputDir);
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
FileSystem localFs = FileSystem.getLocal(conf);
// ... (the excerpt first points a -tokenCacheFile option at a file that does
// not yet exist and expects parsing to fail) ...
try {
  new GenericOptionsParser(conf, args);
} catch (Exception e) {
  th = e;
}
// ... (assertions on th elided) ...

// Create the file and parse again; this time the option must be accepted.
Path tmpPath = new Path(tmpFile.toString());
localFs.create(tmpPath);
new GenericOptionsParser(conf, args);
String fileName = conf.get("mapreduce.job.credentials.json");
assertNotNull("files is null", fileName);
assertEquals("files option does not match",
    localFs.makeQualified(tmpPath).toString(), fileName);
localFs.delete(new Path(testDir.getAbsolutePath()), true);
public static void main(String... argv) throws IOException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, argv).getRemainingArgs();
  CommandLine cli = InspectorOptions.parse(new PosixParser(), otherArgs);
  Path inputPath = new Path(InspectorOptions.INPUT.get(cli));
  int n = -1;
  if (InspectorOptions.NUM.has(cli)) {
    n = Integer.parseInt(InspectorOptions.NUM.get(cli));
  }
  SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
      SequenceFile.Reader.file(inputPath));
  // ... (reading and printing of records elided in this excerpt) ...
Configuration hadoopConf = new Configuration();
String[] otherArgs = null;
try {
  otherArgs = new GenericOptionsParser(hadoopConf, commandArgs).getRemainingArgs();
} catch (IOException e) {
  LOGGER.error("Failed to configure hadoop with provided options: {}", e.getMessage(), e);
  // ... (error handling elided in this excerpt) ...
}

// "Fixed" filter mode: parse the remaining arguments and record the output path.
try {
  config = fixedParser.parse(otherArgs);
  commonConfig = config;
  PcapOptions.FINAL_OUTPUT_PATH.put(commonConfig, new Path(execDir));
} catch (ParseException | java.text.ParseException e) {
  System.err.println(e.getMessage());
  // ...
}

config.getYarnQueue().ifPresent(s -> hadoopConf.set(MRJobConfig.QUEUE_NAME, s));
PcapOptions.HADOOP_CONF.put(commonConfig, hadoopConf);
try {
  PcapOptions.FILESYSTEM.put(commonConfig, FileSystem.get(hadoopConf));
  results = jobRunner.submit(PcapFinalizerStrategies.CLI, commonConfig).get();
} catch (IOException | InterruptedException | JobException e) {
  // ... (error handling elided) ...
}

// "Query" filter mode follows the same pattern with its own parser.
try {
  config = queryParser.parse(otherArgs);
  commonConfig = config;
  PcapOptions.FINAL_OUTPUT_PATH.put(commonConfig, new Path(execDir));
} catch (ParseException | java.text.ParseException e) {
  System.err.println(e.getMessage());
  // ...
}
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
CommandLine cmd = parseArgs(otherArgs);
if (cmd.hasOption("d")) conf.set("conf.debug", "true");
// co ParseJsonMulti-4-Conf Store table names in configuration for later use in the mapper.
conf.set("conf.infotable", cmd.getOptionValue("o"));
conf.set("conf.linktable", cmd.getOptionValue("l"));
// ... (input and scan are set up in the elided part of this excerpt) ...
Job job = Job.getInstance(conf, "Parse data in " + input + ", into two tables");
job.setJarByClass(ParseJsonMulti.class);
TableMapReduceUtil.initTableMapperJob(input, scan, ParseMapper.class,
    ImmutableBytesWritable.class, Put.class, job);
// co ParseJsonMulti-5-Null Set the output format to be ignored by the framework.
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
public void testGenericOptionsParser() throws Exception {
  GenericOptionsParser parser = new GenericOptionsParser(
      new Configuration(), new String[] {"-jt"});
  assertEquals(parser.getRemainingArgs().length, 0);

  // test if -D accepts -Dx=y=z
  parser = new GenericOptionsParser(new Configuration(),
      new String[] {"-Dx=y=z"});
  assertEquals(parser.getConfiguration().get("x"), "y=z");
}
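A companion sketch showing the split between generic options and leftover arguments; the queue name and paths are invented for illustration:

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] argv = {"-Dmapreduce.job.queuename=etl", "/data/in", "/data/out"};
  GenericOptionsParser parser = new GenericOptionsParser(conf, argv);
  // The -D pair is folded into the configuration; positional arguments remain.
  System.out.println(parser.getConfiguration().get("mapreduce.job.queuename")); // etl
  System.out.println(String.join(" ", parser.getRemainingArgs()));              // /data/in /data/out
}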
@Override
public int run(String[] real_args) throws Exception {
  GenericOptionsParser gop = new GenericOptionsParser(getConf(), real_args);
  Configuration conf = gop.getConfiguration();
  String[] args = gop.getRemainingArgs();

  Job validate = new Job(conf);
  validate.setJobName("Chukwa Test pattern validator");
  validate.setJarByClass(this.getClass());
  validate.setInputFormatClass(SequenceFileInputFormat.class);
  validate.setMapperClass(MapClass.class);
  validate.setMapOutputKeyClass(ByteRange.class);
  validate.setMapOutputValueClass(NullWritable.class);
  validate.setReducerClass(ReduceClass.class);
  validate.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setInputPaths(validate, new Path(args[0]));
  FileOutputFormat.setOutputPath(validate, new Path(args[1]));

  // Submit asynchronously; the caller does not wait for completion here.
  validate.submit();
  return 0;
}