private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
@Override
public Void call() throws IOException, InterruptedException {
  // Set up, write, and commit a single task attempt's output.
  final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
  outputCommitter.setupTask(taCtx[taskIdx]);
  final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
  writeOutput(rw, taCtx[taskIdx]);
  outputCommitter.commitTask(taCtx[taskIdx]);
  return null;
}
}); // closes the enclosing executor.submit(new Callable<Void>() { ... })
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }
  final String inputPath = args[0];
  final String outputPath = args[1];

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop Input Format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the line and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert String back to Writable "Text" for use with Hadoop Output Format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up Hadoop Output Format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  // Set the separator under both property names, since the key differs across Hadoop versions
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & Execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
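The Tokenizer used by the driver above is not shown in this snippet. A minimal sketch of what it plausibly looks like, assuming Flink's standard FlatMapFunction API; the splitting rule here is an assumption, not the original code:

// Hypothetical Tokenizer for the driver above: splits each Hadoop Text line
// into lowercase words, emitting (word, 1) pairs. The regex is an assumption.
public static final class Tokenizer
    implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
  @Override
  public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
    for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
      if (!token.isEmpty()) {
        out.collect(new Tuple2<String, Integer>(token, 1));
      }
    }
  }
}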
private void testAbortInternal(int version) throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // abort the task; its temporary output directory must be cleaned up
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "QuestionAnswerBuilder");
  job.setJarByClass(QuestionAnswerBuilder.class);
  job.setMapperClass(HierarchyMapper.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setReducerClass(QuestionAnswerReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  return job.waitForCompletion(true) ? 0 : 2;
}
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length != 2) {
    System.out.println("Usage: CountRowKeys tableName outputPath");
    return 1;
  }
  Job job = new Job(getConf(), this.getClass().getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.addInputPath(job,
      new Path(ServerConstants.getTablesDir() + "/" + args[0] + "/*/*/data"));
  job.setMapperClass(MyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(MyReducer.class);
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
private boolean runJob(float badRecordThreshold) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  HCatInputFormat.setInput(job, "default", "test_bad_records");
  job.setMapOutputKeyClass(HCatRecord.class);
  job.setMapOutputValueClass(HCatRecord.class);
  job.setNumReduceTasks(0);
  Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
  if (path.getFileSystem(conf).exists(path)) {
    path.getFileSystem(conf).delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);
  return job.waitForCompletion(true);
}
FileSystem.get(getConf()).delete(new Path(args[1]), true);
Job job1 = Job.getInstance(getConf());
job1.setJobName("sequence collection conversion (phase 1)");
job1.setJarByClass(this.getClass());
job1.getConfiguration().setStrings("de.mpii.tools.itemSeparator", itemSeparator);
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));
job1.getConfiguration().set("mapreduce.cluster.mapmemory.mb", "4096");
job1.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(AccumuloInputFormat.class);
  AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
  AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(args[6]));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
@SuppressWarnings("unchecked") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Path inputPath = new Path(args[0]); Path partitionFile = new Path(args[1] + "_partitions.lst"); Path outputStage = new Path(args[1] + "_staging"); Path outputOrder = new Path(args[1]); Job sampleJob = new Job(conf, "TotalOrderSortingStage"); sampleJob.setJarByClass(TotalOrderSortingStage.class); sampleJob.setMapperClass(LastAccessMapper.class); sampleJob.setNumReduceTasks(0); sampleJob.setOutputKeyClass(Text.class); SequenceFileInputFormat.setInputPaths(orderJob, outputStage); TextOutputFormat.setOutputPath(orderJob, outputOrder); orderJob.getConfiguration().set( "mapred.textoutputformat.separator", ""); FileSystem.get(new Configuration()).delete(partitionFile, false); FileSystem.get(new Configuration()).delete(outputStage, true); return code;
FileSystem.get(getConf()).delete(new Path(args[1]), true);

// Phase 1
Job job1 = new Job(getConf());
job1.setJobName("document collection conversion (phase 1)");
job1.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job1.setJarByClass(this.getClass());
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));

// Phase 2 (construction of job2 is elided in this excerpt)
job2.setJobName("document collection conversion (phase 2)");
job2.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job2.setJarByClass(this.getClass());
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Path postInput = new Path(otherArgs[0]);
  Path userInput = new Path(otherArgs[1]);
  Path outputDirIntermediate = new Path(otherArgs[2] + "_int");
  Path outputDir = new Path(otherArgs[2]);

  // Job 1: count per-user records into an intermediate directory
  Job countingJob = new Job(conf, "JobChaining-Counting");
  countingJob.setJarByClass(BasicJobChaining.class);
  countingJob.setMapperClass(UserIdCountMapper.class);
  countingJob.setCombinerClass(LongSumReducer.class);
  countingJob.setReducerClass(UserIdSumReducer.class);
  countingJob.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(countingJob, outputDirIntermediate);

  // Job 2: bin the intermediate output into the final directory
  // (input setup and remaining configuration elided in this excerpt)
  Job binningJob = new Job(new Configuration(), "JobChaining-Binning");
  binningJob.setJarByClass(BasicJobChaining.class);
  MultipleOutputs.setCountersEnabled(binningJob, true);
  TextOutputFormat.setOutputPath(binningJob, outputDir);
private static Job ultimateUnionMapReduce(Path input, Path output, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "UltimateUnion");
  job.setJarByClass(UltimateUnion.class);
  // Set map and reduce
  job.setMapperClass(UltimateUnionMap.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(OGCJTSShape.class);
  job.setNumReduceTasks(0);
  // Set input and output
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.addInputPath(job, input);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, output);
  // Submit the job
  if (!params.getBoolean("background", false)) {
    job.waitForCompletion(false);
    if (!job.isSuccessful())
      throw new RuntimeException("Job failed!");
  } else {
    job.submit();
  }
  return job;
}
Job job = Job.getInstance(conf);
job.setJarByClass(TextOutputExample.class);
job.setMapperClass(RyaToRdfMapper.class);
job.setNumReduceTasks(0);
Path outputPath = new Path(tempDir, "rdf-output");
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputPath);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
opts.parseArgs(this.getClass().getName(), args);
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
job.setJarByClass(this.getClass());
job.setInputFormatClass(AccumuloInputFormat.class);
opts.setAccumuloConfigs(job);
job.setOutputFormatClass(TextOutputFormat.class);
job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", opts.scanOffline);
TextOutputFormat.setOutputPath(job, new Path(opts.outputDir));
job.waitForCompletion(true);
JobConf conf = new JobConf();
conf.set("mapreduce.framework.name", "local");
Job job = new Job(conf);
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.getConfiguration().setInt("min.num.spills.for.combine", 0);
job.submit();
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
  final Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  final Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
  FileSystem.closeAll();

  // (Construction of the output formats tof[], the task attempt contexts
  // taCtx[], the executor, and the surrounding loop is elided in this excerpt.)
  // Each task attempt sets up, writes, and commits its output concurrently:
  final int taskIdx = i;
  executor.submit((Callable<Void>) () -> {
    final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
    outputCommitter.setupTask(taCtx[taskIdx]);
    final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
    writeOutput(rw, taCtx[taskIdx]);
    outputCommitter.commitTask(taCtx[taskIdx]);
    return null;
  });

  // After the commits, verify no nested sub_dir/sub_dir was created.
  final RawLocalFileSystem lfs = new RawLocalFileSystem();
  lfs.setConf(conf);
  assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
  FileUtil.fullyDelete(new File(outDir.toString()));
}
public int run(Path outputDir, int numReducers) throws Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);
  job = Job.getInstance(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  // ship the tool's dependency jars with the job
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  String keyValueSeparator = conf.get(SEPERATOR, "\t");
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  Path file = getDefaultWorkFile(job, extension);
  FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    FSDataOutputStream fileOut = fs.create(file, false);
    return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
  } else {
    FSDataOutputStream fileOut = fs.create(file, false);
    return new LineRecordWriter<K, V>(
        new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator);
  }
}
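The record writer above reads its separator and compression settings from the job configuration. A minimal driver-side sketch that exercises both branches, assuming Hadoop 2.x property names and the standard FileOutputFormat helpers; the output path is a placeholder:

// Hypothetical driver fragment: sets the key/value separator and enables
// gzip compression, both of which getRecordWriter() above picks up.
Job job = Job.getInstance(new Configuration());
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ";");
TextOutputFormat.setCompressOutput(job, true);
TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
TextOutputFormat.setOutputPath(job, new Path("/tmp/text-output")); // placeholder path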
private int createParitionFile(String sequenceFileInput, String outputPath, float frequency)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration config = getConf();
  config.setFloat(SAMPLE_FREQUENCY, frequency);
  Job sampler = new Job(config);
  sampler.setInputFormatClass(TextInputFormat.class);
  sampler.setOutputFormatClass(TextOutputFormat.class);
  sampler.setOutputKeyClass(Text.class);
  sampler.setOutputValueClass(NullWritable.class);
  sampler.setNumReduceTasks(0);
  sampler.setMapperClass(Map.class);
  TextInputFormat.addInputPath(sampler, new Path(sequenceFileInput));
  TextOutputFormat.setOutputPath(sampler, new Path(outputPath));
  sampler.waitForCompletion(true);
  return 0;
}