/**
 * Add an OutputFormat configuration to the Job under an alias name.
 *
 * @param alias the name to be given to the OutputFormat configuration
 * @param outputFormatClass OutputFormat class
 * @param keyClass the key class for the output data
 * @param valueClass the value class for the output data
 * @throws IOException
 */
public void addOutputFormat(String alias, Class<? extends OutputFormat> outputFormatClass,
    Class<?> keyClass, Class<?> valueClass) throws IOException {
  Job copy = new Job(this.job.getConfiguration());
  outputConfigs.put(alias, copy);
  copy.setOutputFormatClass(outputFormatClass);
  copy.setOutputKeyClass(keyClass);
  copy.setOutputValueClass(valueClass);
}
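// Usage sketch for addOutputFormat(...) above. The enclosing wrapper instance ("multiOut" here)
// and its job/outputConfigs fields are assumptions inferred from the method body, not part of the
// original snippet: each alias gets its own OutputFormat plus key/value classes.
multiOut.addOutputFormat("text", TextOutputFormat.class, LongWritable.class, Text.class);
multiOut.addOutputFormat("seq", SequenceFileOutputFormat.class, LongWritable.class, Text.class);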
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  // increase if you need sorting or a special number of files
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
  // submit and wait for completion
  job.waitForCompletion(true);
}
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(c, jobName);
  initJob(scans, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  LOG.info("Started " + job.getJobName());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(getClass());
    job, true, new Path(restoreDir));
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.waitForCompletion(true);
scanTimer.stop();
Job job = new Job();
FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
job.setInputFormatClass(AvroKeyValueInputFormat.class);
AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));
job.setOutputKeyClass(AvroKey.class);
job.setOutputValueClass(AvroValue.class);
Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
FileOutputFormat.setOutputPath(job, outputPath);
assertTrue(job.waitForCompletion(true));
Configuration conf = new Configuration();
conf.set("hive.metastore.uris", "thrift://no.such.machine:10888");
Job job = new Job(conf, "Write-hcat-seq-table");
job.setJarByClass(TestPassProperties.class);
job.setMapperClass(Map.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DefaultHCatRecord.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);
HCatOutputFormat.setSchema(job, getSchema());
job.setNumReduceTasks(0);
assertTrue(job.waitForCompletion(true));
new FileOutputCommitterContainer(job, null).cleanupJob(job);
} catch (Exception e) {
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);
  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));
  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
public int runCopier(String outputDir) throws Exception {
  Job job = null;
  Scan scan = null;
  job = new Job(getConf());
  job.setJobName("Data copier");
  job.getConfiguration().setInt("INDEX", labelIndex);
  job.getConfiguration().set("LABELS", labels);
  job.setJarByClass(getClass());
  scan = new Scan();
  scan.setCacheBlocks(false);
  TableMapReduceUtil.initCredentials(job);
  job.setNumReduceTasks(0);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo,
    Path outDir, boolean putSortReducer)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  setupRandomGeneratorMapper(job, putSortReducer);
  if (tableInfo.size() > 1) {
    MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
    int sum = 0;
    for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
      sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
    }
    assertEquals(sum, job.getNumReduceTasks());
  } else {
    RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
    HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
        regionLocator);
    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  }
  FileOutputFormat.setOutputPath(job, outDir);
  assertFalse(util.getTestFileSystem().exists(outDir));
  assertTrue(job.waitForCompletion(true));
}
private void runTestOnTable() throws IOException, InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    job = new Job(UTIL.getConfiguration(), "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ProcessTimeRangeMapper.class,
        Text.class, Text.class, job);
    job.waitForCompletion(true);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  System.out.println("****" + tableName);
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  conf.set("index.tablename", tableName);
  conf.set("index.familyname", columnFamily);
  String[] fields = new String[args.length - 2];
  System.arraycopy(args, 2, fields, 0, fields.length);
  conf.setStrings("index.fields", fields);
  Job job = new Job(conf, tableName);
  job.setJarByClass(IndexBuilder.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  return job;
}
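// A possible driver for the IndexBuilder configureJob(...) above. This main(...) is an assumed
// sketch, not part of the original snippet; args are expected as <tableName> <columnFamily> <field>...
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}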
Job job = new Job(conf, "testWritingPEData"); setupRandomGeneratorMapper(job, false); job.setReducerClass(CellSortReducer.class); job.setOutputFormatClass(HFileOutputFormat2.class); job.setNumReduceTasks(4); CellSerialization.class.getName()); FileOutputFormat.setOutputPath(job, testDir); assertTrue(job.waitForCompletion(false)); FileStatus [] files = fs.listStatus(testDir); assertTrue(files.length > 0);
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(getClass());
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.waitForCompletion(true);
scanTimer.stop();
Job job = new Job(util.getConfiguration());
Scan scan = new Scan(startRow, endRow); // limit the scan
job.setJarByClass(util.getClass());
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
    TestTableSnapshotInputFormat.class);
job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(NullOutputFormat.class);
Assert.assertTrue(job.waitForCompletion(true));
} finally {
  if (!shutdownCluster) {
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
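// Likewise, an assumed driver sketch for the Uploader configureJob(...) above (not part of the
// original snippet); here args are the input sequence-file path and the target table name.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}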
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
Job job = new Job(conf);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
fs.delete(out, true);
job.waitForCompletion(true);
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName().getNameAsString()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
Job job = new Job();
job.setMapperClass(MyMapper.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
return 0;