public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
    throws IOException {
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fileSystem.open(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    inputPath = new Path(inputPath + codec.getDefaultExtension());
    return codec.createInputStream(fileSystem.open(inputPath));
  }
}
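// A minimal usage sketch for openInputStream (hedged: the path is illustrative
// and processLine() is a hypothetical per-line handler, not part of the snippet above).
try (BufferedReader lines = new BufferedReader(new InputStreamReader(
    openInputStream(job, new Path("/tmp/out/part-0"), fileSystem), StandardCharsets.UTF_8))) {
  String line;
  while ((line = lines.readLine()) != null) {
    processLine(line); // hypothetical handler
  }
}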
@SuppressWarnings("unchecked") private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { super.read(in); String hadoopOutputFormatName = in.readUTF(); if (jobConf == null) { jobConf = new JobConf(); } jobConf.readFields(in); try { this.mapredOutputFormat = (org.apache.hadoop.mapred.OutputFormat<K, V>) Class.forName(hadoopOutputFormatName, true, Thread.currentThread().getContextClassLoader()).newInstance(); } catch (Exception e) { throw new RuntimeException("Unable to instantiate the hadoop output format", e); } ReflectionUtils.setConf(mapredOutputFormat, jobConf); jobConf.getCredentials().addAll(this.credentials); Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser()); if (currentUserCreds != null) { jobConf.getCredentials().addAll(currentUserCreds); } } }
/**
 * Make a copy of a writable object using serialization to a buffer.
 *
 * @param orig The object to copy
 * @param conf The configuration used to instantiate the copy
 * @return The copied object
 */
public static <T extends Writable> T clone(T orig, Configuration conf) {
  try {
    @SuppressWarnings("unchecked") // Unchecked cast from Class to Class<T>
    T newInst = ReflectionUtils.newInstance((Class<T>) orig.getClass(), conf);
    ReflectionUtils.copy(conf, orig, newInst);
    return newInst;
  } catch (IOException e) {
    throw new RuntimeException("Error writing/reading clone buffer", e);
  }
}
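// A minimal usage sketch for clone(): the copy goes through a serialization
// buffer, so mutating the original afterwards leaves the copy untouched.
Configuration conf = new Configuration();
Text original = new Text("hello");
Text copy = clone(original, conf);
original.set("changed");
assert copy.toString().equals("hello"); // deep copy, independent of the original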
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job)
    throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}
context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName()); String jobInfoString = context.getConfiguration().get( HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler( context.getConfiguration(), storeInfo); Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass(); AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration()); context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName()); (org.apache.hadoop.mapred.RecordWriter)null, context); } else { Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part"))); parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)),
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fs.exists(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
  }
}
// Fragment (a dynamic-partition record-writer setup); pieces lost in extraction
// are marked with "// ...".
// The partition-specific SerDe is created reflectively:
// ... = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf());
try {
  InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), /* ... */);
  // ...
}
// The base output format is created the same way:
// ... = ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());
currTaskContext.getJobConf().getOutputCommitter();
currTaskContext.getConfiguration().set(
    "mapred.work.output.dir",
    new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext)
        .getWorkPath().toString());
Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
    currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath,
    boolean deleteExisting) throws IOException {
  OutputStream retVal;
  FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
  Class<? extends CompressionCodec> codecClass;
  CompressionCodec codec = null;
  if (FileOutputFormat.getCompressOutput(job)) {
    codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    outputPath = new Path(outputPath + codec.getDefaultExtension());
  }
  if (fs.exists(outputPath)) {
    if (deleteExisting) {
      fs.delete(outputPath, false);
    } else {
      throw new ISE("outputPath[%s] must not exist.", outputPath);
    }
  }
  if (FileOutputFormat.getCompressOutput(job)) {
    retVal = codec.createOutputStream(fs.create(outputPath, false));
  } else {
    retVal = fs.create(outputPath, false);
  }
  return retVal;
}
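// A hedged usage sketch pairing makePathAndOutputStream with openInputStream
// above: the same JobContext drives compression on both sides, so the codec
// extension is appended consistently (the path and payload are illustrative).
Path descriptor = new Path("/tmp/job/descriptor.json");
byte[] payload = "{\"rows\": 42}".getBytes(StandardCharsets.UTF_8); // illustrative payload
try (OutputStream out = makePathAndOutputStream(job, descriptor, true)) {
  out.write(payload);
}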
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat =
        ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Cleanup the mapwork path
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
// Fragment (output-location setup inside a container thread); the enclosing
// try block is truncated, elisions marked with "// ...".
try {
  reader = new BufferedReader(new InputStreamReader(xlearningProcess.getInputStream()));
  List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());
  JobConf jobConf = new JobConf(conf);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setBoolean("mapred.output.compress", true);
  jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
  jobConf.setOutputFormat(TextMultiOutputFormat.class);
  Path remotePath = new Path(outputs.get(0).getDfsLocation()
      + "/_temporary/" + containerId.toString());
  FileSystem dfs = remotePath.getFileSystem(jobConf);
  jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
          XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTF0RMAT_CLASS, OutputFormat.class),
      jobConf);
  outputFormat.checkOutputSpecs(dfs, jobConf);
  dfs.close();
} catch (Exception e) {
  LOG.warn("Exception in thread stdoutRedirectThread");
  // ...
}
// Fragment (a Tez split generator); non-contiguous pieces are marked with "// ...".
try {
  boolean sendSerializedEvents =
      conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);
  boolean generateConsistentSplits =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS);
  LOG.info("GenerateConsistentSplitsInHive=" + generateConsistentSplits);
  String realInputFormatName = conf.get("mapred.input.format.class");
  boolean groupingEnabled = userPayloadProto.getGroupingEnabled();
  if (groupingEnabled) {
    // ... the real input format is instantiated reflectively:
    //   (InputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), jobConf);
    final long blockSize = conf.getLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, /* ... default elided ... */);
    MapWork mapWork = Utilities.getMapWork(jobConf);
    List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
    FileSystem fs = paths.get(0).getFileSystem(jobConf);
    FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
    if (fileStatuses.length == 0) {
      // ...
    }
    // ...
    final String fileStr = path.toString();
    if (!files.contains(fileStr)) {
      files.add(fileStr);
    }
    // ...
  }
}
JobConf newJob = new JobConf(job);
FileSystem fs = dataDir1.getFileSystem(newJob);
int symbolLinkedFileSize = 0;
Path dir1_file1 = new Path(dataDir1, "combinefile1_1");
writeTextFile(dir1_file1, "dir1_file1_line1\n" + "dir1_file1_line2\n");
symbolLinkedFileSize += fs.getFileStatus(dir1_file1).getLen();
Path dir2_file1 = new Path(dataDir2, "combinefile2_1");
writeTextFile(dir2_file1, "dir2_file1_line1\n" + "dir2_file1_line2\n");
symbolLinkedFileSize += fs.getFileStatus(dir2_file1).getLen();
Path emptyScratchDir = ctx.getMRTmpPath();
FileSystem fileSys = emptyScratchDir.getFileSystem(newJob);
fileSys.mkdirs(emptyScratchDir);
CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(
    CombineHiveInputFormat.class, newJob);
public static void main(String... args) throws Exception {
  Configuration config = new Configuration();
  FileSystem hdfs = FileSystem.get(config);
  InputStream is = hdfs.open(new Path(args[0]));
  Class<?> codecClass = Class.forName(args[1]);
  CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, config);
  InputStream cis = codec.createInputStream(is);
  IOUtils.copyBytes(cis, System.out, config, true);
  IOUtils.closeStream(is);
}
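// A minimal alternative sketch (an assumption, not part of the example above):
// instead of passing the codec class name as args[1], Hadoop's
// CompressionCodecFactory can infer the codec from the file extension.
CompressionCodecFactory factory = new CompressionCodecFactory(config);
CompressionCodec inferred = factory.getCodec(new Path(args[0])); // e.g. ".gz" -> GzipCodec
if (inferred == null) {
  System.err.println("No codec found for " + args[0]);
  System.exit(1);
}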
private void init() throws Exception {
  Configuration conf = new Configuration();
  conf.addResource(new Path(confLocation));
  String tokenStoreClassName =
      MetastoreConf.getVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_STORE_CLS, "");
  if (StringUtils.isBlank(tokenStoreClassName)) {
    throw new Exception("Could not find Delegation TokenStore implementation.");
  }
  Class<? extends DelegationTokenStore> clazz =
      Class.forName(tokenStoreClassName).asSubclass(DelegationTokenStore.class);
  delegationTokenStore = ReflectionUtils.newInstance(clazz, conf);
  delegationTokenStore.init(null, serverMode);
}
public static void copyTo64MB(String src, String dst) throws IOException {
  Configuration hconf = new Configuration();
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);
  FileSystem fs = FileSystem.get(hconf);
  long srcSize = fs.getFileStatus(srcPath).getLen();
  int copyTimes = (int) (67108864 / srcSize); // 64 MB
  System.out.println("Copy " + copyTimes + " times");
  Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
  Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
  Text value = new Text();
  Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath),
      Writer.keyClass(key.getClass()), Writer.valueClass(Text.class),
      Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf)));
  int count = 0;
  while (reader.next(key, value)) {
    for (int i = 0; i < copyTimes; i++) {
      writer.append(key, value);
      count++;
    }
  }
  System.out.println("Len: " + writer.getLength());
  System.out.println("Rows: " + count);
  reader.close();
  writer.close();
}
/**
 * Returns a brand new instance of the FileSystem. It does not use
 * the FileSystem.Cache. In newer versions of HDFS, we can directly
 * invoke FileSystem.newInstance(Configuration).
 *
 * @param conf Configuration
 * @return A new instance of the filesystem
 */
private static FileSystem newInstanceFileSystem(Configuration conf) throws IOException {
  URI uri = FileSystem.getDefaultUri(conf);
  FileSystem fs = null;
  Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
  if (clazz != null) {
    // This will be true for Hadoop 1.0, or 0.20.
    fs = (FileSystem) org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
    fs.initialize(uri, conf);
  } else {
    // For Hadoop 2.0, we have to go through FileSystem for the filesystem
    // implementation to be loaded by the service loader in case it has not
    // been loaded yet.
    Configuration clone = new Configuration(conf);
    clone.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true);
    fs = FileSystem.get(uri, clone);
  }
  if (fs == null) {
    throw new IOException("No FileSystem for scheme: " + uri.getScheme());
  }
  return fs;
}
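// A minimal usage sketch: because the instance above bypasses (or disables)
// the FileSystem cache, it can be closed without invalidating the shared
// cached FileSystem other threads may hold (the path is illustrative).
FileSystem privateFs = newInstanceFileSystem(conf);
try {
  privateFs.mkdirs(new Path("/tmp/scratch"));
} finally {
  privateFs.close(); // does not affect FileSystem.get(conf) users
}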
// Fragment (a dictionary-merge setup); the first call is truncated and other
// elisions are marked with "// ...".
// ... (
    context.getConfiguration().get(BatchConstants.ARG_META_URL));
final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
final String statOutputPath = context.getConfiguration()
    .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
conf = HadoopUtil.getCurrentConfiguration();
// ...
reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
// ...
CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap,
    averageSamplingPercentage);
Path statisticsFilePath = new Path(statOutputPath, /* ... */);
FSDataInputStream fis = fs.open(statisticsFilePath);
/**
 * Test using the gzip codec and an empty input file.
 */
@Test
public void testGzipEmpty() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "empty.gz"), gzip, "");
  FileInputFormat.setInputPaths(job, workDir);
  TextInputFormat format = new TextInputFormat();
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("Compressed files of length 0 are not returned from FileInputFormat.getSplits().",
      1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("Compressed empty file length == 0", 0, results.size());
}
// Fragment (a MapFile-style index repair routine); the signature is truncated
// and elided bodies are marked with "// ...".
// ... (earlier parameters elided)
    Configuration conf) throws Exception {
  String dr = (dryrun ? "[DRY RUN ] " : "");
  Path data = new Path(dir, DATA_FILE_NAME);
  Path index = new Path(dir, INDEX_FILE_NAME);
  int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128);
  if (!fs.exists(data)) {
    // ...
  }
  if (fs.exists(index)) {
    // ...
  }
  Writable key = ReflectionUtils.newInstance(keyClass, conf);
  Writable value = ReflectionUtils.newInstance(valueClass, conf);
  SequenceFile.Writer indexWriter = null;
  if (!dryrun) {
    // ...
  }
// Fragment (a file-sink operator's initialization); non-contiguous pieces are
// marked with "// ...".
isTemporary = conf.isTemporary();
multiFileSpray = conf.isMultiFileSpray();
this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
totalFiles = conf.getTotalFiles();
numFiles = conf.getNumFiles();
taskId = originalTaskId = Utilities.getTaskId(hconf);
initializeSpecPath();
fs = specPath.getFileSystem(hconf);
// ... (an if branch is elided)
} else {
  jc = new JobConf(hconf);
}
// ...
timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(
    jc.getPartitionerClass(), null);
// ...
if (shim != null) {
  fs.mkdirs(outputPath);
  shim.setStoragePolicy(outputPath, tmpStorage);
}