/**
 * Returns whether the counters for the named outputs are enabled.
 * <p>
 * MultipleOutputs supports counters, but they are disabled by default.
 * The counters group is the {@link AvroMultipleOutputs} class name, and the
 * names of the counters are the same as the named outputs. For multi named
 * outputs the name of the counter is the concatenation of the named output,
 * an underscore '_' and the multiname.
 *
 * @param conf job conf to check.
 * @return TRUE if the counters are enabled, FALSE if they are disabled.
 */
public static boolean getCountersEnabled(JobConf conf) {
  return conf.getBoolean(COUNTERS_ENABLED, false);
}
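// Usage sketch: COUNTERS_ENABLED is the key constant read above; setting it
// directly on the JobConf is an assumption (the class may also expose a
// setCountersEnabled helper for this).
JobConf conf = new JobConf();
conf.setBoolean(COUNTERS_ENABLED, true);     // opt in to per-named-output counters
boolean enabled = getCountersEnabled(conf);  // now returns true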
/**
 * Returns whether a named output is a multi named output.
 *
 * @param conf        job conf
 * @param namedOutput named output
 * @return <code>true</code> if the named output is multi, <code>false</code>
 *         if it is single. If the named output is not defined it returns
 *         <code>false</code>.
 */
public static boolean isMultiNamedOutput(JobConf conf, String namedOutput) {
  checkNamedOutput(conf, namedOutput, false);
  return conf.getBoolean(MO_PREFIX + namedOutput + MULTI, false);
}
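// Usage sketch ("avro1" is a hypothetical named output; it must have been
// registered beforehand, e.g. via this class's addMultiNamedOutput helper,
// or checkNamedOutput() will reject it):
boolean multi = isMultiNamedOutput(conf, "avro1");  // true for a multi named output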
@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
  if (job.getBoolean(AvroInputFormat.IGNORE_FILES_WITHOUT_EXTENSION_KEY,
      AvroInputFormat.IGNORE_INPUTS_WITHOUT_EXTENSION_DEFAULT)) {
    // Keep only files carrying the Avro extension.
    List<FileStatus> result = new ArrayList<>();
    for (FileStatus file : super.listStatus(job)) {
      if (file.getPath().getName().endsWith(AvroOutputFormat.EXT)) {
        result.add(file);
      }
    }
    return result.toArray(new FileStatus[0]);
  } else {
    return super.listStatus(job);
  }
}
@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
  if (job.getBoolean(IGNORE_FILES_WITHOUT_EXTENSION_KEY,
      IGNORE_INPUTS_WITHOUT_EXTENSION_DEFAULT)) {
    // Keep only files carrying the Avro extension.
    List<FileStatus> result = new ArrayList<>();
    for (FileStatus file : super.listStatus(job)) {
      if (file.getPath().getName().endsWith(AvroOutputFormat.EXT)) {
        result.add(file);
      }
    }
    return result.toArray(new FileStatus[0]);
  } else {
    return super.listStatus(job);
  }
}
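// Sketch: restricting the listings above to *.avro files. In Avro's mapred API
// the key constant resolves to "avro.mapred.ignore.inputs.without.extension"
// and AvroOutputFormat.EXT is ".avro".
job.setBoolean(AvroInputFormat.IGNORE_FILES_WITHOUT_EXTENSION_KEY, true);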
/**
 * Lists status for all files under a given path. Whether or not this is
 * recursive depends on the setting of job configuration parameter
 * mapred.input.dir.recursive.
 *
 * @param fs file system
 * @param p  path in file system
 * @return list of file status entries
 */
private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException {
  boolean recursive = job.getBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, false);
  // If this is in acid format always read it recursively regardless of what the jobconf says.
  if (!recursive && !AcidUtils.isAcid(p, job)) {
    return fs.listStatus(p, FileUtils.HIDDEN_FILES_PATH_FILTER);
  }
  List<FileStatus> results = new ArrayList<FileStatus>();
  for (FileStatus stat : fs.listStatus(p, FileUtils.HIDDEN_FILES_PATH_FILTER)) {
    FileUtils.listStatusRecursively(fs, stat, results);
  }
  return results.toArray(new FileStatus[results.size()]);
}
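// Sketch: enabling recursive listing for non-ACID paths. FileInputFormat.INPUT_DIR_RECURSIVE
// is the Hadoop constant for the switch the Javadoc above calls mapred.input.dir.recursive.
job.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);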
public void configure(JobConf conf) {
  this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
  List<StoreDefinition> storeDefs =
      new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml")));
  if (storeDefs.size() != 1) {
    throw new IllegalStateException("Expected exactly one store definition, but found "
                                    + storeDefs.size());
  }
  this.storeDef = storeDefs.get(0);

  this.numChunks = conf.getInt(NUM_CHUNKS, -1);
  if (this.numChunks < 1) {
    // A bit of defensive code for good measure, but should never happen anymore,
    // now that the config cannot be overridden by the user.
    throw new VoldemortException(NUM_CHUNKS
                                 + " not specified in the MapReduce JobConf (should NEVER happen)");
  }

  this.saveKeys = conf.getBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, true);
  this.reducerPerBucket = conf.getBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, true);
  this.buildPrimaryReplicasOnly =
      conf.getBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, false);
  if (buildPrimaryReplicasOnly && !saveKeys) {
    throw new IllegalStateException(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY
                                    + " can only be true if " + VoldemortBuildAndPushJob.SAVE_KEYS
                                    + " is also true.");
  }
}
private FileSinkOperator.RecordWriter getHiveWriter() throws IOException {
  if (this.hiveWriter == null) {
    // Copy any Avro table properties present in the job conf into the writer properties.
    Properties properties = new Properties();
    for (AvroSerdeUtils.AvroTableProperties tableProperty
        : AvroSerdeUtils.AvroTableProperties.values()) {
      String propVal;
      if ((propVal = jobConf.get(tableProperty.getPropName())) != null) {
        properties.put(tableProperty.getPropName(), propVal);
      }
    }
    boolean isCompressed = jobConf.getBoolean("mapreduce.output.fileoutputformat.compress", false);
    Path path = new Path(this.fileName);
    if (path.getFileSystem(jobConf).isDirectory(path)) {
      // This path is only potentially encountered during setup.
      // Otherwise, a specific part_xxxx file name is generated and passed in.
      path = new Path(path, "_dummy");
    }
    this.hiveWriter = getHiveRecordWriter(jobConf, path, null, isCompressed, properties, progressable);
  }
  return this.hiveWriter;
}
/**
 * Set working directory in local file system.
 *
 * @param dir Working directory.
 * @throws IOException If fails.
 */
private void setLocalFSWorkingDirectory(File dir) throws IOException {
  JobConf cfg = ctx.getJobConf();
  ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(cfg.getClassLoader());
  try {
    cfg.set(HadoopFileSystemsUtils.LOC_FS_WORK_DIR_PROP, dir.getAbsolutePath());
    if (!cfg.getBoolean(FILE_DISABLE_CACHING_PROPERTY_NAME, false))
      FileSystem.getLocal(cfg).setWorkingDirectory(new Path(dir.getAbsolutePath()));
  } finally {
    HadoopCommonUtils.restoreContextClassLoader(oldLdr);
  }
}
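// Hedged sketch: FILE_DISABLE_CACHING_PROPERTY_NAME is assumed to name Hadoop's
// "fs.file.impl.disable.cache" switch; when set, the method above skips mutating
// the working directory of the (then uncached) local FileSystem instance.
JobConf cfg = new JobConf();
cfg.setBoolean("fs.file.impl.disable.cache", true);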
@Override
public void configure(JobConf job) {
  super.configure(job);
  this.conf = job;
  this.position = new int[getNumChunks()];
  this.outputDir = job.get("final.output.dir");
  this.taskId = job.get("mapred.task.id");
  this.checkSumType = CheckSum.fromString(job.get(VoldemortBuildAndPushJob.CHECKSUM_TYPE));

  // These arrays are sparse if reducer.per.bucket is false and num.chunks > 1.
  this.checkSumDigestIndex = new CheckSum[getNumChunks()];
  this.checkSumDigestValue = new CheckSum[getNumChunks()];
  this.taskIndexFileName = new Path[getNumChunks()];
  this.taskValueFileName = new Path[getNumChunks()];
  this.indexFileStream = new DataOutputStream[getNumChunks()];
  this.valueFileStream = new DataOutputStream[getNumChunks()];
  this.indexFileSizeInBytes = new long[getNumChunks()];
  this.valueFileSizeInBytes = new long[getNumChunks()];

  String compressionCodec = conf.get(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC,
                                     NO_COMPRESSION_CODEC);
  if (conf.getBoolean(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS, false)
      && compressionCodec.toUpperCase(Locale.ENGLISH).equals(this.COMPRESSION_CODEC)) {
    this.fileExtension = GZIP_FILE_EXTENSION;
    this.isValidCompressionEnabled = true;
  } else {
    this.fileExtension = "";
    this.isValidCompressionEnabled = false;
  }
}
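// Hypothetical configuration sketch (key constants come from the snippet above;
// the value "GZIP" assumes that is what COMPRESSION_CODEC holds):
conf.setBoolean(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS, true);
conf.set(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC, "GZIP");
// With a recognized codec, configure() selects the gzip file extension.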
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
                                          Progressable progress) throws IOException {
  boolean ignoreSeparatorOnNull = job.getBoolean("mapred.textoutputformat.ignore.separator", false);
  String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
  splitSize = job.getLong(MR_REDUCE_MAX_FILE_PER_FILE, SPLIT_SIZE);
  jobConf = job;
  fileName = name;
  jobProgress = progress;
  Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
  // Instantiate the configured codec.
  codec = ReflectionUtils.newInstance(codecClass, job);
  FSDataOutputStream fileOut = createFile();
  return new MultiSplitRecordWriter<K, V>(
      new NewDataOutputStream(codec.createOutputStream(fileOut)),
      keyValueSeparator, ignoreSeparatorOnNull);
}
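// Usage sketch: both keys below are read by getRecordWriter() above.
JobConf job = new JobConf();
job.set("mapred.textoutputformat.separator", ",");                // key/value delimiter
job.setBoolean("mapred.textoutputformat.ignore.separator", true); // drop delimiter on null values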
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        // Setting statementId == -1 makes compacted delta files use the
        // delta_xxxx_yyyy format.
        .statementId(-1);

    // Instantiate the underlying output format.
    @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        // Setting statementId == -1 makes compacted delta files use the
        // delta_xxxx_yyyy format.
        .statementId(-1)
        .visibilityTxnId(getCompactorTxnId(jobConf));

    // Instantiate the underlying output format.
    @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
  options.inspector(inspector)
      .writingBase(false)
      .writingDeleteDelta(true) // this is the option which will make it a delete writer
      .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
      .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
      .reporter(reporter)
      .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
      .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
      .bucket(bucket)
      // Setting statementId == -1 makes compacted delta files use the
      // delta_xxxx_yyyy format.
      .statementId(-1)
      .visibilityTxnId(getCompactorTxnId(jobConf));

  // Instantiate the underlying output format.
  @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
  AcidOutputFormat<WritableComparable, V> aof =
      instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

  deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
}
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  if (deleteEventWriter == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(false)
        .writingDeleteDelta(true) // this is the option which will make it a delete writer
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        // Setting statementId == -1 makes compacted delta files use the
        // delta_xxxx_yyyy format.
        .statementId(-1);

    // Instantiate the underlying output format.
    @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
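// Hedged sketch of the driver-side configuration the compactor writers above
// consume. The key constants (IS_MAJOR, IS_COMPRESSED, MIN_TXN, MAX_TXN,
// TMP_LOCATION, OUTPUT_FORMAT_CLASS_NAME) are assumed to be String config names
// defined in the enclosing class; all values below are illustrative only.
JobConf jobConf = new JobConf();
jobConf.setBoolean(IS_MAJOR, true);          // write a base, not a delta
jobConf.setBoolean(IS_COMPRESSED, false);
jobConf.setLong(MIN_TXN, 100L);
jobConf.setLong(MAX_TXN, 110L);
jobConf.set(TMP_LOCATION, "/tmp/compactor"); // hypothetical scratch dir
jobConf.set(OUTPUT_FORMAT_CLASS_NAME, "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");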
private ShuffleTran generate(SparkPlan sparkPlan, SparkEdgeProperty edge, boolean toCache) {
  Preconditions.checkArgument(!edge.isShuffleNone(),
      "AssertionError: SHUFFLE_NONE should only be used for UnionWork.");
  SparkShuffler shuffler;
  if (edge.isMRShuffle()) {
    shuffler = new SortByShuffler(false, sparkPlan);
  } else if (edge.isShuffleSort()) {
    shuffler = new SortByShuffler(true, sparkPlan);
  } else {
    boolean useSparkGroupBy = jobConf.getBoolean("hive.spark.use.groupby.shuffle", true);
    if (!useSparkGroupBy) {
      LOG.info("hive.spark.use.groupby.shuffle is off. Use repartition shuffle instead.");
    }
    shuffler = useSparkGroupBy ? new GroupByShuffler() : new RepartitionShuffler();
  }
  return new ShuffleTran(sparkPlan, shuffler, edge.getNumPartitions(), toCache);
}
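// Sketch: toggling the shuffle choice read above. hive.spark.use.groupby.shuffle
// defaults to true; turning it off makes generate() fall back to the
// repartition-based shuffler.
jobConf.setBoolean("hive.spark.use.groupby.shuffle", false);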
public void initIOContextSortedProps(FileSplit split, RecordReader recordReader, JobConf job) {
  this.jobConf = job;

  this.getIOContext().resetSortingValues();
  this.isSorted = jobConf.getBoolean("hive.input.format.sorted", false);

  this.rangeStart = split.getStart();
  this.rangeEnd = split.getStart() + split.getLength();
  this.splitEnd = rangeEnd;
  if (recordReader instanceof RCFileRecordReader && rangeEnd != 0 && this.isSorted) {
    // Binary search only works if we know the size of the split, and the
    // recordReader is an RCFileRecordReader.
    this.getIOContext().setUseSorted(true);
    this.getIOContext().setBinarySearching(true);
    this.wasUsingSortedSearch = true;
  } else {
    // Use the default methods for next in the child class.
    this.isSorted = false;
  }
}
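// Sketch: a job opts in to the binary-search path above by declaring its input
// sorted; the record reader must also be an RCFileRecordReader for the flag to
// take effect.
JobConf job = new JobConf();
job.setBoolean("hive.input.format.sorted", true);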