@Override
public CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  long minSize = job.getLong(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 0);

  // For backward compatibility, let the above parameter be used
  // if the per-node/per-rack/max split sizes below are not set.
  if (job.getLong(
      org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 0) == 0) {
    super.setMinSplitSizeNode(minSize);
  }

  if (job.getLong(
      org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 0) == 0) {
    super.setMinSplitSizeRack(minSize);
  }

  if (job.getLong(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, 0) == 0) {
    super.setMaxSplitSize(minSize);
  }

  InputSplit[] splits = super.getSplits(job, numSplits);

  ArrayList<InputSplitShim> inputSplitShims = new ArrayList<InputSplitShim>();
  for (int pos = 0; pos < splits.length; pos++) {
    CombineFileSplit split = (CombineFileSplit) splits[pos];
    if (split.getPaths().length > 0) {
      inputSplitShims.add(new InputSplitShim(job, split.getPaths(),
          split.getStartOffsets(), split.getLengths(), split.getLocations()));
    }
  }
  return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]);
}
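A minimal driver-side sketch (not from the original source; class name and values are illustrative) of setting the split-size keys that the getSplits() override above falls back on. When the per-node, per-rack, and max keys are left at 0, the shim reuses SPLIT_MINSIZE for them.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeConfExample {
  public static JobConf configureSplitSizes() {
    JobConf job = new JobConf();
    // Global minimum split size; used as the fallback by the shim above.
    job.setLong(FileInputFormat.SPLIT_MINSIZE, 128L * 1024 * 1024);
    // Optional overrides; if left unset (0), the shim falls back to SPLIT_MINSIZE.
    job.setLong(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 256L * 1024 * 1024);
    job.setLong(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 256L * 1024 * 1024);
    job.setLong(FileInputFormat.SPLIT_MAXSIZE, 512L * 1024 * 1024);
    return job;
  }
}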
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  boolean ignoreSeparatorOnNull = job.getBoolean("mapred.textoutputformat.ignore.separator", false);
  String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
  splitSize = job.getLong(MR_REDUCE_MAX_FILE_PER_FILE, SPLIT_SIZE);
  jobConf = job;
  fileName = name;
  jobProgress = progress;

  Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
  // create the named codec
  codec = ReflectionUtils.newInstance(codecClass, job);

  FSDataOutputStream fileOut = createFile();
  return new MultiSplitRecordWriter<K, V>(
      new NewDataOutputStream(codec.createOutputStream(fileOut)),
      keyValueSeparator, ignoreSeparatorOnNull);
}
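A small client-side sketch (illustrative only) of setting the properties this getRecordWriter() reads. The separator and ignore-separator keys use the literal names shown above; the per-file split-size key is whatever string MR_REDUCE_MAX_FILE_PER_FILE resolves to in the enclosing class, so it is not set here.

JobConf job = new JobConf();
job.set("mapred.textoutputformat.separator", ",");               // default is "\t"
job.setBoolean("mapred.textoutputformat.ignore.separator", true); // drop separator when value is null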
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
  options.inspector(inspector)
      .writingBase(false)
      .writingDeleteDelta(true)   // this is the option which will make it a delete writer
      .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
      .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
      .reporter(reporter)
      .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
      .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
      .bucket(bucket)
      .statementId(-1)            // setting statementId == -1 makes compacted delta files use
                                  // delta_xxxx_yyyy format
      .visibilityTxnId(getCompactorTxnId(jobConf));

  // Instantiate the underlying output format
  @SuppressWarnings("unchecked")  // since there is no way to parametrize instance of Class
  AcidOutputFormat<WritableComparable, V> aof =
      instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

  deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        .statementId(-1);  // setting statementId == -1 makes compacted delta files use
                           // delta_xxxx_yyyy format

    // Instantiate the underlying output format
    @SuppressWarnings("unchecked")  // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        .statementId(-1)   // setting statementId == -1 makes compacted delta files use
                           // delta_xxxx_yyyy format
        .visibilityTxnId(getCompactorTxnId(jobConf));

    // Instantiate the underlying output format
    @SuppressWarnings("unchecked")  // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket)
    throws IOException {
  if (deleteEventWriter == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(false)
        .writingDeleteDelta(true)   // this is the option which will make it a delete writer
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket)
        .statementId(-1);  // setting statementId == -1 makes compacted delta files use
                           // delta_xxxx_yyyy format

    // Instantiate the underlying output format
    @SuppressWarnings("unchecked")  // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
public void configure(JobConf job) {
  try {
    pages = job.getLong("pages", 0);
    slots = job.getLong("slots", 0);
    visits = job.getLong("visits", 0);
    delim = job.get("delimiter");
    visit = new Visit(DistributedCache.getLocalCacheFiles(job), delim, pages);
    vitem = new JoinBytesInt();
    vitem.refs = 1;
  } catch (IOException e) {
    e.printStackTrace();
  }
}
HtmlCore(JobConf job) throws IOException {
  pages = job.getLong("pages", 0);
  slotpages = job.getLong("slotpages", 0);
  slots = (int) Math.ceil(pages * 1.0 / slotpages);
  outpages = (long) Math.floor(pages * epercent);
  totalpages = pages + outpages;

  dict = Utils.getDict(job);
  if (DataGen.DEBUG_MODE) {
    printDict();
  }

  wordids = new long[getMeanContentLength() * 2];
  linkids = new long[wordids.length];

  try {
    this.wzipf = Utils.getSharedWordZipfCore(job);
    this.lzipf = Utils.getSharedLinkZipfCore(job);
  } catch (IOException e) {
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
  }
}
public void configure(JobConf job) {
  try {
    pages = job.getLong("pages", 0);
    delim = job.get("delimiter");
    pid = job.getInt("mapred.task.partition", 0);
    visit = new Visit(DistributedCache.getLocalCacheFiles(job), delim, pages);
    visit.fireRandom(pid + 1);
    vitem = new JoinBytesInt();
    errors = 0;
    missed = 0;
  } catch (IOException e) {
    e.printStackTrace();
  }
}
@Override
public Estimation estimate(JobConf job, TableScanOperator ts, long remaining)
    throws HiveException {
  String hiveTableName = ts.getConf().getTableMetadata().getTableName();
  int reducerCount = job.getInt(hiveTableName
      + PhoenixStorageHandlerConstants.PHOENIX_REDUCER_NUMBER, 1);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Estimating input size for table: " + hiveTableName + " with reducer count "
        + reducerCount + ". Remaining : " + remaining);
  }

  long bytesPerReducer = job.getLong(HiveConf.ConfVars.BYTESPERREDUCER.varname,
      Long.parseLong(HiveConf.ConfVars.BYTESPERREDUCER.getDefaultValue()));
  long totalLength = reducerCount * bytesPerReducer;

  return new Estimation(0, totalLength);
}
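A worked example of the arithmetic in estimate() above; the values are illustrative assumptions, not defaults taken from any particular Hive version.

// With a per-table reducer count of 4 read from the JobConf and
// hive.exec.reducers.bytes.per.reducer resolving to 256 MiB:
int reducerCount = 4;
long bytesPerReducer = 256L * 1024 * 1024;
long totalLength = reducerCount * bytesPerReducer;   // 1 GiB reported as the estimated input size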
JobConf conf = new JobConf();
conf.getLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL, 0);
conf.setLong(MRJobConfig.TASK_LOCAL_WRITE_LIMIT_BYTES, limit);
LocalFileSystem localFS = FileSystem.getLocal(conf);
/**
 * Ensure that M/R 1.x applications can get and set task virtual memory with
 * old property names
 */
@SuppressWarnings("deprecation")
@Test (timeout = 10000)
public void testDeprecatedPropertyNameForTaskVmem() {
  JobConf configuration = new JobConf();

  configuration.setLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, 1024);
  configuration.setLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, 1024);
  Assert.assertEquals(1024, configuration.getMemoryForMapTask());
  Assert.assertEquals(1024, configuration.getMemoryForReduceTask());

  // Make sure new property names aren't broken by the old ones
  configuration.setLong(JobConf.MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY, 1025);
  configuration.setLong(JobConf.MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY, 1025);
  Assert.assertEquals(1025, configuration.getMemoryForMapTask());
  Assert.assertEquals(1025, configuration.getMemoryForReduceTask());

  configuration.setMemoryForMapTask(2048);
  configuration.setMemoryForReduceTask(2048);
  Assert.assertEquals(2048, configuration.getLong(
      JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, -1));
  Assert.assertEquals(2048, configuration.getLong(
      JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, -1));

  // Make sure new property names aren't broken by the old ones
  Assert.assertEquals(2048, configuration.getLong(
      JobConf.MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY, -1));
  Assert.assertEquals(2048, configuration.getLong(
      JobConf.MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY, -1));
}
@Override
public void configure(JobConf jobConf) {
  sleepTimeMillis = jobConf.getLong(SLEEP_TIME_MILLIS_KEY, sleepTimeMillis);
  System.out.println("Configuring MR to sleep for " + sleepTimeMillis + " millis.");
}
public void configure(JobConf job) {
  this.mapSleepCount =
      job.getInt("sleep.job.map.sleep.count", mapSleepCount);
  this.reduceSleepCount =
      job.getInt("sleep.job.reduce.sleep.count", reduceSleepCount);

  // The total sleep time is spread evenly across the configured number of
  // sleep iterations for each task.
  this.mapSleepDuration =
      job.getLong("sleep.job.map.sleep.time", 100) / mapSleepCount;
  this.reduceSleepDuration =
      job.getLong("sleep.job.reduce.sleep.time", 100) / reduceSleepCount;
}
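A client-side sketch of driving the sleep job above; the property names are taken from the configure() method and the values are illustrative. Each task's total sleep time is divided by its iteration count, as computed above.

JobConf job = new JobConf();
job.setInt("sleep.job.map.sleep.count", 10);
job.setLong("sleep.job.map.sleep.time", 10000);     // 10 s total per map task, 1 s per iteration
job.setInt("sleep.job.reduce.sleep.count", 5);
job.setLong("sleep.job.reduce.sleep.time", 5000);   // 5 s total per reduce task, 1 s per iteration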