/**
 * Creates a task attempt context that wraps the given configuration and attempt id.
 *
 * @param c the configuration to bind into the context
 * @param t the task attempt identifier
 * @return a new {@code TaskAttemptContextImpl} for the attempt
 */
@Override
public TaskAttemptContext newTask(Configuration c, TaskAttemptID t) {
  final TaskAttemptContextImpl context = new TaskAttemptContextImpl(c, t);
  return context;
}
/**
 * Builds a new-API task attempt context for the given id.
 *
 * <p>If the supplied configuration is a {@code JobConf} it is defensively copied
 * so mutations made through the context cannot leak back into the caller's conf.
 *
 * @param conf   the job configuration (possibly a {@code JobConf})
 * @param taskId the attempt id to bind
 * @return a new-API {@code TaskAttemptContext}
 */
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(
    Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID taskId) {
  final Configuration contextConf;
  if (conf instanceof JobConf) {
    contextConf = new JobConf(conf);
  } else {
    contextConf = conf;
  }
  return new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(contextConf, taskId);
}
/**
 * Returns a TaskAttemptContext instance created from the given parameters.
 *
 * @param job an instance of o.a.h.mapreduce.Job
 * @param taskId an identifier for the task attempt id. Should be parsable by
 *        {@link TaskAttemptID#forName(String)}
 * @return a concrete TaskAttemptContext instance of o.a.h.mapreduce.TaskAttemptContext
 */
@Override
@SuppressWarnings("unchecked")
public <T, J> T createTestTaskAttemptContext(J job, String taskId) {
  final Job mrJob = (Job) job;
  final TaskAttemptID attemptId = TaskAttemptID.forName(taskId);
  return (T) new TaskAttemptContextImpl(mrJob.getConfiguration(), attemptId);
}
}
/**
 * Opens the given split: creates the HCatalog record reader for it and
 * initializes the reader under a throwaway task attempt context.
 *
 * @param split the wrapped Hadoop input split to read
 * @throws IOException if the record reader cannot be created or initialized
 */
@Override
public void open(HadoopInputSplit split) throws IOException {
  final TaskAttemptContext attemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID());
  try {
    this.recordReader =
        this.hCatInputFormat.createRecordReader(split.getHadoopInputSplit(), attemptContext);
    this.recordReader.initialize(split.getHadoopInputSplit(), attemptContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not create RecordReader.", e);
  } finally {
    // A freshly opened reader has not produced a record yet.
    this.fetched = false;
  }
}
// Bind the configuration and attempt id into a task attempt context, then ask the
// output format for its committer and run job-level setup under a fresh job id.
this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
// NOTE(review): setupJob is invoked with a brand-new JobID rather than one derived
// from taskAttemptID — presumably intentional for this output path; verify against callers.
this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));
@Override public void finalizeGlobal(int parallelism) throws IOException { JobContext jobContext; TaskAttemptContext taskContext; try { TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0") + Integer.toString(1) + "_0"); jobContext = new JobContextImpl(this.configuration, new JobID()); taskContext = new TaskAttemptContextImpl(this.configuration, taskAttemptID); this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext); } catch (Exception e) { throw new RuntimeException(e); } jobContext.getCredentials().addAll(this.credentials); Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser()); if (currentUserCreds != null) { jobContext.getCredentials().addAll(currentUserCreds); } // finalize HDFS output format if (this.outputCommitter != null) { this.outputCommitter.commitJob(jobContext); } }
@Override public void open(HadoopInputSplit split) throws IOException { // enforce sequential open() calls synchronized (OPEN_MUTEX) { TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID()); try { this.recordReader = this.mapreduceInputFormat .createRecordReader(split.getHadoopInputSplit(), context); this.recordReader.initialize(split.getHadoopInputSplit(), context); } catch (InterruptedException e) { throw new IOException("Could not create RecordReader.", e); } finally { this.fetched = false; } } }
/** Verifies the record reader yields each input path keyed by its ordinal, then ends. */
@Test
public void testRecordReader() throws Exception {
  List<String> workUnitPaths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split =
      new GobblinWorkUnitsInputFormat.GobblinSplit(workUnitPaths);
  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

  RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(
      split, new TaskAttemptContextImpl(new Configuration(),
          new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  // First record: key 0 -> "/path1".
  reader.nextKeyValue();
  Assert.assertEquals(reader.getCurrentKey().get(), 0);
  Assert.assertEquals(reader.getCurrentValue().toString(), "/path1");
  // Second record: key 1 -> "/path2".
  reader.nextKeyValue();
  Assert.assertEquals(reader.getCurrentKey().get(), 1);
  Assert.assertEquals(reader.getCurrentValue().toString(), "/path2");
  // No further records.
  Assert.assertFalse(reader.nextKeyValue());
}
/** Reads offsets 50..99 from the test topic and checks each against the expected records. */
@Test
public void testRecordReader() {
  List<KafkaWritable> expectedWritables = expectedRecords.stream()
      .map((record) -> new KafkaWritable(
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.value(),
          record.key()))
      .collect(Collectors.toList());
  KafkaRecordReader reader = new KafkaRecordReader();
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
  reader.initialize(new KafkaInputSplit(currentTopic, 0, 50L, 100L, null), attemptContext);
  for (int offset = 50; offset < 100; ++offset) {
    KafkaWritable actual = new KafkaWritable();
    Assert.assertTrue(reader.next(null, actual));
    Assert.assertEquals(expectedWritables.get(offset), actual);
  }
  reader.close();
}
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs, final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException { SortedSet<byte []> result = new TreeSet<>(Bytes.BYTES_COMPARATOR); // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is // what is missing. TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr = new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) { InputSplit is = new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String [] {}); rr.initialize(is, context); while (rr.nextKeyValue()) { rr.getCurrentKey(); BytesWritable bw = rr.getCurrentValue(); if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) { byte[] key = new byte[rr.getCurrentKey().getLength()]; System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey() .getLength()); result.add(key); } } } return result; } }
/**
 * Create a new task attempt context.
 *
 * @param conf config
 * @return a new context bound to this instance's task attempt id
 */
private TaskAttemptContext taskAttempt(Configuration conf) {
  final TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, taskAttemptID);
  return context;
}
@Test public void testInvalidVersionNumber() throws IOException { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); try { new FileOutputCommitter(outDir, tContext); fail("should've thrown an exception!"); } catch (IOException e) { //test passed } }
// Builds one record reader per input split of the graph's configured input format.
// For file-based input formats, the input directory is resolved through the storage
// layer first; an empty graph (no input location or no data) leaves `readers` empty.
public HadoopElementIterator(final HadoopGraph graph) { try { this.graph = graph; final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration()); final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration); if (inputFormat instanceof FileInputFormat) { final Storage storage = FileSystemStorage.open(configuration); if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION)) return; // there is no input location and thus, no data (empty graph) if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent()) return; // there is no data at the input location (empty graph) configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get()); } // Split the input under a synthetic job id, then open a reader per split with a
// throwaway task attempt context.
final List<InputSplit> splits = inputFormat.getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1))); for (final InputSplit split : splits) { this.readers.add(inputFormat.createRecordReader(split, new TaskAttemptContextImpl(configuration, new TaskAttemptID()))); } } catch (final Exception e) { // Wrap everything (IO, interruption, config errors) as an IllegalStateException,
// preserving the cause.
throw new IllegalStateException(e.getMessage(), e); } }
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException { // Set up context File testFile = new File(testFileUrl.getFile()); long testFileSize = testFile.length(); Path testFilePath = new Path(testFile.getAbsolutePath()); Configuration conf = new Configuration(); conf.setInt("io.file.buffer.size", 1); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); // Gather the records returned by the record reader ArrayList<String> records = new ArrayList<String>(); long offset = 0; while (offset < testFileSize) { FileSplit split = new FileSplit(testFilePath, offset, splitSize, null); LineRecordReader reader = new LineRecordReader(); reader.initialize(split, context); while (reader.nextKeyValue()) { records.add(reader.getCurrentValue().toString()); } offset += splitSize; } return records; }
// Use a mocked reporter so the context can be built without a live task runtime.
TaskReporter reporter = Mockito.mock(TaskReporter.class);
TaskAttemptContextImpl taskAttemptContext =
    new TaskAttemptContextImpl(conf, taskAttemptID, reporter);
version); // tail of a call started on a preceding (unseen) line
// Build job and task attempt contexts from the same attempt id, then create the
// committer under test against the shared output directory.
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); // tail of a conf.setInt(...) started on a preceding (unseen) line
// A throwaway attempt id is sufficient for driving the record reader in tests.
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
@Test public void testMultipleClose() throws IOException { URL testFileUrl = getClass().getClassLoader(). getResource("recordSpanningMultipleSplits.txt.bz2"); assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl); File testFile = new File(testFileUrl.getFile()); Path testFilePath = new Path(testFile.getAbsolutePath()); long testFileSize = testFile.length(); Configuration conf = new Configuration(); conf.setInt(org.apache.hadoop.mapreduce.lib.input. LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); // read the data and check whether BOM is skipped FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null); LineRecordReader reader = new LineRecordReader(); reader.initialize(split, context); //noinspection StatementWithEmptyBody while (reader.nextKeyValue()) ; reader.close(); reader.close(); BZip2Codec codec = new BZip2Codec(); codec.setConf(conf); Set<Decompressor> decompressors = new HashSet<Decompressor>(); for (int i = 0; i < 10; ++i) { decompressors.add(CodecPool.getDecompressor(codec)); } assertEquals(10, decompressors.size()); }
version); // tail of a call started on a preceding (unseen) line
// Build job and task attempt contexts from the same attempt id, then create the
// committer under test against the shared output directory.
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
version); // tail of a call started on a preceding (unseen) line
// Build job and task attempt contexts from the same attempt id, then create the
// committer under test against the shared output directory.
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);