/**
 * {@inheritDoc}
 */
@Override
public RecordReader<NullWritable, GenericRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Schema readerSchema = AvroJob.getInputValueSchema(context.getConfiguration());
  if (readerSchema == null) {
    String schemaFilePath = context.getConfiguration().get(CONF_INPUT_VALUE_SCHEMA_PATH);
    if (StringUtils.isNotBlank(schemaFilePath)) {
      log.info("Using file: %s as reader schema.", schemaFilePath);
      try (FSDataInputStream inputStream =
          FileSystem.get(context.getConfiguration()).open(new Path(schemaFilePath))) {
        readerSchema = new Schema.Parser().parse(inputStream);
      }
    }
  }
  if (null == readerSchema) {
    log.warn("Reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
    log.info("Using a reader schema equal to the writer schema.");
  }
  return new AvroValueRecordReader(readerSchema);
}

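A minimal driver-side sketch of how the reader schema above could be supplied; AvroJob.setInputValueSchema is the standard Avro mapreduce API, while the job name, class name, and record schema here are placeholders.

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroReaderSchemaDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "avro-value-read");
    // Placeholder record schema; a real job would parse or generate its own Avro schema.
    Schema readerSchema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
    // Makes AvroJob.getInputValueSchema(...) in the record-reader snippet return a non-null schema.
    AvroJob.setInputValueSchema(job, readerSchema);
  }
}
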
final String finalSegmentString = taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY);
if (finalSegmentString == null) {
  throw new IOException("Could not read final segment");
}
// Assumption: the segment was stored as JSON under PUBLISHED_SEGMENT_KEY; the type is inferred
// from the jsonMapper write below.
final DataSegment newSegment =
    HadoopDruidConverterConfig.jsonMapper.readValue(finalSegmentString, DataSegment.class);
log.info("Committing new segment [%s]", newSegment);
taskContext.progress();
final FileSystem fs = taskContext.getWorkingDirectory().getFileSystem(taskContext.getConfiguration());
final Path taskAttemptDir = getTaskPath(
    taskContext.getJobID(),
    taskContext.getTaskAttemptID(),
    taskContext.getWorkingDirectory()
);
final Path taskAttemptFile = new Path(taskAttemptDir, DATA_FILE_KEY);
final Path taskAttemptSuccess = new Path(taskAttemptDir, DATA_SUCCESS_KEY);
try (final OutputStream outputStream = fs.create(taskAttemptFile, false, 1 << 10, commitProgressable)) {
  outputStream.write(HadoopDruidConverterConfig.jsonMapper.writeValueAsBytes(newSegment));
  fs.create(taskAttemptSuccess, false).close();
  taskContext.progress();
  taskContext.setStatus("Committed");
}

private String generateKey(TaskAttemptContext context) throws IOException {
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  if (StringUtils.isBlank(jobInfoString)) {
    // Avoid the NPE.
    throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
  }
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}

protected DataOutputStream getDataOutputStream(final TaskAttemptContext job) throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}

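A small, hedged sketch of the driver-side settings that make getCompressOutput(job) return true in a snippet like the one above; the codec and job name are arbitrary examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressedOutputDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "compressed-output");
    // Enable output compression and pick a codec; getOutputCompressorClass in the
    // snippet above falls back to DefaultCodec when no codec is configured.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
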
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fSplit = (FileSplit) split;
  Path path = fSplit.getPath();
  Configuration conf = context.getConfiguration();
  this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
  this.end = fSplit.getStart() + fSplit.getLength();
  if (fSplit.getStart() > in.getPosition()) {
    in.sync(fSplit.getStart());
  }
  this.start = in.getPosition();
  more = start < end;
  key = new LongWritable();
  value = new BytesRefArrayWritable();
}

FileSystem fs = FileSystem.get(context.getConfiguration());
if (EtlMultiOutputFormat.isRunMoveData(context)) {
  Path workPath = super.getWorkPath();
  Path baseOutDir = EtlMultiOutputFormat.getDestinationPath(context);
  log.info("Destination base path: " + baseOutDir);
  for (FileStatus f : fs.listStatus(workPath)) {
    String file = f.getPath().getName();
    log.info("work file: " + file);
    if (file.startsWith("data")) {
      String partitionedFile =
          getPartitionedPath(context, file, count.getEventCount(), count.getLastKey().getOffset());
      Path dest = new Path(baseOutDir, partitionedFile);
      if (!fs.exists(dest.getParent())) {
        mkdirs(fs, dest.getParent());
      }
      Path tempPath = new Path(workPath, "counts." + context.getConfiguration().get("mapred.task.id"));
      OutputStream outputStream = new BufferedOutputStream(fs.create(tempPath));
      ObjectMapper mapper = new ObjectMapper();
      SequenceFile.Writer offsetWriter = SequenceFile.createWriter(fs, context.getConfiguration(),
          new Path(super.getWorkPath(), EtlMultiOutputFormat.getUniqueFile(context, EtlMultiOutputFormat.OFFSET_PREFIX, "")),

context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName()); String jobInfoString = context.getConfiguration().get( HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler( context.getConfiguration(), storeInfo); Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass(); AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration()); context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName()); if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()){ (org.apache.hadoop.mapred.RecordWriter)null, context); } else { Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part"))); parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)),
static void setWorkOutputPath(TaskAttemptContext context) throws IOException {
  String outputPath = context.getConfiguration().get("mapred.output.dir");
  // We need to do this to get the task path and set it for the mapred implementation,
  // since it can't be done automatically because of the mapreduce -> mapred abstraction.
  if (outputPath != null) {
    context.getConfiguration().set("mapred.work.output.dir",
        new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString());
  }
}

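For context, a hedged sketch of how old-API (mapred) output code typically resolves the property being published above; FileOutputFormat.getWorkOutputPath is the standard mapred accessor, and the surrounding class is illustrative only.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class WorkOutputPathSketch {
  // Illustrative helper: old-API code looks up the task's side-effect/work directory through
  // this accessor, which is the path the wrapper above sets under "mapred.work.output.dir".
  static Path resolveTaskWorkDir(JobConf conf) {
    return FileOutputFormat.getWorkOutputPath(conf);
  }
}
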
final Configuration conf = context.getConfiguration();
final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false);
final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
if (writeTableNames == null || writeTableNames.isEmpty()) {
  throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
      + " cannot be empty");
}
final FileSystem fs = outputDir.getFileSystem(conf);
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
Path writerPath = null;
if (writeMultipleTables) {
  writerPath = new Path(outputDir, new Path(tableRelPath, Bytes.toString(family)));

    throws FileNotFoundException, IllegalArgumentException, IOException {
  Configuration conf = new Configuration(false);
  conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
  TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);
  Path p = spy(new Path("/user/hadoop/out"));
  Path a = new Path("hdfs://user/hadoop/out");
  Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
  Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
  Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");
  when(fs.exists(eq(p0))).thenReturn(true);
  when(fs.exists(eq(p1))).thenReturn(true);
  when(fs.exists(eq(p2))).thenReturn(true);
  when(fs.exists(eq(p3))).thenReturn(false);
  when(fs.delete(eq(p0), eq(true))).thenReturn(true);
  when(context.getTaskAttemptID()).thenReturn(tid0);
  when(context.getConfiguration()).thenReturn(conf);

/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
  Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
      getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
          org.apache.avro.mapred.AvroOutputFormat.EXT));
  return path.getFileSystem(context.getConfiguration()).create(path);
}

public MultiRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
  String[] aliases = getOutputFormatAliases(context);
  for (String alias : aliases) {
    LOGGER.info("Creating record writer for alias: " + alias);
    TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context);
    Configuration aliasConf = aliasContext.getConfiguration();
    // Create output directory if not already created.
    String outDir = aliasConf.get("mapred.output.dir");
    if (outDir != null) {
      Path outputDir = new Path(outDir);
      FileSystem fs = outputDir.getFileSystem(aliasConf);
      if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
      }
    }
    OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
    baseRecordWriters.put(alias,
        new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), aliasContext));
  }
}

public void commitTask(TaskAttemptContext context) throws IOException {
  Path workPath = getWorkPath();
  FileSystem fs = workPath.getFileSystem(context.getConfiguration());
  if (fs.exists(workPath)) {
    long recordCount = getRecordCountFromCounter(context, AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
    String fileNamePrefix;
    for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        // ...
      }
    })) {
      Path newPath = new Path(status.getPath().getParent(), fileName);
      LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
      fs.rename(status.getPath(), newPath);

Configuration conf = new Configuration();
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
expect(context.getConfiguration()).andReturn(conf).anyTimes();

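Where a full mock is not needed, a real context can be constructed instead; a small hedged sketch (the fixture class and helper are illustrative, TaskAttemptContextImpl is Hadoop's concrete implementation).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextFixture {
  // Builds a concrete TaskAttemptContext backed by the given Configuration, so
  // record readers/writers under test see real getConfiguration() values.
  static TaskAttemptContext newContext(Configuration conf) {
    return new TaskAttemptContextImpl(conf, new TaskAttemptID());
  }
}
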
/**
 * Constructor.
 * @param context The TaskAttemptContext to supply the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());
  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  dirPath = new Path(outputPath.toString() + "/" + dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);
  blockSize = fs.getDefaultBlockSize();
}

@Override
public void write(Object dummy, Canvas r) throws IOException {
  String suffix = String.format("-%05d.png", canvasesWritten++);
  Path p = new Path(outPath.getParent(), outPath.getName() + suffix);
  FSDataOutputStream outFile = outFS.create(p);
  // Write the merged canvas
  plotter.writeImage(r, outFile, this.vflip);
  outFile.close();
  task.progress();
  java.awt.Point imageLocation = mergedCanvas.projectToImageSpace(r.inputMBR.x1, r.inputMBR.y2);
  masterFile.printf("%d,%d,%s\n", imageLocation.x, imageHeight - imageLocation.y, p.getName());
}

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  Path filePath = fileSplit.getPath();
  FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
  shpInputStream = fileSys.open(filePath);
  // Hand the input stream to the parser and parse the file header to initialize.
  parser = new ShpFileParser(shpInputStream);
  parser.parseShapeFileHead();
}

Path outputPath = committer.getWorkPath();
FileSystem fs = outputPath.getFileSystem(task.getConfiguration());
if (!fs.exists(outputPath)) {
  fs.mkdirs(outputPath);
}
CompressionCodec codec = null;
if (getCompressOutput(task)) {
  Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
  codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
}
final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
  if (codec != null) {
    in = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
