/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
  Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
      getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
          org.apache.avro.mapred.AvroOutputFormat.EXT));
  return path.getFileSystem(context.getConfiguration()).create(path);
}
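The stream returned above is typically handed straight to an Avro container-file writer. A minimal sketch of that hand-off, with a tiny "Example" record schema invented purely for illustration (not part of the original snippet):

Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
DataFileWriter<GenericRecord> fileWriter =
    new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
fileWriter.create(schema, getAvroFileOutputStream(context)); // writes the container header to the stream
GenericRecord record = new GenericData.Record(schema);       // the "Example" schema is hypothetical
record.put("id", 1L);
fileWriter.append(record);
fileWriter.close();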
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
  Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (schema != null) {
    return schema;
  }
  Path path = split.getPath(idx);
  FileSystem fs = path.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(path, fs);
}
public MultiRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
  String[] aliases = getOutputFormatAliases(context);
  for (String alias : aliases) {
    LOGGER.info("Creating record writer for alias: " + alias);
    TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context);
    Configuration aliasConf = aliasContext.getConfiguration();
    // Create output directory if not already created.
    String outDir = aliasConf.get("mapred.output.dir");
    if (outDir != null) {
      Path outputDir = new Path(outDir);
      FileSystem fs = outputDir.getFileSystem(aliasConf);
      if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
      }
    }
    OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
    baseRecordWriters.put(alias,
        new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), aliasContext));
  }
}
context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName()); String jobInfoString = context.getConfiguration().get( HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler( context.getConfiguration(), storeInfo); Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass(); AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration()); context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName()); if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()){ (org.apache.hadoop.mapred.RecordWriter)null, context); } else { Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part"))); parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)),
final String finalSegmentString = taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY);
if (finalSegmentString == null) {
  throw new IOException("Could not read final segment");
}
taskContext.progress();
final FileSystem fs = taskContext.getWorkingDirectory().getFileSystem(taskContext.getConfiguration());
final Path taskAttemptDir = getTaskPath(
    context.getJobID(),
    context.getTaskAttemptID(),          // the remaining getTaskPath arguments are assumed;
    taskContext.getWorkingDirectory());  // the original snippet is truncated after getJobID()
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  conf = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(conf);
  LOG.info("Initialize HFileRecordReader for {}", path);
  this.in = HFile.createReader(fs, path, conf);

  // The file info must be loaded before the scanner can be used.
  // This seems like a bug in HBase, but it's easily worked around.
  this.in.loadFileInfo();
  this.scanner = in.getScanner(false, false);
}
/**
 * Constructor.
 * @param context The TaskAttemptContext to supply the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());

  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  dirPath = new Path(outputPath.toString() + "/" + dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);

  blockSize = fs.getDefaultBlockSize();
}
Path outputPath = committer.getWorkPath();
FileSystem fs = outputPath.getFileSystem(task.getConfiguration());

// The destination is assumed to be the task's default work file; the original snippet
// references `file` without showing its declaration.
Path file = getDefaultWorkFile(task, "");

CompressionCodec codec = null;
if (getCompressOutput(task)) {
  Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
  codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
}

final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);
public void commitTask(TaskAttemptContext context) throws IOException {
  Path workPath = getWorkPath();
  FileSystem fs = workPath.getFileSystem(context.getConfiguration());
final Configuration conf = context.getConfiguration();
final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false);
final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
if (writeTableNames == null || writeTableNames.isEmpty()) {
  // Guard clause reconstructed; the original snippet only retained the tail of the error message.
  throw new IllegalArgumentException(OUTPUT_TABLE_NAME_CONF_KEY + " cannot be empty");
}
final FileSystem fs = outputDir.getFileSystem(conf);
final Configuration conf = context.getConfiguration();
LOG.debug("Task output path: " + outputdir);
final FileSystem fs = outputdir.getFileSystem(conf);
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fSplit = (FileSplit) split;
  Path path = fSplit.getPath();
  Configuration conf = context.getConfiguration();
  this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
  this.end = fSplit.getStart() + fSplit.getLength();

  if (fSplit.getStart() > in.getPosition()) {
    in.sync(fSplit.getStart()); // skip forward to the first sync marker at or after the split start
  }

  this.start = in.getPosition();
  more = start < end;

  key = new LongWritable();
  value = new BytesRefArrayWritable();
}
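For context, a matching nextKeyValue() might look like the following sketch. It assumes the fields set up in initialize() (in, key, value, more, end) and is not part of the original snippet:

@Override
public boolean nextKeyValue() throws IOException {
  if (!more) {
    return false;
  }
  more = in.next(key);                       // advance to the next row; false at end of file
  if (more && in.lastSeenSyncPos() >= end) { // stop once we have read past the end of this split
    more = false;
  }
  if (more) {
    in.getCurrentRow(value);                 // materialize the current row's columns
  }
  return more;
}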
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); FileSystem fs = outputFile.getFileSystem(conf);
public CustomRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException {
  path = split.getPath(index);
  fs = path.getFileSystem(context.getConfiguration());
  startOffset = split.getOffset(index);
  endOffset = startOffset + split.getLength(index);

  fileIn = fs.open(path);
  reader = new LineReader(fileIn);
  pos = startOffset;
}
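A plausible nextKeyValue() for this reader, sketched under the assumption that key is a LongWritable, value is a Text, and pos/endOffset/reader are the fields initialized above (none of this is in the original snippet):

@Override
public boolean nextKeyValue() throws IOException {
  if (key == null) {
    key = new LongWritable();
  }
  if (value == null) {
    value = new Text();
  }
  if (pos >= endOffset) {
    return false;                    // past the end of this chunk of the combined split
  }
  key.set(pos);                      // byte offset of the line within the file
  int newSize = reader.readLine(value);
  if (newSize == 0) {
    return false;                    // end of file
  }
  pos += newSize;                    // advance by the number of bytes consumed
  return true;
}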
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job) throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}
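One way a subclass could consume this helper is sketched below; the anonymous RecordWriter and its line-per-value handling are assumptions for illustration, not part of the original format (java.nio.charset.StandardCharsets is assumed to be imported):

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
  final DataOutputStream out = getDataOutputStream(job);
  return new RecordWriter<K, V>() {
    @Override
    public void write(K key, V value) throws IOException {
      out.write(value.toString().getBytes(StandardCharsets.UTF_8)); // write the value as one UTF-8 text line
      out.write('\n');
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
      out.close(); // finishes the plain or compression-wrapped stream
    }
  };
}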
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  Path file = split.getPath(); // declaration added: the original snippet uses `file` without defining it
  FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputdir.getFileSystem(conf);
when(fs.delete(eq(p0), eq(true))).thenReturn(true);
when(fs.delete(eq(p1), eq(true))).thenReturn(true);
doReturn(fs).when(p).getFileSystem(any(Configuration.class));
when(fs.makeQualified(eq(p))).thenReturn(a);
when(context.getConfiguration()).thenReturn(conf);
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  final FileSplit split = (FileSplit) genericSplit;
  final Configuration configuration = context.getConfiguration();
  if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
    this.graphFilter = VertexProgramHelper.deserialize(
        ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
  this.gryoReader = GryoReader.build().mapper(
      GryoMapper.build().addRegistries(
          IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create();
  long start = split.getStart();
  final Path file = split.getPath();
  if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
    throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
  }
  // open the file and seek to the start of the split
  this.inputStream = file.getFileSystem(configuration).open(split.getPath());
  this.splitLength = split.getLength();
  if (this.splitLength > 0)
    this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
/**
 * Initialize the ORC file reader and batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Configuration conf = taskAttemptContext.getConfiguration();
  Reader reader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
          .filesystem(fileSplit.getPath().getFileSystem(conf)));
  Reader.Options options =
      OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
  recordReader = reader.rows(options);
}
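After initialize() (and initBatch() in the original class), rows are pulled batch by batch. A hypothetical consumption loop, assuming the Reader and RecordReader created above are still in scope (not part of the original class):

TypeDescription schema = reader.getSchema();
VectorizedRowBatch batch = schema.createRowBatch(); // default batch size of 1024 rows
while (recordReader.nextBatch(batch)) {
  for (int r = 0; r < batch.size; r++) {
    // read column vectors for row r, e.g. ((LongColumnVector) batch.cols[0]).vector[r]
  }
}
recordReader.close();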