public HadoopDecompressor(CompressionCodec codec) {
  this.codec = requireNonNull(codec, "codec is null");
  decompressor = CodecPool.getDecompressor(codec);
}
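The constructor above only acquires from the pool; a minimal sketch of the matching release (the method name destroy is an assumption, the field follows the constructor) would be:

// Hypothetical companion method: hand the pooled decompressor back
// exactly once, then drop the reference so a second call is a no-op.
public void destroy() {
  if (decompressor != null) {
    CodecPool.returnDecompressor(decompressor);
    decompressor = null;
  }
}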
public Decompressor getDecompressor() {
  CompressionCodec codec = getCodec(conf);
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (LOG.isTraceEnabled()) {
      LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
    }
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to the CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool is already finished()");
      }
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
public FlexibleDelimitedFileReader(LogFilePath path, CompressionCodec codec) throws IOException {
  Path fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  InputStream inputStream = fs.open(fsPath);
  this.mReader = (codec == null)
      ? new BufferedInputStream(inputStream)
      : new BufferedInputStream(
          codec.createInputStream(inputStream, mDecompressor = CodecPool.getDecompressor(codec)));
  this.mOffset = path.getOffset();
}
public DelimitedTextFileReader(LogFilePath path, CompressionCodec codec) throws IOException {
  Path fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  InputStream inputStream = fs.open(fsPath);
  this.mReader = (codec == null)
      ? new BufferedInputStream(inputStream)
      : new BufferedInputStream(
          codec.createInputStream(inputStream, mDecompressor = CodecPool.getDecompressor(codec)));
  this.mOffset = path.getOffset();
}
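Both reader constructors above stash the pooled instance in mDecompressor, so a close() along these lines is needed to avoid leaking pool entries (a sketch; only the field names are taken from the constructors):

public void close() throws IOException {
  try {
    mReader.close();
  } finally {
    // Return the pooled decompressor even if closing the stream throws.
    if (mDecompressor != null) {
      CodecPool.returnDecompressor(mDecompressor);
      mDecompressor = null;
    }
  }
}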
public Decompressor getDecompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to the CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool already finished()");
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Got a decompressor: " + decompressor.hashCode());
      }
      // The following reset is necessary to get around bugs in 0.18 where a
      // decompressor is referenced after being returned to the codec pool.
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
/**
 * Create an input stream with a codec taken from the global CodecPool.
 *
 * @param codec the codec to use to create the input stream
 * @param conf the configuration to use if we need to create a new codec
 * @param in the input stream to wrap
 * @return the new input stream
 * @throws IOException if the input stream could not be created
 */
static CompressionInputStream createInputStreamWithCodecPool(
    CompressionCodec codec, Configuration conf, InputStream in) throws IOException {
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  CompressionInputStream stream = null;
  try {
    stream = codec.createInputStream(in, decompressor);
  } finally {
    if (stream == null) {
      // Stream creation failed, so hand the decompressor straight back.
      CodecPool.returnDecompressor(decompressor);
    } else {
      // The stream now owns the decompressor and returns it when closed.
      stream.setTrackedDecompressor(decompressor);
    }
  }
  return stream;
}
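A usage sketch for the helper above (the gzip codec and file name are illustrative; because the stream tracks its decompressor, closing the stream is enough to return it to the pool):

Configuration conf = new Configuration();
GzipCodec gzip = new GzipCodec();
gzip.setConf(conf);
try (InputStream raw = Files.newInputStream(Paths.get("data.gz")); // illustrative path
     CompressionInputStream in = createInputStreamWithCodecPool(gzip, conf, raw)) {
  // Read decompressed bytes from 'in'; close() hands the tracked
  // decompressor back to the CodecPool.
}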
private ByteBuffer decompress(CompressionCodec compressor, InputStream cellBlockStream,
    int osInitialSize) throws IOException {
  // GZIPCodec fails w/ NPE if no configuration.
  if (compressor instanceof Configurable) {
    ((Configurable) compressor).setConf(this.conf);
  }
  Decompressor poolDecompressor = CodecPool.getDecompressor(compressor);
  CompressionInputStream cis = compressor.createInputStream(cellBlockStream, poolDecompressor);
  ByteBufferOutputStream bbos;
  try {
    // TODO: This is ugly. The buffer will be resized on us if we guess wrong.
    // TODO: Reuse buffers.
    bbos = new ByteBufferOutputStream(osInitialSize);
    IOUtils.copy(cis, bbos);
    bbos.close();
    return bbos.getByteBuffer();
  } finally {
    CodecPool.returnDecompressor(poolDecompressor);
  }
}
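The compression half of the pool is symmetric; a hedged sketch of the reverse direction (the method name and byte-array handling are assumptions; only getCompressor/returnCompressor are from the CodecPool API):

// Hypothetical counterpart: compress a payload with a pooled Compressor.
private byte[] compress(CompressionCodec codec, byte[] payload) throws IOException {
  Compressor poolCompressor = CodecPool.getCompressor(codec);
  try {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (CompressionOutputStream cos = codec.createOutputStream(baos, poolCompressor)) {
      cos.write(payload);
    } // close() finishes the compressed stream before toByteArray()
    return baos.toByteArray();
  } finally {
    CodecPool.returnCompressor(poolCompressor);
  }
}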
valBuffer = new DataInputBuffer();
if (decompress) {
  valDecompressor = CodecPool.getDecompressor(codec);
  valInFilter = codec.createInputStream(valBuffer, valDecompressor);
  valIn = new DataInputStream(valInFilter);

  valLenBuffer = new DataInputBuffer();
  keyLenDecompressor = CodecPool.getDecompressor(codec);
  keyLenInFilter = codec.createInputStream(keyLenBuffer, keyLenDecompressor);
  keyLenIn = new DataInputStream(keyLenInFilter);

  keyDecompressor = CodecPool.getDecompressor(codec);
  keyInFilter = codec.createInputStream(keyBuffer, keyDecompressor);
  keyIn = new DataInputStream(keyInFilter);

  valLenDecompressor = CodecPool.getDecompressor(codec);
  valLenInFilter = codec.createInputStream(valLenBuffer, valLenDecompressor);
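Each of the four getDecompressor calls in the fragment above needs a matching return on close; a cleanup sketch (field names follow the fragment; CodecPool.returnDecompressor ignores null arguments):

// Return every pooled decompressor once, then clear the fields so
// that a repeated close() cannot return the same instance twice.
CodecPool.returnDecompressor(keyLenDecompressor);
CodecPool.returnDecompressor(keyDecompressor);
CodecPool.returnDecompressor(valLenDecompressor);
CodecPool.returnDecompressor(valDecompressor);
keyLenDecompressor = keyDecompressor = null;
valLenDecompressor = valDecompressor = null;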
public Decompressor getDecompressor() {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to the CodecPool but is still using it.
        log.warn("Decompressor obtained from CodecPool already finished()");
      } else {
        log.debug("Got a decompressor: {}", decompressor.hashCode());
      }
      // The following reset is necessary to get around bugs in 0.18 where a
      // decompressor is referenced after being returned to the codec pool.
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
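All three getDecompressor() helpers above leave the return half of the lifecycle to the caller; a minimal caller sketch (codec and rawStream are assumed to be in scope):

Decompressor decompressor = CodecPool.getDecompressor(codec);
try (InputStream in = codec.createInputStream(rawStream, decompressor)) {
  // Consume decompressed bytes from 'in'...
} finally {
  // returnDecompressor is a no-op for null, so this is safe even when
  // the codec did not supply a pooled decompressor.
  CodecPool.returnDecompressor(decompressor);
}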
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);

  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
public BytesDecompressor(CompressionCodec codec) {
  this.codec = codec;
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
  } else {
    decompressor = null;
  }
}
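A release hook in the style Parquet uses would pair with this constructor; a sketch (the method name release is an assumption):

// Hypothetical release hook: a null codec means no pooled instance was taken.
public void release() {
  if (decompressor != null) {
    CodecPool.returnDecompressor(decompressor);
  }
}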
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Read all the data, then close the reader twice.
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  // The double close must not pollute the pool: ten fresh requests
  // should still yield ten distinct decompressors.
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
HeapBytesDecompressor(CompressionCodecName codecName) {
  this.codec = getCodec(codecName);
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
  } else {
    decompressor = null;
  }
}
private InputStream createInputStream(Configuration job, final Path file) throws IOException {
  final FileSystem fs = file.getFileSystem(job);
  InputStream in = fs.open(file);
  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null != codec) {
    decompressor = CodecPool.getDecompressor(codec);
    in = codec.createInputStream(in, decompressor);
  }
  return in;
}
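The decompressor field set here must be handed back when the reader closes; a close() sketch in the usual record-reader style (the in field is assumed to hold the stream returned above):

public synchronized void close() throws IOException {
  try {
    if (in != null) {
      in.close();
    }
  } finally {
    // Return the pooled decompressor even if closing the stream throws.
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null;
    }
  }
}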
@Override
public Boolean call() throws Exception {
  Decompressor c = CodecPool.getDecompressor(codec);
  queue.put(c);
  return c != null;
}
};
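A driver sketch for the Callable above (executor setup is illustrative; queue is assumed to be a BlockingQueue<Decompressor> shared with the task), showing how the queue lets the caller drain and return every pooled instance afterwards:

ExecutorService executor = Executors.newFixedThreadPool(4);
List<Future<Boolean>> results = new ArrayList<>();
for (int i = 0; i < 4; i++) {
  results.add(executor.submit(task)); // 'task' is the Callable above
}
for (Future<Boolean> result : results) {
  assertTrue(result.get());
}
// Drain the queue and hand every decompressor back to the pool.
Decompressor d;
while ((d = queue.poll()) != null) {
  CodecPool.returnDecompressor(d);
}
executor.shutdown();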