public ExampleHttpJsonScanner(Configuration conf, Schema schema, TableMeta tableMeta, Fragment fragment) throws IOException { super(conf, schema, tableMeta, fragment); reader = new ExampleHttpJsonLineReader(conf, this.fragment, conf.getInt(READ_BUFFER_SIZE, 128 * StorageUnit.KB)); if (!this.reader.isCompressed()) { splittable = true; } startOffset = this.fragment.getStartKey(); endOffset = this.fragment.getEndKey(); maxAllowedErrorCount = Integer.parseInt(tableMeta.getProperty(TEXT_ERROR_TOLERANCE_MAXNUM, DEFAULT_TEXT_ERROR_TOLERANCE_MAXNUM)); }
private void initChannel() throws IOException { FileSystem fs = FileScanner.getFileSystem((TajoConf) conf, fragment.getPath()); if (fs instanceof LocalFileSystem) { File file; try { if (fragment.getPath().toUri().getScheme() != null) { file = new File(fragment.getPath().toUri()); } else { file = new File(fragment.getPath().toString()); } } catch (IllegalArgumentException iae) { throw new IOException(iae); } channel = new LocalFileInputChannel(new FileInputStream(file)); } else { channel = new FSDataInputChannel(fs.open(fragment.getPath())); } // initial set position if (fragment.getStartKey() > 0) { channel.seek(fragment.getStartKey()); } filePosition = fragment.getStartKey(); endOffset = fragment.getStartKey() + fragment.getLength(); if (LOG.isDebugEnabled()) { LOG.debug("RawFileScanner open:" + fragment.getPath() + ", offset :" + fragment.getStartKey() + ", fragment length :" + fragment.getLength()); } }
/**
 * Returns a copy of this fragment produced by {@code super.clone()}.
 *
 * <p>The copy's {@code diskIds} field is assigned this instance's
 * {@code diskIds} reference, so both objects refer to the same value.
 * NOTE(review): if {@code diskIds} is a mutable array or collection the two
 * fragments share it; confirm that sharing is intended.
 *
 * @return the cloned {@code FileFragment}
 * @throws CloneNotSupportedException if {@code super.clone()} throws it
 */
@Override
public Object clone() throws CloneNotSupportedException {
  final FileFragment copy = (FileFragment) super.clone();
  copy.diskIds = this.diskIds;
  return copy;
}
/**
 * Builds an RCFile scanner for the given fragment.
 *
 * <p>Besides delegating to the superclass, the constructor forces
 * {@code io.file.buffer.size} to 4096 on the supplied configuration, derives
 * the scan range from the fragment's start key and length, and resets
 * {@code start} to zero.
 *
 * @param conf     configuration; mutated by this constructor (see TODO below)
 * @param schema   passed through to the superclass
 * @param meta     passed through to the superclass
 * @param fragment passed through to the superclass
 * @throws IOException declared by this constructor and propagated from the
 *                     superclass constructor
 */
public RCFileScanner(Configuration conf, final Schema schema, final TableMeta meta, final Fragment fragment)
    throws IOException {
  super(conf, schema, meta, fragment);

  // TODO remove: this overrides io.file.buffer.size on the caller's configuration.
  conf.setInt("io.file.buffer.size", 4096);

  startOffset = this.fragment.getStartKey();
  endOffset = startOffset + this.fragment.getLength();
  start = 0;
}
File file; try { if (fragment.getPath().toUri().getScheme() != null) { file = new File(fragment.getPath().toUri()); } else { file = new File(fragment.getPath().toString()); filePosition = startOffset = fragment.getStartKey(); endOffset = fragment.getEndKey(); + ", fragment length :" + fragment.getLength()); if (fragment.getStartKey() > 0) { channel.position(fragment.getStartKey());
this.path = fragment.getPath(); this.codec = codec; this.types = types; long offset = fragment.getStartKey(); long maxOffset = fragment.getEndKey(); for(StripeInformation stripe: stripes) { long stripeStart = stripe.getOffset();
/**
 * Builds an ORC scanner for the given fragment.
 *
 * <p>Stores the fragment's path and resolves the file system for that path
 * from the supplied configuration.
 *
 * @param conf     configuration used to resolve the file system
 * @param schema   passed through to the superclass
 * @param meta     passed through to the superclass
 * @param fragment passed through to the superclass
 * @throws IOException declared by this constructor and propagated from the
 *                     superclass constructor or the file system lookup
 */
public OrcScanner(Configuration conf, Schema schema, TableMeta meta, Fragment fragment)
    throws IOException {
  super(conf, schema, meta, fragment);

  // this.fragment is the field set up by the superclass, not the constructor parameter.
  path = this.fragment.getPath();
  fileSystem = path.getFileSystem(conf);
}
/**
 * Reports scan progress as a fraction of the fragment length.
 *
 * @return {@code 1.0f} once {@code eos} is set; {@code 0.0f} when
 *         {@code filePosition} equals {@code startOffset}; otherwise the
 *         bytes consumed divided by the fragment length, capped at
 *         {@code 1.0f}
 */
@Override
public float getProgress() {
  if (eos) {
    return 1.0f;
  }

  final long consumed = filePosition - startOffset;
  return consumed == 0
      ? 0.0f
      : Math.min(1.0f, ((float) consumed / fragment.getLength()));
}
} // closes the enclosing scope that was opened outside this excerpt
@Override public void reset() throws IOException { // reset the buffer buffer.clear(); forceFillBuffer = true; filePosition = fragment.getStartKey(); recordCount = 0; channel.position(filePosition); eos = false; }
fs = FileScanner.getFileSystem((TajoConf) conf, fragment.getPath()); pos = startOffset = fragment.getStartKey(); end = fragment.getEndKey(); fis = fs.open(fragment.getPath()); File file; try { if (fragment.getPath().toUri().getScheme() != null) { file = new File(fragment.getPath().toUri()); } else { file = new File(fragment.getPath().toString()); is = inputStream; lineReader = new ByteBufLineReader(new LocalFileInputChannel(inputStream), BufferPool.directBuffer((int) Math.min(bufferSize, fragment.getLength()))); } else { fis = fs.open(fragment.getPath()); fis.seek(startOffset); is = fis; lineReader = new ByteBufLineReader(new FSDataInputChannel(fis), BufferPool.directBuffer((int) Math.min(bufferSize, fragment.getLength())));
/**
 * Reports scan progress as a fraction of the fragment length.
 *
 * @return {@code 0.0f} before initialization or when nothing has been
 *         consumed; {@code 1.0f} once {@code eos} is set; otherwise the bytes
 *         consumed since the fragment's start key divided by the fragment
 *         length, capped at {@code 1.0f}
 */
@Override
public float getProgress() {
  if (!inited) {
    return 0.0f;
  }
  if (eos) {
    return 1.0f;
  }

  final long consumed = filePosition - fragment.getStartKey();
  return consumed == 0
      ? 0.0f
      : Math.min(1.0f, ((float) consumed / fragment.getLength()));
}
} // closes the enclosing scope that was opened outside this excerpt
/**
 * Returns the string form of the fragment's path.
 *
 * @return {@code fragment.getPath().toString()}
 */
@Override
public String toString() {
  final Object location = fragment.getPath();
  return location.toString();
}
@Override public void seek(long offset) throws IOException { eos = false; filePosition = channel.position(); // do not fill the buffer if the offset is already included in the buffer. if(!forceFillBuffer && filePosition > offset && offset > filePosition - buffer.limit()){ buffer.position((int)(offset - (filePosition - buffer.limit()))); } else { if(offset < startOffset || offset > startOffset + fragment.getLength()){ throw new IndexOutOfBoundsException(String.format("range(%d, %d), offset: %d", startOffset, startOffset + fragment.getLength(), offset)); } channel.position(offset); filePosition = offset; buffer.clear(); forceFillBuffer = true; fillBuffer(); } }
@Override public void reset() throws IOException { // reload initial buffer filePosition = fragment.getStartKey(); recordCount = 0; seek(filePosition); eos = false; }
/* Resolve the file system for the fragment's path and open a SequenceFile.Reader on that path. */
fs = FileScanner.getFileSystem((TajoConf)conf, fragment.getPath());
reader = new SequenceFile.Reader(fs, fragment.getPath(), conf);
/* Scan range: [start, start + length) taken from the fragment's start key and length. */
this.start = fragment.getStartKey();
this.end = start + fragment.getLength();
/*
 * Only call sync() when the fragment begins beyond the reader's current position.
 * NOTE(review): presumably this aligns the reader to a record boundary at or after start;
 * confirm against the SequenceFile.Reader#sync contract.
 */
if (fragment.getStartKey() > reader.getPosition()) reader.sync(this.start);
@Override public TableStats getInputStats() { if(inputStats != null){ inputStats.setNumRows(recordCount); inputStats.setReadBytes(filePosition - fragment.getStartKey()); // actual read bytes (scan + rescan * n) inputStats.setNumBytes(fragment.getLength()); } return inputStats; }
/**
 * Prepares the scanner for reading.
 *
 * <p>Initializes the reader, defaults {@code targets} to every column of the
 * schema when none were set, calls {@code reset()}, and then runs the
 * superclass initialization. The fragment path and offsets are logged at
 * debug level.
 *
 * @throws IOException propagated from the reader, {@code reset()} or the
 *                     superclass {@code init()}
 */
@Override
public void init() throws IOException {
  reader.init();

  // No explicit projection was given: fall back to all schema columns.
  if (targets == null) {
    targets = schema.toArray();
  }

  reset();
  super.init();

  if (LOG.isDebugEnabled()) {
    LOG.debug("DelimitedTextFileScanner open:" + fragment.getPath()
        + "," + startOffset
        + "," + endOffset);
  }
}
public DelimitedTextFileScanner(Configuration conf, final Schema schema, final TableMeta meta, final Fragment fragment) throws IOException { super(conf, schema, meta, fragment); reader = new DelimitedLineReader(conf, this.fragment, conf.getInt(READ_BUFFER_SIZE, 128 * StorageUnit.KB)); if (!reader.isCompressed()) { splittable = true; } startOffset = this.fragment.getStartKey(); endOffset = this.fragment.getEndKey(); errorTorrenceMaxNum = Integer.parseInt(meta.getProperty(TEXT_ERROR_TOLERANCE_MAXNUM, DEFAULT_TEXT_ERROR_TOLERANCE_MAXNUM)); }
@Override public float getProgress() { if(!inited) return super.getProgress(); try { if(!more) { return 1.0f; } long filePos = getPosition(); if (startOffset == filePos) { return 0.0f; } else { //if scanner read the header, filePos moved to zero return Math.min(1.0f, (float)(Math.max(filePos - startOffset, 0)) / (float)(fragment.getLength())); } } catch (IOException e) { LOG.error(e.getMessage(), e); return 0.0f; } }
/**
 * Releases the scanner's resources.
 *
 * <p>Writes the final read-byte and row counts into {@code inputStats} when
 * present, releases and clears the tuple buffer when present, drops the
 * reader reference, and hands the channel to {@code IOUtils.cleanup}.
 *
 * @throws IOException declared by the overridden method
 */
@Override
public void close() throws IOException {
  // Record final statistics before anything is torn down.
  if (inputStats != null) {
    inputStats.setReadBytes(filePosition - fragment.getStartKey());
    inputStats.setNumRows(recordCount);
  }

  if (tupleBuffer != null) {
    tupleBuffer.release();
    tupleBuffer = null;
  }

  reader = null;
  IOUtils.cleanup(LOG, channel);
}