/**
 * Reports whether every file in {@code files} can be opened with an
 * {@link RCFile.Reader}.
 *
 * @param fs file system used to open each file
 * @param conf configuration handed to the reader
 * @param files files to probe
 * @return {@code false} when {@code files} is empty or when opening (or the
 *         first close of) any file raises an {@link IOException};
 *         {@code true} otherwise
 * @throws IOException if closing a reader in the cleanup path fails
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files)
    throws IOException {
  if (files.isEmpty()) {
    return false;
  }
  for (FileStatus candidate : files) {
    RCFile.Reader probe = null;
    try {
      probe = new RCFile.Reader(fs, candidate.getPath(), conf);
      probe.close();
      // Already closed: clear the reference so the cleanup below skips it.
      probe = null;
    } catch (IOException e) {
      // A file that cannot be opened as an RCFile fails validation.
      return false;
    } finally {
      if (probe != null) {
        probe.close();
      }
    }
  }
  return true;
}
}
/**
 * Reports whether every file in {@code files} can be opened with an
 * {@link RCFile.Reader}.
 *
 * @param fs file system used to open each file
 * @param conf configuration handed to the reader
 * @param files files to probe
 * @return {@code false} when {@code files} is empty or when opening (or the
 *         first close of) any file raises an {@link IOException};
 *         {@code true} otherwise
 * @throws IOException if closing a reader in the cleanup path fails
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files)
    throws IOException {
  if (files.isEmpty()) {
    return false;
  }
  for (FileStatus candidate : files) {
    RCFile.Reader probe = null;
    try {
      probe = new RCFile.Reader(fs, candidate.getPath(), conf);
      probe.close();
      // Already closed: clear the reference so the cleanup below skips it.
      probe = null;
    } catch (IOException e) {
      // A file that cannot be opened as an RCFile fails validation.
      return false;
    } finally {
      if (probe != null) {
        probe.close();
      }
    }
  }
  return true;
}
}
/**
 * Initializes the IO context for a file split by working out the block start
 * position and whether positions are block pointers.
 *
 * <ul>
 * <li>If the input format class name contains "SequenceFile", a
 *     {@link SequenceFile.Reader} is opened, synced to the split start, and
 *     its position and block-compression flag are recorded.</li>
 * <li>Else, if {@code recordReader} is an {@link RCFileRecordReader}, its
 *     {@code getStart()} value is used.</li>
 * <li>Else, if the class name contains "RCFile", an {@link RCFile.Reader} is
 *     opened, synced to the split start, and its position is recorded.</li>
 * <li>Otherwise the defaults ({@code -1}, {@code false}) are kept.</li>
 * </ul>
 *
 * Temporary readers are closed in {@code finally} blocks so that a failing
 * {@code sync} or {@code getPosition} does not leak the open stream.
 *
 * @param split the file split being read
 * @param job job configuration; also stored in {@code this.jobConf}
 * @param inputFormatClass input format class, inspected by name
 * @param recordReader record reader already created for the split
 * @throws IOException if the file system or a reader cannot be accessed
 */
public void initIOContext(FileSplit split, JobConf job, Class inputFormatClass,
    RecordReader recordReader) throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(job);
  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    try {
      blockPointer = in.isBlockCompressed();
      in.sync(split.getStart());
      blockStart = in.getPosition();
    } finally {
      in.close();
    }
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    try {
      in.sync(split.getStart());
      blockStart = in.getPosition();
    } finally {
      in.close();
    }
  }
  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>If the split starts past the reader's initial position, the reader is
 * synced to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileRecordReader(Configuration conf, FileSplit split) throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;

  boolean positioned = false;
  try {
    if (split.getStart() > in.getPosition()) {
      in.sync(split.getStart()); // sync to start
    }
    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>If the split starts past the reader's initial position, the reader is
 * synced to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileBlockMergeRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;

  boolean positioned = false;
  try {
    if (split.getStart() > in.getPosition()) {
      in.sync(split.getStart()); // sync to start
    }
    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}
// Close the writer, then open a reader on the same file with the same conf.
writer.close(); RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
new Text("hive and hadoop"), null, null}; RCFile.Reader reader = new RCFile.Reader(fs, file, conf); assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
// Open the reader with tmpConf as its configuration.
RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);
// Record the wall-clock start, call setReadAllColumns on conf, then open the reader with that conf.
long start = System.currentTimeMillis(); ColumnProjectionUtils.setReadAllColumns(conf); RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
// Add column index 3 to readCols, append readCols to conf's read-column list, then open the reader with that conf.
readCols.add(Integer.valueOf(3)); ColumnProjectionUtils.appendReadColumns(conf, readCols); RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
/**
 * Initializes the IO context for a file split by working out the block start
 * position and whether positions are block pointers.
 *
 * <ul>
 * <li>If the input format class name contains "SequenceFile", a
 *     {@link SequenceFile.Reader} is opened, synced to the split start, and
 *     its position and block-compression flag are recorded.</li>
 * <li>Else, if {@code recordReader} is an {@link RCFileRecordReader}, its
 *     {@code getStart()} value is used.</li>
 * <li>Else, if the class name contains "RCFile", an {@link RCFile.Reader} is
 *     opened, synced to the split start, and its position is recorded.</li>
 * <li>Otherwise the defaults ({@code -1}, {@code false}) are kept.</li>
 * </ul>
 *
 * Temporary readers are closed in {@code finally} blocks so that a failing
 * {@code sync} or {@code getPosition} does not leak the open stream.
 *
 * @param split the file split being read
 * @param job job configuration; also stored in {@code this.jobConf}
 * @param inputFormatClass input format class, inspected by name
 * @param recordReader record reader already created for the split
 * @throws IOException if the file system or a reader cannot be accessed
 */
public void initIOContext(FileSplit split, JobConf job, Class inputFormatClass,
    RecordReader recordReader) throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(job);
  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    try {
      blockPointer = in.isBlockCompressed();
      in.sync(split.getStart());
      blockStart = in.getPosition();
    } finally {
      in.close();
    }
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    try {
      in.sync(split.getStart());
      blockStart = in.getPosition();
    } finally {
      in.close();
    }
  }
  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
// Add the last column index (allColumnsNumber - 1) to readCols, append readCols to conf's read-column list, then open the reader.
readCols.add(Integer.valueOf(allColumnsNumber - 1)); ColumnProjectionUtils.appendReadColumns(conf, readCols); RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
/**
 * Reads every row of {@code file} with column index 0 appended to the
 * read-column list and returns the number of rows read.
 *
 * <p>When {@code chechCorrect} is set, the random generators are reset and
 * each row's first column is compared with the first column of the row
 * produced by {@code nextRandomRow}.
 *
 * <p>The reader is closed in a {@code finally} block so it is released
 * whether the read completes or fails.
 *
 * @param fs file system holding the file
 * @param file RCFile to read
 * @param allColumnsNumber number of columns used to size the check buffers
 * @param chechCorrect whether to verify the first column of every row
 * @return the number of rows read
 * @throws IOException if reading fails
 * @throws IllegalStateException if a verified column does not match
 */
public int performRCFileReadFirstColumnTest(FileSystem fs, Path file,
    int allColumnsNumber, boolean chechCorrect) throws IOException {
  byte[][] checkBytes = null;
  BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
  if (chechCorrect) {
    resetRandomGenerators();
    checkBytes = new byte[allColumnsNumber][];
  }

  int actualReadCount = 0;

  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(0));
  ColumnProjectionUtils.appendReadColumns(conf, readCols);
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      if (chechCorrect) {
        nextRandomRow(checkBytes, checkRow);
        if (!checkRow.get(0).equals(cols.get(0))) {
          throw new IllegalStateException("Compare read and write error.");
        }
      }
      actualReadCount++;
    }
  } finally {
    reader.close();
  }
  return actualReadCount;
}
/**
 * Reads every row of {@code file} after calling
 * {@code ColumnProjectionUtils.setReadAllColumns(conf)} and returns the
 * number of rows read.
 *
 * <p>When {@code chechCorrect} is set, the random generators are reset and
 * each row is compared with the row produced by {@code nextRandomRow}.
 *
 * <p>The reader is closed in a {@code finally} block so it is released
 * whether the read completes or fails.
 *
 * @param fs file system holding the file
 * @param file RCFile to read
 * @param allColumnsNumber number of columns used to size the check buffers
 * @param chechCorrect whether to verify every row
 * @return the number of rows read
 * @throws IOException if reading fails
 * @throws IllegalStateException if a verified row does not match
 */
public int performRCFileFullyReadColumnTest(FileSystem fs, Path file,
    int allColumnsNumber, boolean chechCorrect) throws IOException {
  byte[][] checkBytes = null;
  BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
  if (chechCorrect) {
    resetRandomGenerators();
    checkBytes = new byte[allColumnsNumber][];
  }

  int actualReadCount = 0;

  ColumnProjectionUtils.setReadAllColumns(conf);
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      if (chechCorrect) {
        nextRandomRow(checkBytes, checkRow);
        if (!checkRow.equals(cols)) {
          throw new IllegalStateException("Compare read and write error.");
        }
      }
      actualReadCount++;
    }
  } finally {
    reader.close();
  }
  return actualReadCount;
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>{@code useCache} is read from
 * {@code HiveConf.ConfVars.HIVEUSERCFILESYNCCACHE}. If the split starts past
 * the reader's initial position, the reader is positioned in one of two ways:
 * when caching is enabled and {@code syncCache} returns a value other than
 * {@code -1} for this split, the reader seeks to that value; otherwise it
 * syncs to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileRecordReader(Configuration conf, FileSplit split) throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;
  this.split = split;

  boolean positioned = false;
  try {
    useCache = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEUSERCFILESYNCCACHE);

    if (split.getStart() > in.getPosition()) {
      long oldSync = useCache ? syncCache.get(split) : -1;
      if (oldSync == -1) {
        in.sync(split.getStart()); // sync to start
      } else {
        in.seek(oldSync);
      }
    }

    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>{@code useCache} is read from
 * {@code HiveConf.ConfVars.HIVEUSERCFILESYNCCACHE}. If the split starts past
 * the reader's initial position, the reader is positioned in one of two ways:
 * when caching is enabled and {@code syncCache} returns a value other than
 * {@code -1} for this split, the reader seeks to that value; otherwise it
 * syncs to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileRecordReader(Configuration conf, FileSplit split) throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;
  this.split = split;

  boolean positioned = false;
  try {
    useCache = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEUSERCFILESYNCCACHE);

    if (split.getStart() > in.getPosition()) {
      long oldSync = useCache ? syncCache.get(split) : -1;
      if (oldSync == -1) {
        in.sync(split.getStart()); // sync to start
      } else {
        in.seek(oldSync);
      }
    }

    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}
/**
 * Prepares this reader for the given split: opens an {@link RCFile.Reader}
 * on the split's file, syncs it to the split start when the split begins
 * past the reader's initial position, records the {@code start}/{@code end}
 * bounds and allocates the reusable key and value objects.
 *
 * @param split the split to read; cast to {@link FileSplit}
 * @param context task context supplying the configuration
 * @throws IOException if the file cannot be opened or positioned
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  Path splitPath = fileSplit.getPath();
  Configuration configuration = context.getConfiguration();

  this.in = new RCFile.Reader(splitPath.getFileSystem(configuration), splitPath, configuration);
  this.end = fileSplit.getStart() + fileSplit.getLength();

  boolean startsPastReader = fileSplit.getStart() > in.getPosition();
  if (startsPastReader) {
    in.sync(fileSplit.getStart());
  }

  this.start = in.getPosition();
  more = start < end;

  key = new LongWritable();
  value = new BytesRefArrayWritable();
}
}
/**
 * Reads the classpath resource {@code rc-file-v0.rc} and checks that it
 * contains exactly one row whose columns match the expected strings.
 *
 * <p>The reader is closed in a {@code finally} block so that a failing
 * assertion does not leave the file open.
 *
 * @throws IOException if the file cannot be opened or read
 */
@Test
public void testReadOldFileHeader() throws IOException {
  String[] row = new String[]{"Tester", "Bart", "333 X St.", "Reno", "NV", "USA"};
  RCFile.Reader reader =
      new RCFile.Reader(fs, new Path(HiveTestUtils.getFileFromClasspath("rc-file-v0.rc")), conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    assertTrue("old file reader first row", reader.next(rowID));
    reader.getCurrentRow(cols);
    assertEquals(row.length, cols.size());
    for (int i = 0; i < cols.size(); ++i) {
      assertEquals(row[i], new String(cols.get(i).getBytesCopy()));
    }
    assertFalse("old file reader end", reader.next(rowID));
  } finally {
    reader.close();
  }
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>If the split starts past the reader's initial position, the reader is
 * synced to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileBlockMergeRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;

  boolean positioned = false;
  try {
    if (split.getStart() > in.getPosition()) {
      in.sync(split.getStart()); // sync to start
    }
    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}
/**
 * Opens an {@link RCFile.Reader} on the split's file and positions it for
 * reading the split.
 *
 * <p>If the split starts past the reader's initial position, the reader is
 * synced to the split start. The resulting position becomes {@code start};
 * {@code end} is the split start plus the split length, and {@code more}
 * records whether {@code start < end}.
 *
 * <p>If positioning fails, the reader is closed before the exception
 * propagates, because a failed constructor leaves no object on which a
 * caller could invoke close.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public RCFileBlockMergeRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new RCFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  this.conf = conf;

  boolean positioned = false;
  try {
    if (split.getStart() > in.getPosition()) {
      in.sync(split.getStart()); // sync to start
    }
    this.start = in.getPosition();
    positioned = true;
  } finally {
    if (!positioned) {
      in.close();
    }
  }

  more = start < end;
}