/**
 * Supplies a new value object for callers that need a blank instance to fill.
 *
 * @return a {@link BytesRefArrayWritable} built with its no-argument constructor
 */
@Override
public Writable newInstance() {
  final BytesRefArrayWritable fresh = new BytesRefArrayWritable();
  return fresh;
}
/**
 * Prepares this reader for the given split.
 *
 * <p>Opens an {@link RCFile.Reader} on the split's file, records the split's end offset,
 * calls {@code sync} on the reader when the split begins beyond the reader's current
 * position, and then records the reader's position as the effective start. Fresh key and
 * value holders are allocated last.
 *
 * @param split   the split to read; cast to {@link FileSplit}
 * @param context supplies the {@link Configuration} used to open the file
 * @throws IOException          if opening or positioning the reader fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final FileSplit fileSplit = (FileSplit) split;
  final Path splitFile = fileSplit.getPath();
  final Configuration configuration = context.getConfiguration();

  this.in = new RCFile.Reader(splitFile.getFileSystem(configuration), splitFile, configuration);

  final long splitStart = fileSplit.getStart();
  this.end = splitStart + fileSplit.getLength();

  // Only reposition when the split begins past where the freshly opened reader sits.
  final boolean splitBeginsAhead = splitStart > in.getPosition();
  if (splitBeginsAhead) {
    in.sync(splitStart);
  }

  this.start = in.getPosition();
  this.more = this.start < this.end;

  this.key = new LongWritable();
  this.value = new BytesRefArrayWritable();
}
}
/**
 * Calls {@code next} on a {@link SequenceFile.Reader} {@code count} times, discarding
 * whatever is read.
 *
 * <p>The return value of {@code next} is intentionally ignored, matching the original
 * behavior: exactly {@code count} calls are issued regardless of how many records exist.
 * The reader is always closed, even when a read fails.
 *
 * @param fs    file system holding {@code file}
 * @param count number of {@code next} calls to issue
 * @param file  path of the sequence file to read
 * @throws IOException if opening, reading or closing the file fails
 */
public void performSequenceFileRead(FileSystem fs, int count, Path file) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
  try {
    ByteWritable key = new ByteWritable();
    BytesRefArrayWritable val = new BytesRefArrayWritable();
    for (int i = 0; i < count; i++) {
      reader.next(key, val);
    }
  } finally {
    // Previously the reader was never closed, leaking the underlying stream.
    reader.close();
  }
}
/**
 * Reads every row of an RCFile after passing column index 0 to
 * {@code ColumnProjectionUtils.appendReadColumns}, and counts the rows read.
 *
 * <p>When {@code chechCorrect} is set, the random generators are reset and, for each row,
 * {@code nextRandomRow} fills {@code checkRow} (presumably with the data that was
 * originally written — confirm against the writer), whose first column is compared with
 * the first column actually read.
 *
 * <p>The reader is always closed, including when verification fails.
 *
 * @param fs               file system holding {@code file}
 * @param file             RCFile to read
 * @param allColumnsNumber column count used to size the verification row
 * @param chechCorrect     whether to verify column 0 of every row
 * @return the number of rows read
 * @throws IOException           if opening, reading or closing the file fails
 * @throws IllegalStateException if verification is enabled and a row's first column differs
 */
public int performRCFileReadFirstColumnTest(FileSystem fs, Path file,
    int allColumnsNumber, boolean chechCorrect) throws IOException {
  byte[][] checkBytes = null;
  BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
  if (chechCorrect) {
    resetRandomGenerators();
    checkBytes = new byte[allColumnsNumber][];
  }

  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(0));
  ColumnProjectionUtils.appendReadColumns(conf, readCols);

  int actualReadCount = 0;
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      if (chechCorrect) {
        nextRandomRow(checkBytes, checkRow);
        if (!checkRow.get(0).equals(cols.get(0))) {
          throw new IllegalStateException("Compare read and write error.");
        }
      }
      actualReadCount++;
    }
  } finally {
    // Previously the reader was never closed, leaking the underlying stream.
    reader.close();
  }
  return actualReadCount;
}
/**
 * Reads every row of an RCFile after calling
 * {@code ColumnProjectionUtils.setReadAllColumns}, and counts the rows read.
 *
 * <p>When {@code chechCorrect} is set, the random generators are reset and, for each row,
 * {@code nextRandomRow} fills {@code checkRow} (presumably with the data that was
 * originally written — confirm against the writer), which is compared as a whole with the
 * row actually read.
 *
 * <p>The reader is always closed, including when verification fails.
 *
 * @param fs               file system holding {@code file}
 * @param file             RCFile to read
 * @param allColumnsNumber column count used to size the verification row
 * @param chechCorrect     whether to verify every row
 * @return the number of rows read
 * @throws IOException           if opening, reading or closing the file fails
 * @throws IllegalStateException if verification is enabled and a row differs
 */
public int performRCFileFullyReadColumnTest(FileSystem fs, Path file,
    int allColumnsNumber, boolean chechCorrect) throws IOException {
  byte[][] checkBytes = null;
  BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
  if (chechCorrect) {
    resetRandomGenerators();
    checkBytes = new byte[allColumnsNumber][];
  }

  ColumnProjectionUtils.setReadAllColumns(conf);

  int actualReadCount = 0;
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      if (chechCorrect) {
        nextRandomRow(checkBytes, checkRow);
        if (!checkRow.equals(cols)) {
          throw new IllegalStateException("Compare read and write error.");
        }
      }
      actualReadCount++;
    }
  } finally {
    // Previously the reader was never closed, leaking the underlying stream.
    reader.close();
  }
  return actualReadCount;
}
BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber); if (chechCorrect) { resetRandomGenerators(); BytesRefArrayWritable cols = new BytesRefArrayWritable(); while (reader.next(rowID)) { reader.getCurrentRow(cols);
RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec()); BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length); final int recCount = 100; Random rand = new Random(); break; BytesRefArrayWritable cols = new BytesRefArrayWritable(); reader.getCurrentRow(cols); cols.resetValid(8);
RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); for (int i = 0; i < bytesArray.length; i++) { BytesRefWritable cu = null;
BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); for (int i = 0; i < bytesArray.length; i++) { BytesRefWritable cu = null;
BytesRefArrayWritable cols = new BytesRefArrayWritable();
"hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8)}; s = new BytesRefArrayWritable(bytesArray.length); s.set(0, new BytesRefWritable("123".getBytes(StandardCharsets.UTF_8))); s.set(1, new BytesRefWritable("456".getBytes(StandardCharsets.UTF_8)));
new Text("dog")), new DefaultCodec()); BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length); for (int i = 0; i < record_1.length; i++) { BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, BytesRefArrayWritable cols = new BytesRefArrayWritable(); reader.getCurrentRow(cols); cols.resetValid(8);
/**
 * Writes the same row {@code count} times to a new RCFile and prints the resulting file size.
 *
 * <p>Calls {@code cleanup()} first, sets the column number on {@code conf} to
 * {@code fieldsData.length}, and builds one row whose i-th column wraps
 * {@code fieldsData[i]} in full. The writer is closed in a {@code finally} block so it is
 * not leaked when building the row or appending fails.
 *
 * @param fs         file system to write to
 * @param count      number of times the row is appended
 * @param file       destination path
 * @param fieldsData one byte array per column
 * @param conf       configuration passed to the writer; its column number is updated
 * @throws IOException    if writing, closing or reading the file status fails
 * @throws SerDeException declared by the original signature
 */
private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData,
    Configuration conf) throws IOException, SerDeException {
  cleanup();

  RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

  // Declared outside the try block because the size is reported after the writer is closed.
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
  try {
    for (int i = 0; i < fieldsData.length; i++) {
      bytes.set(i, new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length));
    }
    for (int i = 0; i < count; i++) {
      writer.append(bytes);
    }
  } finally {
    writer.close();
  }

  long fileLen = fs.getFileStatus(file).getLen();
  System.out.println("The file size of RCFile with " + bytes.size()
      + " number columns and " + count + " number rows is " + fileLen);
}
BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); for (int i = 0; i < bytesArray.length; i++) { BytesRefWritable cu = null;
/**
 * Verifies that the classpath resource {@code rc-file-v0.rc} can be read: it must contain
 * exactly one row whose columns match the expected strings.
 *
 * <p>Column bytes are decoded explicitly as UTF-8 so the comparison does not depend on the
 * platform default charset, and the reader is closed even when an assertion fails.
 *
 * @throws IOException if opening, reading or closing the file fails
 */
@Test
public void testReadOldFileHeader() throws IOException {
  String[] row = new String[]{"Tester", "Bart", "333 X St.", "Reno", "NV", "USA"};
  RCFile.Reader reader = new RCFile.Reader(fs,
      new Path(HiveTestUtils.getFileFromClasspath("rc-file-v0.rc")), conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    assertTrue("old file reader first row", reader.next(rowID));
    reader.getCurrentRow(cols);
    assertEquals(row.length, cols.size());
    for (int i = 0; i < cols.size(); ++i) {
      // Fully qualified so this block does not depend on the file's import list.
      assertEquals(row[i],
          new String(cols.get(i).getBytesCopy(), java.nio.charset.StandardCharsets.UTF_8));
    }
    assertFalse("old file reader end", reader.next(rowID));
  } finally {
    reader.close();
  }
}
"NULL".getBytes(StandardCharsets.UTF_8)}; BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length); for (int i = 0; i < record_1.length; i++) { BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
/**
 * Writes {@code rowCount} rows to a new RCFile, refilling the row via
 * {@code nextRandomRow} before each append.
 *
 * <p>Any existing {@code file} is deleted first, the random generators are reset, and the
 * column number on {@code conf} is set to {@code columnNum}. The writer is closed in a
 * {@code finally} block so it is not leaked when row generation or appending fails.
 *
 * @param fs        file system to write to
 * @param rowCount  number of rows to append
 * @param file      destination path; deleted recursively before writing
 * @param columnNum number of columns per row
 * @param codec     compression codec handed to the writer
 * @throws IOException if deleting, writing or closing fails
 */
private void writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
    CompressionCodec codec) throws IOException {
  fs.delete(file, true);

  resetRandomGenerators();

  RCFileOutputFormat.setColumnNumber(conf, columnNum);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
  try {
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    byte[][] columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
      bytes.set(i, new BytesRefWritable());
    }

    for (int i = 0; i < rowCount; i++) {
      nextRandomRow(columnRandom, bytes);
      writer.append(bytes);
    }
  } finally {
    writer.close();
  }
}
BytesRefArrayWritable cols = new BytesRefArrayWritable(); while (reader.next(rowID)) { reader.getCurrentRow(cols);
rest = new BytesRefArrayWritable();
private void writeSeqenceFileTest(FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec) throws IOException { byte[][] columnRandom; resetRandomGenerators(); BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum); columnRandom = new byte[columnNum][]; for (int i = 0; i < columnNum; i++) { BytesRefWritable cu = new BytesRefWritable(); bytes.set(i, cu); } // zero length key is not allowed by block compress writer, so we use a byte // writable ByteWritable key = new ByteWritable(); SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, file, ByteWritable.class, BytesRefArrayWritable.class, CompressionType.BLOCK, codec); for (int i = 0; i < rowCount; i++) { nextRandomRow(columnRandom, bytes); seqWriter.append(key, bytes); } seqWriter.close(); }