/**
 * Whenever we are reading a batch, we must ensure that all the records in the batch
 * have the same bucket id as the bucket id of the split. If not, throw exception.
 *
 * @param bucketPropertyFromRecord encoded bucket property read from the current record
 * @throws IOException if the record's decoded writer (bucket) id differs from the
 *         bucket id of the containing split
 */
private void checkBucketId(int bucketPropertyFromRecord) throws IOException {
  // Decode using whichever codec version actually wrote this record.
  int bucketIdFromRecord = BucketCodec.determineVersion(bucketPropertyFromRecord)
      .decodeWriterId(bucketPropertyFromRecord);
  if (bucketIdFromRecord != bucketForSplit) {
    // Park a sentinel key so callers don't resume reading from a corrupt position.
    DeleteRecordKey dummy = new DeleteRecordKey();
    setCurrentDeleteKey(dummy);
    // BUG FIX: the original message printed the freshly-constructed (empty) dummy
    // key, which carries no diagnostic value; report the decoded bucket id instead.
    throw new IOException("Corrupted records with different bucket ids "
        + "from the containing bucket file found! Expected bucket id "
        + bucketForSplit + ", however found " + bucketIdFromRecord
        + ". (" + orcSplit + "," + deleteDeltaFile + ")");
  }
}
/**
 * Encodes a bucket id and statement id into a bucket property using codec V1.
 *
 * @param conf configuration used to build the ACID output options
 * @param bucketId plain bucket id to encode
 * @param statementId statement id to encode alongside the bucket id
 * @return the V1-encoded bucket property
 */
static int encodeBucketId(Configuration conf, int bucketId, int statementId) {
  AcidOutputFormat.Options options =
      new AcidOutputFormat.Options(conf).bucket(bucketId).statementId(statementId);
  return BucketCodec.V1.encode(options);
}
/**
/**
 * Renders an encoded bucket property as {@code raw(version.writerId.statementId)}
 * for diagnostics; the "no bucket" sentinel -1 is rendered as plain "-1".
 *
 * @param bucketId encoded bucket property, or -1
 * @return human-readable form of the bucket property
 */
public static String bucketToString(int bucketId) {
  if (bucketId == -1) {
    return String.valueOf(bucketId);
  }
  BucketCodec codec = BucketCodec.determineVersion(bucketId);
  StringBuilder text = new StringBuilder();
  text.append(bucketId).append('(')
      .append(codec.getVersion()).append('.')
      .append(codec.decodeWriterId(bucketId)).append('.')
      .append(codec.decodeStatementId(bucketId)).append(')');
  return text.toString();
}
}
// Test-only hook: when running under HIVE_IN_TEST or HIVE_IN_TEZ_TEST, the bucket
// codec version may be forced via TESTMODE_BUCKET_CODEC_VERSION (defaults to V1).
// NOTE(review): this chunk is the interior of a larger constructor/method; the
// brace opened by the 'if' is closed outside the visible span — confirm against
// the full file before assuming the trailing statements are inside the 'if'.
if(hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEST.name(), false)
    || hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEZ_TEST.name(), false)) {
  bucketCodec = BucketCodec.getCodec(
      hc.getInt(HiveConf.ConfVars.TESTMODE_BUCKET_CODEC_VERSION.name(),
          BucketCodec.V1.getVersion()));
// Encode the bucket property and derive the target file name for this writer.
this.bucket.set(bucketCodec.encode(options));
this.path = AcidUtils.createFilename(partitionRoot, options);
this.deleteEventWriter = null;
/**
 * Determines which codec version encoded the given bucket property by inspecting
 * its top 3 bits.
 *
 * @param bucket encoded bucket property
 * @return the codec whose version matches the property's top 3 bits
 * @throws IllegalArgumentException if the top 3 bits do not name a known version
 */
public static BucketCodec determineVersion(int bucket) {
  assert 7 << 29 == BucketCodec.TOP3BITS_MASK;
  //look at top 3 bits and return appropriate enum
  try {
    return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29);
  } catch (IllegalArgumentException ex) {
    // BUG FIX: chain the original exception as the cause; the original rethrow
    // kept only getMessage() and discarded the stack trace.
    throw new IllegalArgumentException(
        ex.getMessage() + " Cannot decode version from " + bucket, ex);
  }
}
public static BucketCodec getCodec(int version) {
/**
 * Convert a RecordIdentifier. This is done so that we can use the RecordIdentifier in place
 * of the bucketing column.
 * @param i RecordIdentifier to convert
 * @return value of the bucket identifier, or null when the input is null
 */
public IntWritable evaluate(RecordIdentifier i) {
  if (i == null) {
    return null;
  }
  int bucketProperty = i.getBucketProperty();
  BucketCodec decoder = BucketCodec.determineVersion(bucketProperty);
  intWritable.set(decoder.decodeWriterId(bucketProperty));
  return intWritable;
}
/**
 * Computes the record's bucket id, encodes it as a V1 bucket property, and stamps
 * it onto the record's synthetic RecordIdentifier field.
 *
 * @param record the record to decorate
 * @return the same record instance, with its record-identifier field set
 */
@Override
public Object attachBucketIdToRecord(Object record) {
  AcidOutputFormat.Options options =
      new AcidOutputFormat.Options(null).bucket(computeBucketId(record));
  RecordIdentifier rowId = new RecordIdentifier(
      INVALID_TRANSACTION_ID, BucketCodec.V1.encode(options), INVALID_ROW_ID);
  structObjectInspector.setStructFieldData(record, recordIdentifierField, rowId);
  return record;
}
/** Decodes the writer (bucket) id from the row's encoded bucket property. */
private static int getBucketId(OrcStruct row) {
  final int property = OrcRecordUpdater.getBucket(row);
  final BucketCodec codec = BucketCodec.determineVersion(property);
  return codec.decodeWriterId(property);
}
@Test
/** Encodes a plain bucket id as a V1 bucket property (no statement id set). */
private static int encodeBucket(int bucketId) {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(null).bucket(bucketId);
  return BucketCodec.V1.encode(options);
}
/**
 * Extracts the RecordIdentifier from the record and, for non-DELETE operations,
 * verifies that the bucket id it carries matches the bucket id computed from the
 * record's bucketed columns.
 *
 * @param operationType the mutation type; DELETEs skip the consistency check
 * @param newPartitionValues partition the record targets (used in diagnostics)
 * @param record the mutated record
 * @return the record's RecordIdentifier
 * @throws BucketIdException if the recorded and computed bucket ids disagree
 */
private RecordIdentifier extractRecordIdentifier(OperationType operationType,
    List<String> newPartitionValues, Object record) throws BucketIdException {
  RecordIdentifier recordIdentifier = recordInspector.extractRecordIdentifier(record);
  int bucketIdFromRecord = BucketCodec.determineVersion(
      recordIdentifier.getBucketProperty()).decodeWriterId(recordIdentifier.getBucketProperty());
  int computedBucketId = bucketIdResolver.computeBucketId(record);
  // DELETEs may legitimately target rows in any bucket, so they are exempt.
  if (operationType != OperationType.DELETE && bucketIdFromRecord != computedBucketId) {
    // FIX: include the bucket id actually found on the record — the original
    // message only reported the computed id, omitting the mismatching value.
    throw new BucketIdException("RecordIdentifier.bucketId (" + bucketIdFromRecord
        + ") != computed bucketId (" + computedBucketId + ") for record "
        + recordIdentifier + " in partition " + newPartitionValues + ".");
  }
  return recordIdentifier;
}
DummyRow(long val, long rowId, long origTxn, int bucket) { field = new LongWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); }
/**
 * Builds a RecordUpdater for this writer's partition, decoding the writer
 * (bucket) id from the stored bucket property.
 *
 * @param outputFormat ACID output format used to create the updater
 * @return a RecordUpdater targeting this writer's partition path
 * @throws IOException if the output format fails to create the updater
 */
protected RecordUpdater createRecordUpdater(AcidOutputFormat<?, ?> outputFormat)
    throws IOException {
  final int bucketId =
      BucketCodec.determineVersion(bucketProperty).decodeWriterId(bucketProperty);
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(configuration)
      .inspector(objectInspector)
      .bucket(bucketId)
      .minimumWriteId(writeId)
      .maximumWriteId(writeId)
      .recordIdColumn(recordIdColumn)
      .finalDestination(partitionPath)
      .statementId(-1);
  return outputFormat.getRecordUpdater(partitionPath, options);
}
BigRow(byte[] val, long rowId, long origTxn, int bucket) { field = new BytesWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); this.rowId = new RecordIdentifier(origTxn, bucket, rowId); }
txnBatch.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
/** Verifies that attachBucketIdToRecord stamps the expected synthetic ROW__ID. */
@Test
public void testAttachBucketIdToRecord() {
  MutableRecord record = new MutableRecord(1, "hello");
  capturingBucketIdResolver.attachBucketIdToRecord(record);
  // Expected bucket property: bucket 1 encoded with codec V1.
  int expectedProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(1));
  assertThat(record.rowId, is(new RecordIdentifier(-1L, expectedProperty, -1L)));
  assertThat(record.id, is(1));
  assertThat(record.msg.toString(), is("hello"));
}
connection.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
// Resolve the synthetic write id for an "original" (pre-ACID) file, then rebuild
// the bucket property from the bucket id parsed out of the file name.
// NOTE(review): this span is a fragment — the surrounding method and the rest of
// the Options builder chain on the last line are outside the visible chunk.
OrcRawRecordMerger.TransactionMetaData.findWriteIDForSynthetcRowIDs(file.getPath(), rootDir, conf);
int bucketId = AcidUtils.parseBucketId(file.getPath());
int bucketProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(conf)
// Read the encoded bucket property out of the record-id struct and decode the
// writer (bucket) number from it.
// NOTE(review): fragment — the assignment target of the first call and the body
// of the trailing 'if' lie outside the visible chunk; presumably the first line
// feeds 'bucketProperty'. Verify against the full file.
bucketInspector.get(recIdInspector.getStructFieldData(recId, bucketField));
int bucketNum = BucketCodec.determineVersion(bucketProperty).decodeWriterId(bucketProperty);
writerOffset = 0;
if (multiFileSpray) {
connection.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");