/**
 * Whenever we are reading a batch, we must ensure that all the records in the batch
 * have the same bucket id as the bucket id of the split. If not, throw exception.
 *
 * @param bucketPropertyFromRecord encoded bucket property read from the current record
 * @throws IOException if the record's decoded writer (bucket) id differs from the
 *         bucket id of the containing split
 */
private void checkBucketId(int bucketPropertyFromRecord) throws IOException {
  // Decode using whichever codec version actually wrote this record.
  int bucketIdFromRecord = BucketCodec.determineVersion(bucketPropertyFromRecord)
      .decodeWriterId(bucketPropertyFromRecord);
  if (bucketIdFromRecord != bucketForSplit) {
    // Park a sentinel key so callers don't resume reading from a corrupt position.
    DeleteRecordKey dummy = new DeleteRecordKey();
    setCurrentDeleteKey(dummy);
    // BUG FIX: the original message printed the freshly-constructed (empty) dummy
    // key, which carries no diagnostic value; report the decoded bucket id instead.
    throw new IOException("Corrupted records with different bucket ids "
        + "from the containing bucket file found! Expected bucket id "
        + bucketForSplit + ", however found " + bucketIdFromRecord
        + ". (" + orcSplit + "," + deleteDeltaFile + ")");
  }
}
/**
 * Encodes a bucket id and statement id into a bucket property using codec V1.
 *
 * @param conf configuration used to build the ACID output options
 * @param bucketId plain bucket id to encode
 * @param statementId statement id to encode alongside the bucket id
 * @return the V1-encoded bucket property
 */
static int encodeBucketId(Configuration conf, int bucketId, int statementId) {
  AcidOutputFormat.Options options =
      new AcidOutputFormat.Options(conf).bucket(bucketId).statementId(statementId);
  return BucketCodec.V1.encode(options);
}
/**
/**
 * Renders an encoded bucket property as {@code raw(version.writerId.statementId)}
 * for diagnostics; the "no bucket" sentinel -1 is rendered as plain "-1".
 *
 * @param bucketId encoded bucket property, or -1
 * @return human-readable form of the bucket property
 */
public static String bucketToString(int bucketId) {
  if (bucketId == -1) {
    return String.valueOf(bucketId);
  }
  BucketCodec codec = BucketCodec.determineVersion(bucketId);
  StringBuilder text = new StringBuilder();
  text.append(bucketId).append('(')
      .append(codec.getVersion()).append('.')
      .append(codec.decodeWriterId(bucketId)).append('.')
      .append(codec.decodeStatementId(bucketId)).append(')');
  return text.toString();
}
}
// Test-only hook: when running under HIVE_IN_TEST or HIVE_IN_TEZ_TEST, the bucket
// codec version may be forced via TESTMODE_BUCKET_CODEC_VERSION (defaults to V1).
// NOTE(review): this chunk is the interior of a larger constructor/method; the
// brace opened by the 'if' is closed outside the visible span — confirm against
// the full file before assuming the trailing statements are inside the 'if'.
if(hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEST.name(), false)
    || hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEZ_TEST.name(), false)) {
  bucketCodec = BucketCodec.getCodec(
      hc.getInt(HiveConf.ConfVars.TESTMODE_BUCKET_CODEC_VERSION.name(),
          BucketCodec.V1.getVersion()));
// Encode the bucket property and derive the target file name for this writer.
this.bucket.set(bucketCodec.encode(options));
this.path = AcidUtils.createFilename(partitionRoot, options);
this.deleteEventWriter = null;
/**
 * Determines which codec version encoded the given bucket property by inspecting
 * its top 3 bits.
 *
 * @param bucket encoded bucket property
 * @return the codec whose version matches the property's top 3 bits
 * @throws IllegalArgumentException if the top 3 bits do not name a known version
 */
public static BucketCodec determineVersion(int bucket) {
  assert 7 << 29 == BucketCodec.TOP3BITS_MASK;
  //look at top 3 bits and return appropriate enum
  try {
    return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29);
  } catch (IllegalArgumentException ex) {
    // BUG FIX: chain the original exception as the cause; the original rethrow
    // kept only getMessage() and discarded the stack trace.
    throw new IllegalArgumentException(
        ex.getMessage() + " Cannot decode version from " + bucket, ex);
  }
}
public static BucketCodec getCodec(int version) {
/**
 * Convert a RecordIdentifier. This is done so that we can use the RecordIdentifier in place
 * of the bucketing column.
 * @param i RecordIdentifier to convert
 * @return value of the bucket identifier, or null when the input is null
 */
public IntWritable evaluate(RecordIdentifier i) {
  if (i == null) {
    return null;
  }
  int bucketProperty = i.getBucketProperty();
  BucketCodec decoder = BucketCodec.determineVersion(bucketProperty);
  intWritable.set(decoder.decodeWriterId(bucketProperty));
  return intWritable;
}
/**
 * Computes the record's bucket id, encodes it as a V1 bucket property, and stamps
 * it onto the record's synthetic RecordIdentifier field.
 *
 * @param record the record to decorate
 * @return the same record instance, with its record-identifier field set
 */
@Override
public Object attachBucketIdToRecord(Object record) {
  AcidOutputFormat.Options options =
      new AcidOutputFormat.Options(null).bucket(computeBucketId(record));
  RecordIdentifier rowId = new RecordIdentifier(
      INVALID_TRANSACTION_ID, BucketCodec.V1.encode(options), INVALID_ROW_ID);
  structObjectInspector.setStructFieldData(record, recordIdentifierField, rowId);
  return record;
}
/** Decodes the writer (bucket) id from the row's encoded bucket property. */
private static int getBucketId(OrcStruct row) {
  final int property = OrcRecordUpdater.getBucket(row);
  final BucketCodec codec = BucketCodec.determineVersion(property);
  return codec.decodeWriterId(property);
}
@Test
/** Encodes a plain bucket id as a V1 bucket property (no statement id set). */
private static int encodeBucket(int bucketId) {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(null).bucket(bucketId);
  return BucketCodec.V1.encode(options);
}
/**
 * Extracts the RecordIdentifier from the record and, for non-DELETE operations,
 * verifies that the bucket id it carries matches the bucket id computed from the
 * record's bucketed columns.
 *
 * @param operationType the mutation type; DELETEs skip the consistency check
 * @param newPartitionValues partition the record targets (used in diagnostics)
 * @param record the mutated record
 * @return the record's RecordIdentifier
 * @throws BucketIdException if the recorded and computed bucket ids disagree
 */
private RecordIdentifier extractRecordIdentifier(OperationType operationType,
    List<String> newPartitionValues, Object record) throws BucketIdException {
  RecordIdentifier recordIdentifier = recordInspector.extractRecordIdentifier(record);
  int bucketIdFromRecord = BucketCodec.determineVersion(
      recordIdentifier.getBucketProperty()).decodeWriterId(recordIdentifier.getBucketProperty());
  int computedBucketId = bucketIdResolver.computeBucketId(record);
  // DELETEs may legitimately target rows in any bucket, so they are exempt.
  if (operationType != OperationType.DELETE && bucketIdFromRecord != computedBucketId) {
    // FIX: include the bucket id actually found on the record — the original
    // message only reported the computed id, omitting the mismatching value.
    throw new BucketIdException("RecordIdentifier.bucketId (" + bucketIdFromRecord
        + ") != computed bucketId (" + computedBucketId + ") for record "
        + recordIdentifier + " in partition " + newPartitionValues + ".");
  }
  return recordIdentifier;
}
DummyRow(long val, long rowId, long origTxn, int bucket) { field = new LongWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); }
/**
 * Builds a RecordUpdater for this writer's partition, decoding the writer
 * (bucket) id from the stored bucket property.
 *
 * @param outputFormat ACID output format used to create the updater
 * @return a RecordUpdater targeting this writer's partition path
 * @throws IOException if the output format fails to create the updater
 */
protected RecordUpdater createRecordUpdater(AcidOutputFormat<?, ?> outputFormat)
    throws IOException {
  final int bucketId =
      BucketCodec.determineVersion(bucketProperty).decodeWriterId(bucketProperty);
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(configuration)
      .inspector(objectInspector)
      .bucket(bucketId)
      .minimumWriteId(writeId)
      .maximumWriteId(writeId)
      .recordIdColumn(recordIdColumn)
      .finalDestination(partitionPath)
      .statementId(-1);
  return outputFormat.getRecordUpdater(partitionPath, options);
}
BigRow(byte[] val, long rowId, long origTxn, int bucket) { field = new BytesWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); this.rowId = new RecordIdentifier(origTxn, bucket, rowId); }
txnBatch.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
/** Verifies that attachBucketIdToRecord stamps the expected synthetic ROW__ID. */
@Test
public void testAttachBucketIdToRecord() {
  MutableRecord record = new MutableRecord(1, "hello");
  capturingBucketIdResolver.attachBucketIdToRecord(record);
  // Expected bucket property: bucket 1 encoded with codec V1.
  int expectedProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(1));
  assertThat(record.rowId, is(new RecordIdentifier(-1L, expectedProperty, -1L)));
  assertThat(record.id, is(1));
  assertThat(record.msg.toString(), is("hello"));
}
connection.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
// Resolve the synthetic write id for an "original" (pre-ACID) file, then rebuild
// the bucket property from the bucket id parsed out of the file name.
// NOTE(review): this span is a fragment — the surrounding method and the rest of
// the Options builder chain on the last line are outside the visible chunk.
OrcRawRecordMerger.TransactionMetaData.findWriteIDForSynthetcRowIDs(file.getPath(), rootDir, conf);
int bucketId = AcidUtils.parseBucketId(file.getPath());
int bucketProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(conf)
// Read the encoded bucket property out of the record-id struct and decode the
// writer (bucket) number from it.
// NOTE(review): fragment — the assignment target of the first call and the body
// of the trailing 'if' lie outside the visible chunk; presumably the first line
// feeds 'bucketProperty'. Verify against the full file.
bucketInspector.get(recIdInspector.getStructFieldData(recId, bucketField));
int bucketNum = BucketCodec.determineVersion(bucketProperty).decodeWriterId(bucketProperty);
writerOffset = 0;
if (multiFileSpray) {
connection.close();
// 536870912 == 1 << 29: top-3-bits codec version 1 with writer id 0 — sanity-check
// that the codec decodes writer id 0 from an unbucketed-table property.
Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");