/**
 * Deletes the delta directory for the given (writeId, bucketId) pair under
 * {@code partitionPath}, if one already exists. Non-recursive delete: the
 * delta path produced by {@link AcidUtils#createFilename} for a single write
 * id is a single entry.
 *
 * @param partitionPath partition root under which the delta lives
 * @param writeId       write id used for both the minimum and maximum of the delta range
 * @param bucketId      bucket whose delta file should be removed
 * @throws IOException if the filesystem lookup or delete fails
 */
private void deleteDeltaIfExists(Path partitionPath, long writeId, int bucketId) throws IOException {
  AcidOutputFormat.Options deltaOptions = new AcidOutputFormat.Options(configuration)
      .bucket(bucketId)
      .minimumWriteId(writeId)
      .maximumWriteId(writeId);
  Path delta = AcidUtils.createFilename(partitionPath, deltaOptions);
  FileSystem fs = delta.getFileSystem(configuration);
  // Nothing to do when the delta was never written.
  if (!fs.exists(delta)) {
    return;
  }
  LOG.info("Deleting existing delta path: {}", delta);
  fs.delete(delta, false);
}
// Writer options for a compactor-produced base file: writingBase(true) selects
// the base_* layout, maximumWriteId caps the range at maxTxn, and
// visibilityTxnId stamps the compactor's transaction id onto the result.
// statementId == -1 and bucket 0 are passed explicitly.
// NOTE(review): this is a statement fragment — the enclosing method is outside
// this view.
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) .writingBase(true).isCompressed(false).maximumWriteId(maxTxn).bucket(0).statementId(-1) .visibilityTxnId(compactorTxnId);
private RecordUpdater createRecordUpdater(int bucketId, Long minWriteId, Long maxWriteID) throws IOException, SerializationError { try { // Initialize table properties from the table parameters. This is required because the table // may define certain table parameters that may be required while writing. The table parameter // 'transactional_properties' is one such example. Properties tblProperties = new Properties(); tblProperties.putAll(tbl.getParameters()); return outf.getRecordUpdater(partitionPath, new AcidOutputFormat.Options(conf) .inspector(getSerde().getObjectInspector()) .bucket(bucketId) .tableProperties(tblProperties) .minimumWriteId(minWriteId) .maximumWriteId(maxWriteID) .statementId(-1) .finalDestination(partitionPath)); } catch (SerDeException e) { throw new SerializationError("Failed to get object inspector from Serde " + getSerde().getClass().getName(), e); } }
// Old-style (pre-ACID) naming: with setOldStyle(true), bucket 1 under /tmp is
// expected to map to the legacy "000001_0" file name.
// NOTE(review): fragment — the assertEquals call is cut off here; its second
// argument (presumably AcidUtils.createFilename(p, options).toString(), as in
// the complete test below) lies outside this view.
Path p = new Path("/tmp"); Configuration conf = new Configuration(); AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) .setOldStyle(true).bucket(1); assertEquals("/tmp/000001_0",
// NOTE(review): fragment — the opening of the first assertion is above this
// view; this line supplies its final argument and then a second assertion.
// 536936448 == 0x20010000: the expected V1 encoding for bucket id 1 (the
// high bits presumably carry the codec version — verify against BucketCodec.V1).
BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(0))); Assert.assertEquals(536936448, BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(1)));
// Lazily creates the delete-delta writer used by the compactor. No-op if the
// writer already exists. writingDeleteDelta(true) is what makes the resulting
// writer a delete writer; statementId == -1 makes compacted delta files use the
// delta_xxxx_yyyy directory format. The min/max transaction ids are read from
// job config (MIN_TXN/MAX_TXN); the defaults (Long.MAX_VALUE / Long.MIN_VALUE)
// are presumably sentinels for "not set" — confirm against the job setup code.
// NOTE(review): the final '}' below closes an enclosing class whose header is
// outside this view; code left byte-identical.
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { if (deleteEventWriter == null) { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector) .writingBase(false) .writingDeleteDelta(true) // this is the option which will make it a delete writer .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()) .reporter(reporter) .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)) .bucket(bucket) .statementId(-1);//setting statementId == -1 makes compacted delta files use //delta_xxxx_yyyy format // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } } }
/**
 * Builds a {@link RecordUpdater} for a file sink, configured from the job conf
 * and the sink descriptor (transactionId-based API variant).
 *
 * @param jc               job configuration passed to the Options constructor
 * @param acidOutputFormat output format producing the updater
 * @param bucket           target bucket id
 * @param inspector        object inspector for the rows being written
 * @param tableProp        table properties to forward to the writer
 * @param outPath          path the updater writes to
 * @param reporter         progress reporter
 * @param rowIdColNum      index of the record-id column
 * @param conf             file sink descriptor supplying txn id, statement id, etc.
 * @throws IOException if the output format cannot create the updater
 */
private static RecordUpdater getRecordUpdater(JobConf jc, AcidOutputFormat<?, ?> acidOutputFormat,
    int bucket, ObjectInspector inspector, Properties tableProp, Path outPath, Reporter reporter,
    int rowIdColNum, FileSinkDesc conf) throws IOException {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(jc);
  options.isCompressed(conf.getCompressed());
  options.tableProperties(tableProp);
  options.reporter(reporter);
  options.writingBase(false);
  // A single statement writes one transaction, so min == max.
  options.minimumTransactionId(conf.getTransactionId());
  options.maximumTransactionId(conf.getTransactionId());
  options.bucket(bucket);
  options.inspector(inspector);
  options.recordIdColumn(rowIdColNum);
  options.statementId(conf.getStatementId());
  options.finalDestination(conf.getDestPath());
  return acidOutputFormat.getRecordUpdater(outPath, options);
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { if (writer == null) { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector) .writingBase(jobConf.getBoolean(IS_MAJOR, false)) .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()) .reporter(reporter) .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)) .bucket(bucket) .statementId(-1);//setting statementId == -1 makes compacted delta files use //delta_xxxx_yyyy format // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } }
/**
 * Builds a {@link RecordUpdater} for a file sink, configured from the job conf
 * and the sink descriptor (writeId-based API variant).
 *
 * @param jc               job configuration passed to the Options constructor
 * @param acidOutputFormat output format producing the updater
 * @param bucket           target bucket id
 * @param inspector        object inspector for the rows being written
 * @param tableProp        table properties to forward to the writer
 * @param outPath          path the updater writes to
 * @param reporter         progress reporter
 * @param rowIdColNum      index of the record-id column
 * @param conf             file sink descriptor supplying write id, statement id, etc.
 * @throws IOException if the output format cannot create the updater
 */
private static RecordUpdater getRecordUpdater(JobConf jc, AcidOutputFormat<?, ?> acidOutputFormat,
    int bucket, ObjectInspector inspector, Properties tableProp, Path outPath, Reporter reporter,
    int rowIdColNum, FileSinkDesc conf) throws IOException {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(jc);
  options.isCompressed(conf.getCompressed());
  options.tableProperties(tableProp);
  options.reporter(reporter);
  // An insert-overwrite sink writes a new base rather than a delta.
  options.writingBase(conf.getInsertOverwrite());
  // A single statement writes one table write id, so min == max.
  options.minimumWriteId(conf.getTableWriteId());
  options.maximumWriteId(conf.getTableWriteId());
  options.bucket(bucket);
  options.inspector(inspector);
  options.recordIdColumn(rowIdColNum);
  options.statementId(conf.getStatementId());
  options.finalDestination(conf.getDestPath());
  return acidOutputFormat.getRecordUpdater(outPath, options);
}
private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { if (writer == null) { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector) .writingBase(jobConf.getBoolean(IS_MAJOR, false)) .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()) .reporter(reporter) .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)) .bucket(bucket) .statementId(-1)//setting statementId == -1 makes compacted delta files use .visibilityTxnId(getCompactorTxnId(jobConf)); //delta_xxxx_yyyy format // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } }
// Creates the delete-delta writer used by the compactor (writeId-based API
// variant). writingDeleteDelta(true) is what makes the resulting writer a
// delete writer; statementId == -1 makes compacted delta files use the
// delta_xxxx_yyyy directory format, and the compactor's txn id is attached via
// visibilityTxnId. Unlike the transactionId-based variant, there is no null
// guard here — presumably the caller ensures single initialization; confirm.
// NOTE(review): the final '}' below closes an enclosing class whose header is
// outside this view; code left byte-identical.
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector).writingBase(false) .writingDeleteDelta(true) // this is the option which will make it a delete writer .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()).reporter(reporter) .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)).bucket(bucket) .statementId(-1)//setting statementId == -1 makes compacted delta files use // delta_xxxx_yyyy format .visibilityTxnId(getCompactorTxnId(jobConf)); // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } }
protected RecordUpdater createRecordUpdater(final Path partitionPath, int bucketId, Long minWriteId, Long maxWriteID) throws IOException { // Initialize table properties from the table parameters. This is required because the table // may define certain table parameters that may be required while writing. The table parameter // 'transactional_properties' is one such example. Properties tblProperties = new Properties(); tblProperties.putAll(table.getParameters()); return acidOutputFormat.getRecordUpdater(partitionPath, new AcidOutputFormat.Options(conf) .filesystem(fs) .inspector(outputRowObjectInspector) .bucket(bucketId) .tableProperties(tblProperties) .minimumWriteId(minWriteId) .maximumWriteId(maxWriteID) .statementId(statementId) .finalDestination(partitionPath)); }
/**
 * Verifies file-name generation for large bucket and write ids across the
 * three layouts: old-style flat files, base directories, and delta directories.
 */
@Test
public void testCreateFilenameLargeIds() throws Exception {
  Path dir = new Path("/tmp");
  Configuration conf = new Configuration();
  // Old-style naming: the bucket id alone determines the file name.
  AcidOutputFormat.Options options =
      new AcidOutputFormat.Options(conf).setOldStyle(true).bucket(123456789);
  assertEquals("/tmp/123456789_0", AcidUtils.createFilename(dir, options).toString());
  // Switch to ACID naming with a large write-id range.
  options.bucket(23)
      .minimumWriteId(1234567880)
      .maximumWriteId(1234567890)
      .writingBase(true)
      .setOldStyle(false);
  // Base directories are named after the maximum write id only.
  assertEquals("/tmp/base_1234567890/bucket_00023",
      AcidUtils.createFilename(dir, options).toString());
  // Delta directories carry the full min/max range plus the statement id.
  options.writingBase(false);
  assertEquals("/tmp/delta_1234567880_1234567890_0000/bucket_00023",
      AcidUtils.createFilename(dir, options).toString());
}
/**
 * Creates a {@link RecordUpdater} for this writer's partition, decoding the
 * target bucket id out of the stored {@code bucketProperty}.
 *
 * @param outputFormat ACID output format producing the updater
 * @throws IOException if the output format cannot create the updater
 */
protected RecordUpdater createRecordUpdater(AcidOutputFormat<?, ?> outputFormat)
    throws IOException {
  // Recover the writer id using whichever codec version encoded bucketProperty.
  BucketCodec codec = BucketCodec.determineVersion(bucketProperty);
  int bucketId = codec.decodeWriterId(bucketProperty);
  AcidOutputFormat.Options updaterOptions = new AcidOutputFormat.Options(configuration)
      .inspector(objectInspector)
      .bucket(bucketId)
      .minimumWriteId(writeId)
      .maximumWriteId(writeId)
      .recordIdColumn(recordIdColumn)
      .finalDestination(partitionPath)
      .statementId(-1);
  return outputFormat.getRecordUpdater(partitionPath, updaterOptions);
}
/**
 * Packs a raw bucket id into the bucketProperty integer layout using the V1
 * codec. A null Configuration is passed to Options since only the bucket
 * field participates in the encoding here.
 */
private static int encodeBucket(int bucketId) {
  AcidOutputFormat.Options bucketOnly = new AcidOutputFormat.Options(null).bucket(bucketId);
  return BucketCodec.V1.encode(bucketOnly);
}
DummyRow(long val, long rowId, long origTxn, int bucket) { field = new LongWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); }
BigRow(byte[] val, long rowId, long origTxn, int bucket) { field = new BytesWritable(val); bucket = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucket)); this.rowId = new RecordIdentifier(origTxn, bucket, rowId); }
/**
 * Computes the record's bucket, V1-encodes it into a bucketProperty, and
 * stores a placeholder RecordIdentifier (invalid txn/row ids) into the
 * record's record-identifier field. Mutates and returns the same record.
 */
@Override
public Object attachBucketIdToRecord(Object record) {
  int bucketId = computeBucketId(record);
  int bucketProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucketId));
  // Txn and row ids are not known yet; only the bucket is meaningful here.
  RecordIdentifier placeholderId =
      new RecordIdentifier(INVALID_TRANSACTION_ID, bucketProperty, INVALID_ROW_ID);
  structObjectInspector.setStructFieldData(record, recordIdentifierField, placeholderId);
  return record;
}
// Packs (bucketId, statementId) into a single bucketProperty int using the V1
// codec; conf is only forwarded to the Options constructor.
// NOTE(review): the trailing '/**' opens the javadoc of the next member, which
// is outside this view; code left byte-identical.
static int encodeBucketId(Configuration conf, int bucketId, int statementId) { return BucketCodec.V1.encode(new AcidOutputFormat.Options(conf).bucket(bucketId) .statementId(statementId)); } /**
/**
 * Verifies that attaching a bucket id sets a placeholder RecordIdentifier
 * (txn id and row id of -1) with the V1-encoded bucket, while leaving the
 * record's own fields untouched.
 */
@Test
public void testAttachBucketIdToRecord() {
  MutableRecord record = new MutableRecord(1, "hello");
  capturingBucketIdResolver.attachBucketIdToRecord(record);
  int expectedBucketProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(1));
  assertThat(record.rowId, is(new RecordIdentifier(-1L, expectedBucketProperty, -1L)));
  // The payload fields must not be modified.
  assertThat(record.id, is(1));
  assertThat(record.msg.toString(), is("hello"));
}