static AcidOutputFormat.Options parseBaseOrDeltaBucketFilename(Path bucketFile,
                                                               Configuration conf) {
  AcidOutputFormat.Options result = new AcidOutputFormat.Options(conf);
  String filename = bucketFile.getName();
  if (ORIGINAL_PATTERN.matcher(filename).matches()) {
    // Pre-ACID file such as 000000_0: the bucket id is the digits before the '_'.
    int bucket = Integer.parseInt(filename.substring(0, filename.indexOf('_')));
    result
        .setOldStyle(true)
        .minimumTransactionId(0)
        .maximumTransactionId(0)
        .bucket(bucket)
        .writingBase(true);
  } else if (filename.startsWith(BUCKET_PREFIX)) {
    // ACID file such as bucket_00000: the bucket id is the digits after the '_'.
    int bucket = Integer.parseInt(filename.substring(filename.indexOf('_') + 1));
    if (bucketFile.getParent().getName().startsWith(BASE_PREFIX)) {
      result
          .setOldStyle(false)
          .minimumTransactionId(0)
          .maximumTransactionId(parseBase(bucketFile.getParent()))
          .bucket(bucket)
          .writingBase(true);
    } else if (bucketFile.getParent().getName().startsWith(DELTA_PREFIX)) {
      ParsedDelta parsedDelta = parsedDelta(bucketFile.getParent(), DELTA_PREFIX);
      result
          .setOldStyle(false)
          .minimumTransactionId(parsedDelta.minTransaction)
          .maximumTransactionId(parsedDelta.maxTransaction)
          .bucket(bucket);
    }
  }
  return result;
}
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    .bucket(1).minimumWriteId(1).maximumWriteId(1)
    .inspector(inspector).finalDestination(mockPath);
OrcOutputFormat of = new OrcOutputFormat();
RecordUpdater ru = of.getRecordUpdater(mockPath, options);
for (int i = 0; i < 10; ++i) {
  ru.insert(options.getMinimumWriteId(), new MyRow(i, 2 * i));
}
MockRecordWriter(Path basedir, AcidOutputFormat.Options options) throws IOException {
  FileSystem fs = FileSystem.get(options.getConfiguration());
  Path p = AcidUtils.createFilename(basedir, options);
  os = fs.create(p);
}
OrcRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
  this.options = options;
  this.bucket.set(options.getBucket());
  this.path = AcidUtils.createFilename(path, options);
  FileSystem fs = options.getFilesystem();
  if (fs == null) {
    fs = path.getFileSystem(options.getConfiguration());
  }
  // A multi-transaction delta gets a side file recording flushed lengths;
  // a base (or single-transaction delta) does not.
  if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
      && !options.isWritingBase()) {
    flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
  } else {
    flushLengths = null;
  }
  writerOptions = OrcFile.writerOptions(options.getConfiguration());
  if (!options.isWritingBase()) {
    writerOptions.blockPadding(false);
    writerOptions.bufferSize(DELTA_BUFFER_SIZE);
    writerOptions.stripeSize(DELTA_STRIPE_SIZE);
  }
  rowInspector = (StructObjectInspector) options.getInspector();
  writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
      options.getRecordIdColumn())));
  this.writer = OrcFile.createWriter(this.path, writerOptions);
  item = new OrcStruct(FIELDS);
}
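The side file created above sits next to the delta's bucket file. A minimal sketch of the naming convention, assuming the getSideFile helper appends the usual "_flush_length" suffix (in newer releases a public equivalent lives in org.apache.orc.impl.OrcAcidUtils); the path below is hypothetical:

// Hypothetical path; illustrates the "_flush_length" side-file convention.
Path bucketFile = new Path("/warehouse/t/delta_0000005_0000006/bucket_00000");
Path sideFile = OrcAcidUtils.getSideFile(bucketFile);
// -> /warehouse/t/delta_0000005_0000006/bucket_00000_flush_length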
final Path filename = AcidUtils.createFilename(path, options);
final OrcFile.WriterOptions opts = OrcFile.writerOptions(options.getConfiguration());
if (!options.isWritingBase()) {
  opts.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE)
      .stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE);
}
opts.inspector(options.getInspector())
    .callback(watcher);
final Writer writer = OrcFile.createWriter(filename, opts);
static AcidOutputFormat.Options parseBaseBucketFilename(Path bucketFile,
                                                        Configuration conf) {
  AcidOutputFormat.Options result = new AcidOutputFormat.Options(conf);
  String filename = bucketFile.getName();
  result.writingBase(true);
  if (ORIGINAL_PATTERN.matcher(filename).matches()) {
    int bucket = Integer.parseInt(filename.substring(0, filename.indexOf('_')));
    result
        .setOldStyle(true)
        .minimumTransactionId(0)
        .maximumTransactionId(0)
        .bucket(bucket);
  } else if (filename.startsWith(BUCKET_PREFIX)) {
    int bucket = Integer.parseInt(filename.substring(filename.indexOf('_') + 1));
    result
        .setOldStyle(false)
        .minimumTransactionId(0)
        .maximumTransactionId(parseBase(bucketFile.getParent()))
        .bucket(bucket);
  } else {
    result.setOldStyle(true).bucket(-1)
        .minimumTransactionId(0)
        .maximumTransactionId(0);
  }
  return result;
}
/**
 * Create a filename for a bucket file.
 * @param directory the partition directory
 * @param options the options for writing the bucket
 * @return the filename that should store the bucket
 */
public static Path createFilename(Path directory, AcidOutputFormat.Options options) {
  String subdir;
  if (options.getOldStyle()) {
    return new Path(directory, String.format(BUCKET_DIGITS, options.getBucket()) + "_0");
  } else if (options.isWritingBase()) {
    subdir = BASE_PREFIX + String.format(DELTA_DIGITS, options.getMaximumTransactionId());
  } else {
    subdir = deltaSubdir(options.getMinimumTransactionId(),
        options.getMaximumTransactionId());
  }
  return createBucketFile(new Path(directory, subdir), options.getBucket());
}
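A sketch of the paths the three branches yield, assuming Hive's usual format constants (BUCKET_DIGITS = "%05d", DELTA_DIGITS = "%07d"; digit widths vary across versions) and a hypothetical partition directory:

// Hypothetical illustration of the three createFilename branches.
Path dir = new Path("/warehouse/t/p=1");
AcidOutputFormat.Options opts = new AcidOutputFormat.Options(conf).bucket(7);

AcidUtils.createFilename(dir, opts.setOldStyle(true));
// -> /warehouse/t/p=1/00007_0
AcidUtils.createFilename(dir,
    opts.setOldStyle(false).writingBase(true).maximumTransactionId(100));
// -> /warehouse/t/p=1/base_0000100/bucket_00007
AcidUtils.createFilename(dir,
    opts.writingBase(false).minimumTransactionId(5).maximumTransactionId(5));
// -> /warehouse/t/p=1/delta_0000005_0000005/bucket_00007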
private static RecordUpdater getRecordUpdater(JobConf jc,
    AcidOutputFormat<?, ?> acidOutputFormat, boolean isCompressed, long txnId,
    int bucket, ObjectInspector inspector, Properties tableProp, Path outPath,
    Reporter reporter, int rowIdColNum) throws IOException {
  return acidOutputFormat.getRecordUpdater(outPath,
      new AcidOutputFormat.Options(jc)
          .isCompressed(isCompressed)
          .tableProperties(tableProp)
          .reporter(reporter)
          .writingBase(false)
          .minimumTransactionId(txnId)
          .maximumTransactionId(txnId)
          .bucket(bucket)
          .inspector(inspector)
          .recordIdColumn(rowIdColNum));
}
private void getWriter(Reporter reporter, ObjectInspector inspector,
    int bucket) throws IOException {
  if (writer == null) {
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf);
    options.inspector(inspector)
        .writingBase(jobConf.getBoolean(IS_MAJOR, false))
        .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false))
        .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties())
        .reporter(reporter)
        .minimumTransactionId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE))
        .maximumTransactionId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE))
        .bucket(bucket);

    // Instantiate the underlying output format
    @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
    AcidOutputFormat<WritableComparable, V> aof =
        instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));

    writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
  }
}
protected RecordUpdater createRecordUpdater(final Path partitionPath, int bucketId,
    Long minWriteId, Long maxWriteID) throws IOException {
  // Initialize table properties from the table parameters. This is required because the table
  // may define certain table parameters that may be required while writing. The table parameter
  // 'transactional_properties' is one such example.
  Properties tblProperties = new Properties();
  tblProperties.putAll(table.getParameters());
  return acidOutputFormat.getRecordUpdater(partitionPath,
      new AcidOutputFormat.Options(conf)
          .filesystem(fs)
          .inspector(outputRowObjectInspector)
          .bucket(bucketId)
          .tableProperties(tblProperties)
          .minimumWriteId(minWriteId)
          .maximumWriteId(maxWriteID)
          .statementId(-1)
          .finalDestination(partitionPath));
}
private RecordUpdater createRecordUpdater(int bucketId, Long minTxnId, Long maxTxnID)
    throws IOException, SerializationError {
  try {
    return outf.getRecordUpdater(partitionPath,
        new AcidOutputFormat.Options(conf)
            .inspector(getSerde().getObjectInspector())
            .bucket(bucketId)
            .minimumTransactionId(minTxnId)
            .maximumTransactionId(maxTxnID));
  } catch (SerDeException e) {
    throw new SerializationError("Failed to get object inspector from Serde "
        + getSerde().getClass().getName(), e);
  }
}
private DummyOrcRecordUpdater(Path path, Options options) {
  this.path = path;
  this.inspector = options.getInspector();
  this.out = options.getDummyStream();
}
public PhoenixRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
  this.config = options.getConfiguration();
  tableName = config.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
  this.objInspector = options.getInspector();
  try {
    phoenixSerializer = new PhoenixSerializer(config, options.getTableProperties());
  } catch (SerDeException e) {
    throw new IOException(e);
  }
}
public static String getOptionsValue(Options options) {
  StringBuilder content = new StringBuilder();
  int bucket = getBucket(options);
  String inspectorInfo = options.getInspector().getCategory() + ":"
      + options.getInspector().getTypeName();
  long maxTxnId = options.getMaximumWriteId();
  long minTxnId = options.getMinimumWriteId();
  int recordIdColumn = options.getRecordIdColumn();
  boolean isCompressed = options.isCompressed();
  boolean isWritingBase = options.isWritingBase();
  content.append("bucket : ").append(bucket)
      .append(", inspectorInfo : ").append(inspectorInfo)
      .append(", minTxnId : ").append(minTxnId)
      .append(", maxTxnId : ").append(maxTxnId)
      .append(", recordIdColumn : ").append(recordIdColumn);
  content.append(", isCompressed : ").append(isCompressed)
      .append(", isWritingBase : ").append(isWritingBase);
  return content.toString();
}
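For reference, a sketch of the string this builds; the Options values and the struct inspector are invented for illustration, and getBucket is assumed to return the plain bucket id:

// Hypothetical values; shows the shape of the produced string.
AcidOutputFormat.Options opts = new AcidOutputFormat.Options(conf)
    .bucket(1).inspector(inspector).minimumWriteId(5).maximumWriteId(5);
System.out.println(getOptionsValue(opts));
// bucket : 1, inspectorInfo : STRUCT:struct<a:int,b:string>, minTxnId : 5,
// maxTxnId : 5, recordIdColumn : -1, isCompressed : false, isWritingBase : false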
public PhoenixRecordWriter(Path path, AcidOutputFormat.Options options) throws IOException {
  Configuration config = options.getConfiguration();
  Properties props = new Properties();
  try {
    initialize(config, props);
  } catch (SQLException e) {
    throw new IOException(e);
  }
  this.objInspector = options.getInspector();
  try {
    phoenixSerializer = new PhoenixSerializer(config, options.getTableProperties());
  } catch (SerDeException e) {
    throw new IOException(e);
  }
}
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    .inspector(inspector).bucket(BUCKET).finalDestination(root);
final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
if (!use130Format) {
  options.statementId(-1);
}
RecordUpdater ru = of.getRecordUpdater(root,
    options.writingBase(true).maximumWriteId(100));
for (String v : values) {
  ru.insert(0, new MyRow(v));
}
ru.close(false);
ru = of.getRecordUpdater(root, options.writingBase(false)
    .minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
boolean filterOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS);
int bucket = 0;
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    .filesystem(fs)
    .bucket(bucket)
    .writingBase(false)
    .minimumWriteId(1)
    .maximumWriteId(1)
    .inspector(inspector)
    .reporter(Reporter.NULL)
    .recordIdColumn(1)
    .finalDestination(root);
OrcRecordUpdater updater = new OrcRecordUpdater(root, options);
updater.insert(options.getMinimumWriteId(),
    new DummyRow(1, 0, options.getMinimumWriteId(), bucket));
updater.insert(options.getMinimumWriteId(),
    new DummyRow(2, 1, options.getMinimumWriteId(), bucket));
updater.insert(options.getMinimumWriteId(),
    new DummyRow(3, 2, options.getMinimumWriteId(), bucket));
updater.close(false);
options.minimumWriteId(2)
    .maximumWriteId(2);
updater = new OrcRecordUpdater(root, options);
updater.insert(options.getMinimumWriteId(),
    new DummyRow(4, 0, options.getMinimumWriteId(), bucket));
updater.insert(options.getMinimumWriteId(),
    new DummyRow(5, 1, options.getMinimumWriteId(), bucket));
updater.insert(options.getMinimumWriteId(),
    new DummyRow(6, 2, options.getMinimumWriteId(), bucket));
fileStatus, root, true, true, conf);  // tail of a call truncated in this excerpt
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    .bucket(0);
int bucketProperty = BucketCodec.V1.encode(options);
if (options.getTableProperties() != null) {
  this.acidOperationalProperties =
      AcidUtils.getAcidOperationalProperties(options.getTableProperties());
} else {
  this.acidOperationalProperties =
      AcidUtils.getAcidOperationalProperties(options.getConfiguration());
}
if (options.getConfiguration() != null) {
  Configuration hc = options.getConfiguration();
  if (hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEST.name(), false)
      || hc.getBoolean(HiveConf.ConfVars.HIVE_IN_TEZ_TEST.name(), false)) {
    // test-only overrides elided in this excerpt
  }
}
this.deleteEventWriter = null;
this.deleteEventPath = null;
FileSystem fs = options.getFilesystem();
if (fs == null) {
  fs = partitionRoot.getFileSystem(options.getConfiguration());
}
if (options.getMinimumWriteId() != options.getMaximumWriteId()
    && !options.isWritingBase()) {
  flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
  flushLengths.writeLong(0);
  OrcInputFormat.SHIMS.hflush(flushLengths);
}
if (options.isWritingBase()) {
  if (options instanceof OrcOptions) {
    writerOptions = ((OrcOptions) options).getOrcOptions();
  }
}
((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(
    new AcidOutputFormat.Options(conf).bucket(0).statementId(0));

// elsewhere in the same test, inside a loop with index idx:
((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(
    new AcidOutputFormat.Options(conf).bucket(0).statementId(idx + 1));
for (long row_id : ((LongColumnVector) batch.cols[3]).vector) {
  indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION,
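The int produced by BucketCodec.V1.encode packs the codec version, writer (bucket) id, and statement id into a single "bucket property". A minimal round-trip sketch, assuming org.apache.hadoop.hive.ql.io.BucketCodec and invented values:

AcidOutputFormat.Options opts = new AcidOutputFormat.Options(conf)
    .bucket(0).statementId(3);  // values invented for illustration
int bucketProperty = BucketCodec.V1.encode(opts);
int writerId = BucketCodec.V1.decodeWriterId(bucketProperty);       // -> 0
int statementId = BucketCodec.V1.decodeStatementId(bucketProperty); // -> 3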