@Override public List<OrcSplit> getSplits() throws IOException { List<OrcSplit> splits = Lists.newArrayList(); // When split-update is enabled, we do not need to account for buckets that aren't covered. // This is a huge performance benefit of split-update. And the reason why we are able to // do so is because the 'deltas' here are actually only the delete_deltas. All the insert_deltas // with valid user payload data has already been considered as base for the covered buckets. // Hence, the uncovered buckets do not have any relevant data and we can just ignore them. if (acidOperationalProperties != null && acidOperationalProperties.isSplitUpdate()) { return Collections.emptyList(); } // Generate a split for any buckets that weren't covered. // This happens in the case where a bucket just has deltas and no // base. if (!deltas.isEmpty()) { //since HIVE-17089 if here, then it's not an acid table so there should never be any deltas throw new IllegalStateException("Found unexpected deltas: " + deltas + " in " + dir); } return splits; }
/**
 * Renders the enabled operational properties as a "|"-prefixed list,
 * e.g. {@code "|split_update|hash_based_merge"}; empty string when none
 * of the flags are set.
 */
@Override
public String toString() {
  StringBuilder str = new StringBuilder();
  if (isSplitUpdate()) {
    // Chain appends rather than concatenating inside append(): avoids
    // building a throwaway intermediate String per flag.
    str.append('|').append(SPLIT_UPDATE_STRING);
  }
  if (isHashBasedMerge()) {
    str.append('|').append(HASH_BASED_MERGE_STRING);
  }
  return str.toString();
}
}
/**
 * Renders the enabled operational properties as a "|"-prefixed list,
 * e.g. {@code "|split_update|insert_only"}; empty string when no flag is set.
 */
@Override
public String toString() {
  StringBuilder str = new StringBuilder();
  if (isSplitUpdate()) {
    // Chain appends rather than concatenating inside append(): avoids
    // building a throwaway intermediate String per flag.
    str.append('|').append(SPLIT_UPDATE_STRING);
  }
  if (isHashBasedMerge()) {
    str.append('|').append(HASH_BASED_MERGE_STRING);
  }
  if (isInsertOnly()) {
    str.append('|').append(INSERT_ONLY_STRING);
  }
  return str.toString();
}
}
if(!acidOperationalProperties.isSplitUpdate()) { throw new IllegalStateException("Expected SpliUpdate table: " + split.getPath());
boolean isReadNotAllowed = !isAcidRead || !acidOperationalProperties.isSplitUpdate() || !(inputSplit instanceof OrcSplit); if (isReadNotAllowed) {
context.acidOperationalProperties.isSplitUpdate()) {
= AcidUtils.getAcidOperationalProperties(jobConf); if (!isMajor && acidOperationalProperties.isSplitUpdate()) {
if (acidOperationalProperties.isSplitUpdate()) {
if (acidOperationalProperties.isSplitUpdate()) {
continue; if (sawDeleteRecord && acidOperationalProperties.isSplitUpdate()) { if (deleteEventWriter == null) { getDeleteEventWriter(reporter, reader.getObjectInspector(), split.getBucket());
final AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(conf); isSplitUpdate = acidOperationalProperties.isSplitUpdate();
/** * Returns whether it is possible to create a valid instance of this class for a given split. * @param conf is the job configuration * @param inputSplit * @return true if it is possible, else false. */ public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit) { if (!(inputSplit instanceof OrcSplit)) { return false; // must be an instance of OrcSplit. } // First check if we are reading any original files in the split. // To simplify the vectorization logic, the vectorized acid row batch reader does not handle // original files for now as they have a different schema than a regular ACID file. final OrcSplit split = (OrcSplit) inputSplit; if (AcidUtils.getAcidOperationalProperties(conf).isSplitUpdate() && !split.isOriginal()) { // When split-update is turned on for ACID, a more optimized vectorized batch reader // can be created. But still only possible when we are *NOT* reading any originals. return true; } return false; // no split-update or possibly reading originals! }
@Override public List<OrcSplit> getSplits() throws IOException { List<OrcSplit> splits = Lists.newArrayList(); // When split-update is enabled, we do not need to account for buckets that aren't covered. // This is a huge performance benefit of split-update. And the reason why we are able to // do so is because the 'deltas' here are actually only the delete_deltas. All the insert_deltas // with valid user payload data has already been considered as base for the covered buckets. // Hence, the uncovered buckets do not have any relevant data and we can just ignore them. if (acidOperationalProperties != null && acidOperationalProperties.isSplitUpdate()) { return splits; // return an empty list. } // Generate a split for any buckets that weren't covered. // This happens in the case where a bucket just has deltas and no // base. if (!deltas.isEmpty()) { for (int b = 0; b < numBuckets; ++b) { if (!covered[b]) { splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false, deltas, -1, -1)); } } } return splits; }
@Override public void insert(long currentTransaction, Object row) throws IOException { if (this.currentTransaction.get() != currentTransaction) { insertedRows = 0; //this method is almost no-op in hcatalog.streaming case since statementId == 0 is //always true in that case rowIdOffset = findRowIdOffsetForInsert(); } if (acidOperationalProperties.isSplitUpdate()) { addSplitUpdateEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row); } else { addSimpleEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row); } rowCountDelta++; }
/**
 * Shared assertions for an AcidOperationalProperties instance.
 *
 * <p>"split_update" and "default" are expected to behave identically; any
 * other type string is silently ignored.
 *
 * @param testObj the properties instance under test
 * @param type which configuration flavor is being checked
 */
private void assertsForAcidOperationalProperties(AcidUtils.AcidOperationalProperties testObj,
    String type) throws Exception {
  if ("split_update".equals(type) || "default".equals(type)) {
    assertEquals(true, testObj.isSplitUpdate());
    assertEquals(false, testObj.isHashBasedMerge());
    assertEquals(1, testObj.toInt());
    assertEquals("|split_update", testObj.toString());
  }
}
/**
 * Records a DELETE event for the given row.
 *
 * @param currentTransaction transaction the delete belongs to
 * @param row the row payload identifying what to delete
 * @throws IOException if writing the event fails
 */
@Override
public void delete(long currentTransaction, Object row) throws IOException {
  if (currentTransaction != this.currentTransaction.get()) {
    // New transaction: restart the per-transaction row counter.
    insertedRows = 0;
  }
  if (acidOperationalProperties.isSplitUpdate()) {
    addSplitUpdateEvent(DELETE_OPERATION, currentTransaction, -1L, row);
  } else {
    addSimpleEvent(DELETE_OPERATION, currentTransaction, -1L, row);
  }
  rowCountDelta--;
}
/**
 * Records an UPDATE event for the given row.
 *
 * @param currentTransaction transaction the update belongs to
 * @param row the row payload
 * @throws IOException if writing the event fails
 */
@Override
public void update(long currentTransaction, Object row) throws IOException {
  if (currentTransaction != this.currentTransaction.get()) {
    // New transaction: reset the row counter and recompute the rowId offset.
    insertedRows = 0;
    rowIdOffset = findRowIdOffsetForInsert();
  }
  if (acidOperationalProperties.isSplitUpdate()) {
    addSplitUpdateEvent(UPDATE_OPERATION, currentTransaction, -1L, row);
  } else {
    addSimpleEvent(UPDATE_OPERATION, currentTransaction, -1L, row);
  }
}
/**
 * Records an INSERT event for the given row under a write id.
 *
 * @param currentWriteId write id the row belongs to
 * @param row the row payload
 * @throws IOException if writing the event fails
 */
@Override
public void insert(long currentWriteId, Object row) throws IOException {
  if (currentWriteId != this.currentWriteId.get()) {
    // New write id: restart the per-write-id row counter.
    insertedRows = 0;
  }
  if (acidOperationalProperties.isSplitUpdate()) {
    addSplitUpdateEvent(INSERT_OPERATION, currentWriteId, insertedRows++, row);
  } else {
    addSimpleEvent(INSERT_OPERATION, currentWriteId, insertedRows++, row);
  }
  rowCountDelta++;
  bufferedRows++;
}
/**
 * Records an UPDATE event for the given row under a write id.
 *
 * @param currentWriteId write id the update belongs to
 * @param row the row payload
 * @throws IOException if writing the event fails
 */
@Override
public void update(long currentWriteId, Object row) throws IOException {
  if (currentWriteId != this.currentWriteId.get()) {
    // New write id: restart the per-write-id row counter.
    insertedRows = 0;
  }
  if (acidOperationalProperties.isSplitUpdate()) {
    addSplitUpdateEvent(UPDATE_OPERATION, currentWriteId, -1L, row);
  } else {
    addSimpleEvent(UPDATE_OPERATION, currentWriteId, -1L, row);
  }
}
/**
 * Records a DELETE event for the given row under a write id.
 *
 * @param currentWriteId write id the delete belongs to
 * @param row the row payload identifying what to delete
 * @throws IOException if writing the event fails
 */
@Override
public void delete(long currentWriteId, Object row) throws IOException {
  if (currentWriteId != this.currentWriteId.get()) {
    // New write id: restart the per-write-id row counter.
    insertedRows = 0;
  }
  if (acidOperationalProperties.isSplitUpdate()) {
    addSplitUpdateEvent(DELETE_OPERATION, currentWriteId, -1L, row);
  } else {
    addSimpleEvent(DELETE_OPERATION, currentWriteId, -1L, row);
  }
  rowCountDelta--;
}