/**
 * Convenience overload that scans {@code directory} for its ACID state with both
 * optional boolean flags disabled (they are presumably useFileIds / ignoreEmptyFiles —
 * confirm against the five-argument overload).
 *
 * @param directory   partition/table directory to examine
 * @param conf        Hadoop configuration used for filesystem access
 * @param writeIdList the valid write ids for the current reader
 * @throws IOException on filesystem failure
 */
@VisibleForTesting
public static Directory getAcidState(Path directory, Configuration conf,
    ValidWriteIdList writeIdList) throws IOException {
  return getAcidState(directory, conf, writeIdList, false, false);
}
/**
 * Deletes the delta file for the given single write id and bucket if one already
 * exists at the computed location. The delete is non-recursive, matching the fact
 * that {@link AcidUtils#createFilename} resolves to a single file path here.
 *
 * @param partitionPath root of the partition the delta lives under
 * @param writeId       write id used for both the minimum and maximum of the delta range
 * @param bucketId      bucket whose delta file should be removed
 * @throws IOException on filesystem failure
 */
private void deleteDeltaIfExists(Path partitionPath, long writeId, int bucketId) throws IOException {
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(configuration)
      .bucket(bucketId)
      .minimumWriteId(writeId)
      .maximumWriteId(writeId);
  Path deltaPath = AcidUtils.createFilename(partitionPath, options);
  FileSystem fs = deltaPath.getFileSystem(configuration);
  if (fs.exists(deltaPath)) {
    LOG.info("Deleting existing delta path: {}", deltaPath);
    // non-recursive delete, exactly as before
    fs.delete(deltaPath, false);
  }
}
// Fragment (mid-method): publishes the schema-evolution column types into the job
// properties, then reads the table's transactional flag and ACID operational
// properties from the table properties and mirrors both into jobProperties.
// NOTE(review): the surrounding method (and the typeNamesSb builder) is outside this view.
jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString()); boolean isTransactionalTable = AcidUtils.isTablePropertyTransactional(tableProperties); AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(tableProperties); AcidUtils.setAcidOperationalProperties( jobProperties, isTransactionalTable, acidOperationalProperties);
/**
 * Safety check to make sure a file taken from one ACID table is not added into another
 * ACID table, since the ROW__IDs embedded as part of a write to one table won't make
 * sense in a different table/cluster. Non-full-ACID tables are exempt and pass through.
 *
 * @param table the destination table
 * @param srcs  the candidate source files
 * @param fs    filesystem used to inspect the files
 * @throws SemanticException if a file fails validation
 */
public static void validateAcidFiles(Table table, FileStatus[] srcs, FileSystem fs)
    throws SemanticException {
  if (AcidUtils.isFullAcidTable(table)) {
    validateAcidFiles(srcs, fs);
  }
}
/**
 * Returns either a base or a delta subdirectory name, depending on
 * {@code baseDirRequired}.
 *
 * @param baseDirRequired true for a base dir (only {@code min} is used),
 *                        false for a delta dir spanning {@code min..max}
 * @param min             minimum write id
 * @param max             maximum write id (ignored for base dirs)
 * @param statementId     statement id (ignored for base dirs)
 */
public static String baseOrDeltaSubdir(boolean baseDirRequired, long min, long max, int statementId) {
  return baseDirRequired ? baseDir(min) : deltaSubdir(min, max, statementId);
}
/**
 * Builds a scan descriptor for the given alias over {@code tblMetadata}.
 * The ACID flag is computed eagerly; operational properties are resolved
 * only when the table is actually an ACID table.
 */
public TableScanDesc(final String alias, List<VirtualColumn> vcs, Table tblMetadata) {
  this.alias = alias;
  this.virtualCols = vcs;
  this.tableMetadata = tblMetadata;
  this.isAcidTable = AcidUtils.isAcidTable(this.tableMetadata);
  if (this.isAcidTable) {
    this.acidOperationalProperties = AcidUtils.getAcidOperationalProperties(this.tableMetadata);
  }
}
// Fragment (mid-test, from a testNewBaseAndDelta-style case): builds a write-id list
// capped at 200, resolves the ACID state of `root`, then asserts the expected
// delete-delta/delta directory layout and constructs OrcRawRecordMerger instances in
// both non-base and major-compaction configurations.
// NOTE(review): this fragment is cut mid-expression — the head of the first
// assertEquals(... use130Format ? ...) is missing before the dangling
// `AcidUtils.deleteDeltaSubdir(200,200,0) : ...` ternary arm, and the merger
// variable the later assertions read is assigned outside this view.
new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE); AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, writeIdList); AcidUtils.deleteDeltaSubdir(200,200,0) : AcidUtils.deleteDeltaSubdir(200,200)), directory.getCurrentDirectories().get(0).getPath()); assertEquals(new Path(root, use130Format ? AcidUtils.deltaSubdir(200,200,0) : AcidUtils.deltaSubdir(200,200)), directory.getCurrentDirectories().get(1).getPath()); Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET); Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET); Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath(); AcidUtils.setAcidOperationalProperties(conf, true, null); conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); new OrcRawRecordMerger(conf, false, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true)); assertEquals(null, merger.getMinKey()); assertEquals(null, merger.getMaxKey()); new OrcRawRecordMerger(conf, true, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options() .isCompacting(true).isMajorCompaction(true).baseDir(new Path(root, "base_0000100")));
// Fragment (mid-method): tail of a varargs test-helper signature, followed by
// resolution of the partition's ACID state (asserting no obsolete or original
// files remain) and configuration of a JobConf for a transactional ORC read:
// schema-evolution columns/types, ACID operational properties, transactional
// flag, and the serialized valid-write-id list.
// NOTE(review): the method name and preceding parameters are outside this view.
String... records) throws Exception { ValidWriteIdList writeIds = getTransactionContext(conf); AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, writeIds); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg"); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string"); AcidUtils.setAcidOperationalProperties(job, true, null); job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
@Test public void testAcidOperationalPropertiesSettersAndGetters() throws Exception { AcidUtils.AcidOperationalProperties oprProps = AcidUtils.AcidOperationalProperties.getDefault(); Configuration testConf = new Configuration(); // Test setter for configuration object. AcidUtils.setAcidOperationalProperties(testConf, true, oprProps); assertEquals(1, testConf.getInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, -1)); // Test getter for configuration object. assertEquals(oprProps.toString(), AcidUtils.getAcidOperationalProperties(testConf).toString()); Map<String, String> parameters = new HashMap<String, String>(); // Test setter for map object. AcidUtils.setAcidOperationalProperties(parameters, true, oprProps); assertEquals(oprProps.toString(), parameters.get(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname)); // Test getter for map object. assertEquals(1, AcidUtils.getAcidOperationalProperties(parameters).toInt()); parameters.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, oprProps.toString()); // Set the appropriate key in the map and test that we are able to read it back correctly. assertEquals(1, AcidUtils.getAcidOperationalProperties(parameters).toInt()); }
// Fragment (mid-test): walks the current delta directories of a partition's ACID
// state twice, checking each bucket file's logical length (side-file aware) against
// its raw stat length — first expecting exact equality, then (after some action
// performed outside this view) expecting logicalLength <= actualLength.
// NOTE(review): the fragment is cut with unbalanced braces at both ends, and
// `lengthFile` / `lengthFileSize` are defined outside this view; the typo "lengh"
// lives inside an assertion message string and is left untouched here.
AcidUtils.Directory dir = AcidUtils.getAcidState(partLoc, conf, getTransactionContext(conf)); for(AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) { for(FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) { Assert.assertTrue("Expected " + lengthFile + " to be non empty. lengh=" + lengthFileSize, lengthFileSize > 0); long logicalLength = AcidUtils.getLogicalLength(fs, stat); long actualLength = stat.getLen(); Assert.assertTrue("", logicalLength == actualLength); dir = AcidUtils.getAcidState(partLoc, conf, getTransactionContext(conf)); for(AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) { for(FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) { Assert.assertTrue("Expected " + lengthFile + " to be non empty. lengh=" + lengthFileSize, lengthFileSize > 0); long logicalLength = AcidUtils.getLogicalLength(fs, stat); long actualLength = stat.getLen(); Assert.assertTrue("", logicalLength <= actualLength);
// Fragment: head of getAcidFilesForStats — resolves the table's valid write-id
// list from the configuration, returns null (with a warning) when no ACID state
// is available, then resolves the directory's ACID state and warns that stats
// may be inaccurate for a full-ACID table with live deltas.
// NOTE(review): this copy is visibly truncated — the LOG.warn(...) call is missing
// its closing `);` before `return null;`, the if-blocks are missing braces, and the
// method body is cut off at the end. Restore from the original source rather than
// patching here.
public static List<FileStatus> getAcidFilesForStats( Table table, Path dir, Configuration jc, FileSystem fs) throws IOException { List<FileStatus> fileList = new ArrayList<>(); ValidWriteIdList idList = AcidUtils.getTableValidWriteIdList(jc, AcidUtils.getFullTableName(table.getDbName(), table.getTableName())); if (idList == null) { LOG.warn("Cannot get ACID state for " + table.getDbName() + "." + table.getTableName() return null; Directory acidInfo = AcidUtils.getAcidState(dir, jc, idList); if (!acidInfo.getCurrentDirectories().isEmpty() && AcidUtils.isFullAcidTable(table)) { Utilities.FILE_OP_LOGGER.warn( "Computing stats for an ACID table; stats may be inaccurate");
// Fragment (mid-test, "testEmpty"): writes an empty base_100 bucket file via an
// ORC writer with a key-index callback, resolves the ACID state under a write-id
// list capped at 200, then builds a non-compacting OrcRawRecordMerger over the
// base reader and the current delta paths.
// NOTE(review): cut mid-expression — `OrcFile.createReader(basePath,` is missing
// its reader-options argument and closing paren, and the `merger` variable read by
// createKey()/createValue() is assigned outside this view.
wo.inspector(inspector); wo.callback(new OrcRecordUpdater.KeyIndexBuilder("testEmpty")); Writer w = OrcFile.createWriter(AcidUtils.createBucketFile(new Path(root, AcidUtils.baseDir(100)), BUCKET), wo); w.close(); new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testEmpty:200:" + Long.MAX_VALUE); AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, writeIdList); Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET); Reader baseReader = OrcFile.createReader(basePath, new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(false)); RecordIdentifier key = merger.createKey(); OrcStruct value = merger.createValue();
/**
 * Whether schema evolution applies to the given table: always enabled for
 * transactional (ACID) tables, otherwise controlled by the
 * {@code HIVE_SCHEMA_EVOLUTION} configuration flag.
 *
 * @param tbl the table being read
 * @return true if schema evolution should be honored for this table
 */
private boolean isSchemaEvolutionEnabled(Table tbl) {
  boolean isAcid = AcidUtils.isTablePropertyTransactional(tbl.getMetadata());
  // `if (cond) return true; return false;` collapsed to a direct boolean return.
  return isAcid || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION);
}
// Fragment (mid-method): tail of the write-id-based delta filename builder.
// Chooses between delete_delta_* and delta_* subdir names — without a statement id
// in the first arm (cut off above this view) and with one in the else-arm — then
// appends the visibility-transaction-id suffix and resolves the bucket file path.
// NOTE(review): the fragment starts mid-ternary and the else-branch's closing
// brace is outside this view; compare with the older transaction-id variant of
// this logic elsewhere in this file.
deleteDeltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId()) : deltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId()); } else { subdir = options.isWritingDeleteDelta() ? deleteDeltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId(), options.getStatementId()) : deltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId(), options.getStatementId()); subdir = addVisibilitySuffix(subdir, options.getVisibilityTxnId()); return createBucketFile(new Path(directory, subdir), options.getBucketId());
// Fragment (mid-test): configures writer options for delta_1_1 of a bucket, builds
// the expected delta directory / bucket-file / side-file paths, asserts the bucket
// file exists without a side file, and finally expects getLogicalLength to throw
// IOException with a "found but is not readable" message once the side file is in
// a bad state.
// NOTE(review): cut at both ends — the options-builder receiver, the `bucket0File`
// variable, and the steps between the getLogicalLength calls are outside this view.
.inspector(inspector).bucket(BUCKET).writingBase(false).minimumWriteId(1) .maximumWriteId(1).finalDestination(root); Path delta1_1_0 = new Path(root, AcidUtils.deltaSubdir( options.getMinimumWriteId(), options.getMaximumWriteId(), options.getStatementId())); Path bucket0 = AcidUtils.createBucketFile(delta1_1_0, BUCKET); Path bucket0SideFile = OrcAcidUtils.getSideFile(bucket0); AcidUtils.getLogicalLength(fs, bucket0File); Assert.assertTrue("no " + bucket0, fs.exists(bucket0)); Assert.assertFalse("unexpected " + bucket0SideFile, fs.exists(bucket0SideFile)); AcidUtils.getLogicalLength(fs, bucket0File)); expectedException.expect(IOException.class); expectedException.expectMessage(bucket0SideFile.getName() + " found but is not readable"); AcidUtils.getLogicalLength(fs, bucket0File);
/**
 * Whether this work targets a full ACID table. When the resolved Table object is
 * available it is consulted directly; otherwise the answer is derived from the
 * table-info properties: transactional and not insert-only.
 */
public boolean isFullAcidTable() {
  if (getTable() == null) {
    return AcidUtils.isTablePropertyTransactional(getTableInfo().getProperties())
        && !AcidUtils.isInsertOnlyTable(getTableInfo().getProperties());
  }
  return AcidUtils.isFullAcidTable(table);
}
// Fragment (mid-method): tail of the older, transaction-id-based delta filename
// builder — the same shape as the write-id variant elsewhere in this file, but
// keyed on getMinimumTransactionId()/getMaximumTransactionId(), without the
// visibility suffix, and using getBucket() rather than getBucketId().
// NOTE(review): starts mid-ternary; the condition and the else-branch's closing
// brace are outside this view.
deleteDeltaSubdir(options.getMinimumTransactionId(), options.getMaximumTransactionId()) : deltaSubdir(options.getMinimumTransactionId(), options.getMaximumTransactionId()); } else { subdir = options.isWritingDeleteDelta() ? deleteDeltaSubdir(options.getMinimumTransactionId(), options.getMaximumTransactionId(), options.getStatementId()) : deltaSubdir(options.getMinimumTransactionId(), options.getMaximumTransactionId(), options.getStatementId()); return createBucketFile(new Path(directory, subdir), options.getBucket());
// Fragment (mid-method): copies the table-scan's ACID flag and operational
// properties onto a cloned JobConf so downstream readers see a transactional scan.
AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable()); AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
// Fragment (mid-method, merger setup): checks the base-dir bucket file is present
// and non-empty, then for each delta directory — skipping delete deltas when not
// compacting — parses the delta, asserts raw-format deltas are never delete
// deltas, and opens an ORC reader bounded by the delta file's logical (side-file
// aware) length.
// NOTE(review): cut at both ends — the if-bodies are unbalanced and `deltaFile`
// is defined outside this view.
Path bucketPath = AcidUtils.createBucketFile(mergerOptions.getBaseDir(), bucket); if(fs.exists(bucketPath) && fs.getFileStatus(bucketPath).getLen() > 0) { .searchArgument(null, null).range(0, Long.MAX_VALUE); for(Path delta: deltaDirectory) { if(!mergerOptions.isCompacting() && !AcidUtils.isDeleteDelta(delta)) { AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta, delta.getFileSystem(conf)); if(deltaDir.isRawFormat()) { assert !deltaDir.isDeleteDelta() : delta.toString(); long length = AcidUtils.getLogicalLength(fs, fs.getFileStatus(deltaFile)); assert length >= 0; Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length));
/**
 * Builds the delta subdirectory name for one statement: the plain
 * {@code delta_min_max} name with a zero-padded statement id appended.
 * Each write statement in a transaction creates its own delta dir.
 *
 * @param min         minimum write id of the delta
 * @param max         maximum write id of the delta
 * @param statementId statement id within the transaction
 * @since 1.3.x
 */
public static String deltaSubdir(long min, long max, int statementId) {
  String statementSuffix = String.format(STATEMENT_DIGITS, statementId);
  return deltaSubdir(min, max) + "_" + statementSuffix;
}