/**
 * Couples a partition with its statistics and the canonical partition name.
 *
 * @throws NullPointerException if any argument is null
 * @throws IllegalArgumentException if the name does not decode to the partition's value list
 */
public PartitionWithStatistics(Partition partition, String partitionName, PartitionStatistics statistics)
{
    this.partition = requireNonNull(partition, "partition is null");
    this.partitionName = requireNonNull(partitionName, "partitionName is null");
    // The name is the canonical encoding of the value list; reject mismatched pairs up front.
    checkArgument(
            partition.getValues().equals(toPartitionValues(partitionName)),
            "unexpected partition name: %s != %s",
            partitionName,
            partition.getValues());
    this.statistics = requireNonNull(statistics, "statistics is null");
}
/**
 * Derives the canonical Hive partition name for each partition from the
 * table's partition columns and each partition's value list.
 */
private static List<String> buildPartitionNames(List<Column> partitionColumns, List<Partition> partitions)
{
    return partitions.stream()
            .map(Partition::getValues)
            .map(values -> makePartName(partitionColumns, values))
            .collect(toList());
}
/**
 * Records an alter of a single partition: the replacement partition and the
 * partition it replaces. Both must identify the same partition (same database,
 * table, and partition values).
 *
 * @throws NullPointerException if either argument is null
 * @throws IllegalArgumentException if the two partitions do not identify the same partition
 */
public AlterPartitionOperation(PartitionWithStatistics newPartition, PartitionWithStatistics oldPartition)
{
    this.newPartition = requireNonNull(newPartition, "newPartition is null");
    this.oldPartition = requireNonNull(oldPartition, "oldPartition is null");
    // Message templates added: a bare checkArgument failure produces an
    // IllegalArgumentException with no message, which is undiagnosable in logs.
    checkArgument(
            newPartition.getPartition().getDatabaseName().equals(oldPartition.getPartition().getDatabaseName()),
            "database name does not match: %s != %s",
            newPartition.getPartition().getDatabaseName(),
            oldPartition.getPartition().getDatabaseName());
    checkArgument(
            newPartition.getPartition().getTableName().equals(oldPartition.getPartition().getTableName()),
            "table name does not match: %s != %s",
            newPartition.getPartition().getTableName(),
            oldPartition.getPartition().getTableName());
    checkArgument(
            newPartition.getPartition().getValues().equals(oldPartition.getPartition().getValues()),
            "partition values do not match: %s != %s",
            newPartition.getPartition().getValues(),
            oldPartition.getPartition().getValues());
}
/**
 * Builds the list of partition key name/value pairs for a partition of the
 * given table. Returns an empty list for an unpartitioned split.
 *
 * @throws PrestoException if the key/value counts disagree, a key type is
 *         unsupported, or a partition value is null
 */
private static List<HivePartitionKey> getPartitionKeys(Table table, Optional<Partition> partition)
{
    if (!partition.isPresent()) {
        return ImmutableList.of();
    }
    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionValues = partition.get().getValues();
    // Metastore corruption guard: the partition must supply exactly one value per key column.
    checkCondition(partitionColumns.size() == partitionValues.size(), HIVE_INVALID_METADATA, "Expected %s partition key values, but got %s", partitionColumns.size(), partitionValues.size());
    ImmutableList.Builder<HivePartitionKey> result = ImmutableList.builder();
    for (int i = 0; i < partitionColumns.size(); i++) {
        Column column = partitionColumns.get(i);
        HiveType hiveType = column.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
        }
        String value = partitionValues.get(i);
        checkCondition(value != null, HIVE_INVALID_PARTITION_VALUE, "partition key value cannot be null for field: %s", column.getName());
        result.add(new HivePartitionKey(column.getName(), value));
    }
    return result.build();
}
/**
 * Human-readable description of this operation, e.g.
 * {@code alter partition db.table [v1, v2]}.
 */
public String getDescription()
{
    Partition partition = newPartition.getPartition();
    return format(
            "alter partition %s.%s %s",
            partition.getDatabaseName(),
            partition.getTableName(),
            partition.getValues());
}
checkArgument(getPrestoQueryId(partition).isPresent()); Map<List<String>, Action<PartitionAndMore>> partitionActionsOfTable = partitionActions.computeIfAbsent(new SchemaTableName(databaseName, tableName), k -> new HashMap<>()); Action<PartitionAndMore> oldPartitionAction = partitionActionsOfTable.get(partition.getValues()); HdfsContext context = new HdfsContext(session, databaseName, tableName); if (oldPartitionAction == null) { partitionActionsOfTable.put( partition.getValues(), new Action<>(ActionType.ADD, new PartitionAndMore(partition, currentLocation, Optional.empty(), statistics, statistics), context)); return; partition.getValues(), new Action<>(ActionType.ALTER, new PartitionAndMore(partition, currentLocation, Optional.empty(), statistics, statistics), context)); break; case ALTER: case INSERT_EXISTING: throw new PrestoException(ALREADY_EXISTS, format("Partition already exists for table '%s.%s': %s", databaseName, tableName, partition.getValues())); default: throw new IllegalStateException("Unknown action type");
/**
 * Fetches the current statistics for a partition from the underlying metastore.
 * A missing entry means the partition was deleted by a concurrent transaction
 * and is surfaced as a TRANSACTION_CONFLICT; corrupted column statistics are
 * logged and treated as empty so the alter can proceed.
 */
private PartitionStatistics getExistingPartitionStatistics(Partition partition, String partitionName)
{
    try {
        PartitionStatistics existing = delegate.getPartitionStatistics(partition.getDatabaseName(), partition.getTableName(), ImmutableSet.of(partitionName))
                .get(partitionName);
        if (existing == null) {
            throw new PrestoException(
                    TRANSACTION_CONFLICT,
                    format("The partition that this transaction modified was deleted in another transaction. %s %s", partition.getTableName(), partition.getValues()));
        }
        return existing;
    }
    catch (PrestoException e) {
        // Only corrupted-statistics errors are recoverable; everything else propagates.
        if (!e.getErrorCode().equals(HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode())) {
            throw e;
        }
        log.warn(
                e,
                "Corrupted statistics found when altering partition. Table: %s.%s. Partition: %s",
                partition.getDatabaseName(),
                partition.getTableName(),
                partition.getValues());
        return PartitionStatistics.empty();
    }
}
/**
 * Replaces a partition's metadata on disk. The partition location is validated
 * against the table type before the schema file is overwritten.
 */
@Override
public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics)
{
    Table table = getRequiredTable(databaseName, tableName);
    Partition partition = partitionWithStatistics.getPartition();
    verifiedPartition(table, partition);
    Path metadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
    // Last argument true — NOTE(review): presumably "overwrite existing file"; confirm against writeSchemaFile.
    writeSchemaFile("partition", metadataDirectory, partitionCodec, new PartitionMetadata(table, partitionWithStatistics), true);
}
/**
 * Initializes a builder pre-populated from an existing partition, so callers
 * can modify a copy rather than mutate the original.
 */
private Builder(Partition partition)
{
    this.databaseName = partition.getDatabaseName();
    this.tableName = partition.getTableName();
    this.values = partition.getValues();
    this.columns = partition.getColumns();
    this.parameters = partition.getParameters();
    // Storage is itself built via a nested builder seeded from the source partition.
    this.storageBuilder = Storage.builder(partition.getStorage());
}
// Stages the work for inserting data into a partition that already exists:
// registers an abort-time cleanup task, schedules asynchronous renames of the
// staged files into the partition directory, and queues a statistics update
// to be applied at commit.
private void prepareInsertExistingPartition(HdfsContext context, PartitionAndMore partitionAndMore)
{
    // An insert writes data, so this transaction is no longer delete-only.
    deleteOnly = false;
    Partition partition = partitionAndMore.getPartition();
    Path targetPath = new Path(partition.getStorage().getLocation());
    Path currentPath = partitionAndMore.getCurrentLocation();
    // Register cleanup BEFORE any file movement so an abort can undo partial renames.
    // NOTE(review): the trailing 'false' presumably means "do not delete the (pre-existing)
    // directory itself on abort" — confirm against DirectoryCleanUpTask.
    cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, false));
    if (!targetPath.equals(currentPath)) {
        // Data was staged in a temporary location; move only the listed files into the partition directory.
        asyncRename(hdfsEnvironment, renameExecutor, fileRenameCancelled, fileRenameFutures, context, currentPath, targetPath, partitionAndMore.getFileNames());
    }
    // NOTE(review): the trailing 'true' flag presumably marks this as a merge with the
    // partition's existing statistics (insert-into semantics) — confirm against UpdateStatisticsOperation.
    updateStatisticsOperations.add(new UpdateStatisticsOperation(
            new SchemaTableName(partition.getDatabaseName(), partition.getTableName()),
            Optional.of(getPartitionName(partition.getDatabaseName(), partition.getTableName(), partition.getValues())),
            partitionAndMore.getStatisticsUpdate(),
            true));
}
/**
 * Verifies that converting a partition to a Glue {@code PartitionInput}
 * carries over parameters, storage descriptor, and partition values.
 */
@Test
public void testConvertPartition()
{
    PartitionInput converted = GlueInputConverter.convertPartition(testPartition);
    assertEquals(converted.getParameters(), testPartition.getParameters());
    assertStorage(converted.getStorageDescriptor(), testPartition.getStorage());
    assertEquals(converted.getValues(), testPartition.getValues());
}
/**
 * Converts a Presto partition into the Glue {@code PartitionInput} shape,
 * copying values, storage descriptor, and parameters.
 */
public static PartitionInput convertPartition(Partition partition)
{
    PartitionInput result = new PartitionInput();
    result.setParameters(partition.getParameters());
    result.setValues(partition.getValues());
    result.setStorageDescriptor(convertStorage(partition.getStorage(), partition.getColumns()));
    return result;
}
/**
 * Updates an existing partition in Glue. A Glue "entity not found" is mapped
 * to {@link PartitionNotFoundException}; any other AWS failure is wrapped as
 * a {@code HIVE_METASTORE_ERROR}.
 */
@Override
public void alterPartition(String databaseName, String tableName, PartitionWithStatistics partition)
{
    try {
        PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
        UpdatePartitionRequest request = new UpdatePartitionRequest()
                .withDatabaseName(databaseName)
                .withTableName(tableName)
                .withPartitionInput(partitionInput)
                .withPartitionValueList(partition.getPartition().getValues());
        glueClient.updatePartition(request);
    }
    catch (EntityNotFoundException e) {
        throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partition.getPartition().getValues());
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
/**
 * Validates a partition's storage location against its table type: managed
 * partitions must live in the computed metadata directory; external partitions
 * must point at an existing directory outside the system metadata tree.
 *
 * @throws PrestoException if the location is invalid or the table type does not support partitions
 */
private void verifiedPartition(Table table, Partition partition)
{
    Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
    if (table.getTableType().equals(MANAGED_TABLE.name())) {
        // Managed partitions have exactly one legal location.
        Path location = new Path(partition.getStorage().getLocation());
        if (!partitionMetadataDirectory.equals(location)) {
            throw new PrestoException(HIVE_METASTORE_ERROR, "Partition directory must be " + partitionMetadataDirectory);
        }
        return;
    }
    if (!table.getTableType().equals(EXTERNAL_TABLE.name())) {
        // Neither managed nor external (e.g. a view): partitions are not supported.
        throw new PrestoException(NOT_SUPPORTED, "Partitions can not be added to " + table.getTableType());
    }
    try {
        Path externalLocation = new Path(partition.getStorage().getLocation());
        FileSystem externalFileSystem = hdfsEnvironment.getFileSystem(hdfsContext, externalLocation);
        if (!externalFileSystem.isDirectory(externalLocation)) {
            throw new PrestoException(HIVE_METASTORE_ERROR, "External partition location does not exist");
        }
        // External data must not live under the metastore's own directory tree.
        if (isChildDirectory(catalogDirectory, externalLocation)) {
            throw new PrestoException(HIVE_METASTORE_ERROR, "External partition location can not be inside the system metadata directory");
        }
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Could not validate external partition location", e);
    }
}
/**
 * Verifies that converting a Glue partition to the Presto representation
 * carries over identity, columns, values, storage, and parameters.
 */
@Test
public void testConvertPartition()
{
    com.facebook.presto.hive.metastore.Partition converted = GlueToPrestoConverter.convertPartition(testPartition);
    assertEquals(converted.getDatabaseName(), testPartition.getDatabaseName());
    assertEquals(converted.getTableName(), testPartition.getTableName());
    assertColumnList(converted.getColumns(), testPartition.getStorageDescriptor().getColumns());
    assertEquals(converted.getValues(), testPartition.getValues());
    assertStorage(converted.getStorage(), testPartition.getStorageDescriptor());
    assertEquals(converted.getParameters(), testPartition.getParameters());
}
// Stages the creation of a new partition: moves (or creates) the partition
// directory, registers abort-time cleanup, and enqueues the partition on the
// per-table batching adder for the eventual metastore call.
private void prepareAddPartition(HdfsContext context, PartitionAndMore partitionAndMore)
{
    // Adding a partition writes data, so this transaction is no longer delete-only.
    deleteOnly = false;
    Partition partition = partitionAndMore.getPartition();
    String targetLocation = partition.getStorage().getLocation();
    Path currentPath = partitionAndMore.getCurrentLocation();
    Path targetPath = new Path(targetLocation);
    SchemaTableName schemaTableName = new SchemaTableName(partition.getDatabaseName(), partition.getTableName());
    // One adder per table; it batches metastore add-partition calls up to PARTITION_COMMIT_BATCH_SIZE.
    PartitionAdder partitionAdder = partitionAdders.computeIfAbsent(
            schemaTableName,
            ignored -> new PartitionAdder(partition.getDatabaseName(), partition.getTableName(), delegate, PARTITION_COMMIT_BATCH_SIZE));
    if (pathExists(context, hdfsEnvironment, currentPath)) {
        // Data was staged: rename it into place unless it is already at the target.
        // The cleanup task is registered via the rename callback so it is only added
        // once the target directory actually came into existence.
        if (!targetPath.equals(currentPath)) {
            renameDirectory(
                    context,
                    hdfsEnvironment,
                    currentPath,
                    targetPath,
                    () -> cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, true)));
        }
    }
    else {
        // No staged data (empty partition): create the directory, registering cleanup first
        // so an abort removes it even if creation partially succeeds.
        cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, true));
        createDirectory(context, hdfsEnvironment, targetPath);
    }
    String partitionName = getPartitionName(partition.getDatabaseName(), partition.getTableName(), partition.getValues());
    partitionAdder.addPartition(new PartitionWithStatistics(partition, partitionName, partitionAndMore.getStatisticsUpdate()));
}
// Applies a read-modify-write statistics update to a single partition in Glue.
// Glue has no separate statistics store, so basic statistics are persisted as
// partition parameters; column-level statistics are rejected.
@Override
public void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update)
{
    PartitionStatistics currentStatistics = getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName);
    if (currentStatistics == null) {
        // The partition existed when the query planned but is gone now.
        throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName);
    }
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    if (!updatedStatistics.getColumnStatistics().isEmpty()) {
        throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
    }
    List<String> partitionValues = toPartitionValues(partitionName);
    // Re-fetch the partition so the update is based on its current definition.
    Partition partition = getPartition(databaseName, tableName, partitionValues)
            .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues));
    try {
        PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
        // Encode the new basic statistics into the partition's parameter map.
        partitionInput.setParameters(updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics()));
        glueClient.updatePartition(new UpdatePartitionRequest()
                .withDatabaseName(databaseName)
                .withTableName(tableName)
                .withPartitionValueList(partition.getValues())
                .withPartitionInput(partitionInput));
    }
    catch (EntityNotFoundException e) {
        // Partition deleted between the re-fetch and the update call.
        throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues);
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
/**
 * Converts a Presto partition into the Thrift metastore API representation,
 * copying identity, values, storage descriptor, and parameters.
 */
public static org.apache.hadoop.hive.metastore.api.Partition toMetastoreApiPartition(Partition partition)
{
    org.apache.hadoop.hive.metastore.api.Partition apiPartition = new org.apache.hadoop.hive.metastore.api.Partition();
    apiPartition.setDbName(partition.getDatabaseName());
    apiPartition.setTableName(partition.getTableName());
    apiPartition.setValues(partition.getValues());
    apiPartition.setSd(makeStorageDescriptor(partition.getTableName(), partition.getColumns(), partition.getStorage()));
    apiPartition.setParameters(partition.getParameters());
    return apiPartition;
}
/**
 * Resets the table-level statistics of the given table to empty, and — for a
 * partitioned table — resets the statistics of every existing partition too.
 * Used by tests to return the metastore to a statistics-free baseline.
 */
private void eraseStatistics(SchemaTableName schemaTableName)
{
    ExtendedHiveMetastore metastoreClient = getMetastoreClient();
    metastoreClient.updateTableStatistics(schemaTableName.getSchemaName(), schemaTableName.getTableName(), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    Table table = metastoreClient.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (!table.getPartitionColumns().isEmpty()) {
        // Only compute the column-name list when it is actually needed (partitioned table).
        List<String> partitionColumns = table.getPartitionColumns().stream()
                .map(Column::getName)
                .collect(toImmutableList());
        List<String> partitionNames = metastoreClient.getPartitionNames(schemaTableName.getSchemaName(), schemaTableName.getTableName())
                .orElse(ImmutableList.of());
        // Resolve names to partitions, skipping any that disappeared concurrently.
        List<Partition> partitions = metastoreClient
                .getPartitionsByNames(schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNames)
                .values()
                .stream()
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(toImmutableList());
        for (Partition partition : partitions) {
            metastoreClient.updatePartitionStatistics(
                    schemaTableName.getSchemaName(),
                    schemaTableName.getTableName(),
                    makePartName(partitionColumns, partition.getValues()),
                    statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
        }
    }
}
// Post-test verification and teardown for the conflicting-partition scenario:
// asserts the conflict partition inserted out-of-band was not overwritten,
// then drops it so the metastore is left clean.
@Override
public void verifyAndCleanup(SchemaTableName tableName)
{
    // This method bypasses transaction interface because this method is inherently hacky and doesn't work well with the transaction abstraction.
    // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test.
    ExtendedHiveMetastore metastoreClient = getMetastoreClient();
    Optional<Partition> actualPartition = metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), toPartitionValues(partitionNameToConflict));
    // Make sure the partition inserted to trigger conflict was not overwritten
    // Checking storage location is sufficient because implement never uses .../pk1=a/pk2=a2 as the directory for partition [b, b2].
    // NOTE(review): .get() without isPresent() — if the partition is absent this fails with
    // NoSuchElementException rather than an assertion message; acceptable in a test fixture.
    assertEquals(actualPartition.get().getStorage().getLocation(), conflictPartition.getStorage().getLocation());
    // Best-effort drop (deleteData = false): metadata only, data files are left in place.
    metastoreClient.dropPartition(tableName.getSchemaName(), tableName.getTableName(), conflictPartition.getValues(), false);
}
}