/**
 * Looks up the {@link IMetaStoreClient} registered under the provided name.
 *
 * @param name the key the client was registered under
 * @return the registered client
 * @throws IOException if no client is registered under {@code name}
 */
public IMetaStoreClient getClient(String name) throws IOException {
  // NOTE(review): containsKey()+get() performs two map lookups and is a
  // check-then-act pair; collapsing to a single get() would need the map's
  // value type, which is declared outside this view — confirm before changing.
  if (this.clients.containsKey(name)) {
    return this.clients.get(name).get();
  }
  throw new IOException("There is no client with name " + name);
}
/**
 * Borrows a pooled {@link IMetaStoreClient} wrapped in an auto-returnable
 * handle: closing the wrapper (e.g. via try-with-resources) returns the
 * client to {@code this.pool} rather than destroying it.
 *
 * @return an auto returnable wrapper around a {@link IMetaStoreClient}.
 * @throws IOException
 * Note: if you must acquire multiple locks, please use {@link #safeGetClients} instead, as this call may deadlock.
 */
public AutoReturnableObject<IMetaStoreClient> getClient() throws IOException {
  return new AutoReturnableObject<>(this.pool);
}
/**
 * Fetches the {@link Partition} object for a fully qualified partition name of
 * the form {@code db@table@partition} (split on the At_SPLITTER delimiter).
 *
 * @param completePartitionName delimiter-separated db, table and partition name;
 *                              must split into exactly three parts
 * @return the partition if found, otherwise {@link Optional#absent()}
 */
@VisibleForTesting
public Optional<Partition> getPartitionObject(String completePartitionName) {
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    List<String> partitionList = At_SPLITTER.splitToList(completePartitionName);
    if (partitionList.size() != 3) {
      log.warn("Invalid partition name " + completePartitionName);
      return Optional.<Partition>absent();
    }
    Partition sourcePartition =
        client.get().getPartition(partitionList.get(0), partitionList.get(1), partitionList.get(2));
    // getPartition may return null; fromNullable maps that to absent.
    return Optional.fromNullable(sourcePartition);
  } catch (IOException | TException e) {
    // Fix: pass the exception to the logger so the cause and stack trace are
    // retained — previously only the bare message was logged.
    log.warn("Unable to get partition object from metastore for partition " + completePartitionName, e);
  }
  return Optional.<Partition>absent();
}
/**
 * Adds the given partition to the metastore.
 *
 * @param destPartition         the partition object to add
 * @param completePartitionName fully qualified partition name, used only for logging
 * @return true if the partition was added, false on any failure (best-effort; never throws)
 */
@VisibleForTesting
public boolean addPartition(Partition destPartition, String completePartitionName) {
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    client.get().add_partition(destPartition);
    return true;
  } catch (IOException | TException e) {
    // Fix: include the exception so the failure cause is visible in the log —
    // previously the stack trace was silently dropped.
    log.warn("Unable to add Partition " + completePartitionName, e);
  }
  return false;
}
/**
 * Drops the partition identified by a fully qualified {@code db@table@partition}
 * name (split on the At_SPLITTER delimiter). Does not delete partition data
 * (deleteData is passed as false).
 *
 * @param completePartitionName delimiter-separated db, table and partition name;
 *                              must split into exactly three parts
 * @return true if the partition was dropped, false on any failure (best-effort; never throws)
 */
@VisibleForTesting
public boolean dropPartition(String completePartitionName) {
  List<String> partitionList = At_SPLITTER.splitToList(completePartitionName);
  if (partitionList.size() != 3) {
    log.warn("Invalid partition name " + completePartitionName);
    return false;
  }
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    // Last argument (deleteData=false) keeps the underlying data intact.
    client.get().dropPartition(partitionList.get(0), partitionList.get(1), partitionList.get(2), false);
    return true;
  } catch (IOException | TException e) {
    // Fix: log the exception itself so the failure cause is not swallowed.
    log.warn("Unable to drop Partition " + completePartitionName, e);
  }
  return false;
}
/**
 * Creates the database {@code dbName} in the metastore if it is not already
 * present, borrowing a client from the pool for the duration of the call and
 * delegating to the client-taking overload.
 */
@Override
public boolean createDbIfNotExists(String dbName) throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    IMetaStoreClient metastore = client.get();
    return createDbIfNotExists(metastore, dbName);
  }
}
/**
 * Get all tables in db with given table pattern.
 *
 * <p>Walks every database visible to the metastore client, keeps only those
 * accepted by the whitelist/blacklist, and collects each accepted table as a
 * {@link DbAndTable}. Any failure is wrapped in an {@link IOException}.
 */
public Collection<DbAndTable> getTables() throws IOException {
  List<DbAndTable> found = Lists.newArrayList();
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    for (String db : client.get().getAllDatabases()) {
      // Skip databases rejected by the configured whitelist/blacklist.
      if (!HiveDatasetFinder.this.whitelistBlacklist.acceptDb(db)) {
        continue;
      }
      for (String tableName : client.get().getAllTables(db)) {
        if (HiveDatasetFinder.this.whitelistBlacklist.acceptTable(db, tableName)) {
          found.add(new DbAndTable(db, tableName));
        }
      }
    }
  } catch (Exception exc) {
    throw new IOException(exc);
  }
  return found;
}
/**
 * Checks whether {@code dbName.tableName} exists in the metastore.
 *
 * <p>When optimized checks are enabled, a positive entry in the existence
 * cache (keyed as {@code db:table}) short-circuits the metastore round trip.
 * A cache miss falls through to a timed {@code tableExists} call.
 *
 * @throws IOException if the metastore call fails
 */
@Override
public boolean existsTable(String dbName, String tableName) throws IOException {
  String cacheKey = dbName + ":" + tableName;
  if (this.optimizedChecks && this.tableAndDbExistenceCache.getIfPresent(cacheKey) != null) {
    return true;
  }
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    try (Timer.Context timedScope = this.metricContext.timer(TABLE_EXISTS).time()) {
      return client.get().tableExists(dbName, tableName);
    }
  } catch (TException e) {
    throw new IOException(String.format("Unable to check existence of table %s in db %s", tableName, dbName), e);
  }
}
/**
 * Fetches the destination table and, when the table is partitioned, all of its
 * partitions from the metastore.
 *
 * <p>A missing table ({@link NoSuchObjectException}) is not an error: the pair
 * of absent Optionals accumulated so far is returned. Any other metastore or
 * I/O failure is rethrown as a {@link RuntimeException}.
 *
 * @param dbName    destination database name
 * @param tableName destination table name
 * @param props     unused in this body — presumably kept for signature
 *                  compatibility with callers; TODO confirm
 * @return pair of (table if found, partitions if table is partitioned)
 */
private Pair<Optional<org.apache.hadoop.hive.metastore.api.Table>, Optional<List<Partition>>> getDestinationTableMeta(String dbName,
    String tableName, Properties props) {
  Optional<org.apache.hadoop.hive.metastore.api.Table> table = Optional.absent();
  Optional<List<Partition>> partitions = Optional.absent();
  try {
    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      table = Optional.of(client.get().getTable(dbName, tableName));
      // NOTE(review): Optional.of never yields absent, so this isPresent()
      // check is always true at this point — dead guard kept as-is.
      if (table.isPresent()) {
        org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
        if (HiveUtils.isPartitioned(qlTable)) {
          partitions = Optional.of(HiveUtils.getPartitions(client.get(), qlTable, Optional.<String> absent()));
        }
      }
    }
  } catch (NoSuchObjectException e) {
    // Table does not exist at the destination: return absent/absent quietly.
    return ImmutablePair.of(table, partitions);
  } catch (IOException | TException e) {
    throw new RuntimeException("Could not fetch destination table metadata", e);
  }
  return ImmutablePair.of(table, partitions);
}
} // closes the enclosing class — its header is outside this view
/**
 * Checks whether the partition identified by {@code partitionValues} exists in
 * {@code dbName.tableName}, by attempting a timed {@code getPartition} lookup.
 *
 * @param partitionKeys   unused by this lookup; part of the interface signature
 * @param partitionValues partition value list identifying the partition
 * @return true if the lookup succeeds, false if the partition is missing
 * @throws IOException on any other metastore failure
 */
@Override
public boolean existsPartition(String dbName, String tableName, List<Column> partitionKeys,
    List<String> partitionValues) throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    // The timer only brackets the metastore round trip itself.
    try (Timer.Context timedScope = this.metricContext.timer(GET_HIVE_PARTITION).time()) {
      client.get().getPartition(dbName, tableName, partitionValues);
    }
    return true;
  } catch (NoSuchObjectException e) {
    // Missing partition is the expected "does not exist" signal, not an error.
    return false;
  } catch (TException e) {
    throw new IOException(String.format("Unable to check existence of partition %s in table %s in db %s",
        partitionValues, tableName, dbName), e);
  }
}
/**
 * Retrieves {@code dbName.tableName} from the metastore and converts it to a
 * {@link HiveTable}.
 *
 * @return the converted table, or absent if the table does not exist
 * @throws IOException on any other metastore failure
 */
@Override
public Optional<HiveTable> getTable(String dbName, String tableName) throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    Table fetched;
    // Time only the metastore round trip, not the conversion.
    try (Timer.Context timedScope = this.metricContext.timer(GET_HIVE_TABLE).time()) {
      fetched = client.get().getTable(dbName, tableName);
    }
    return Optional.of(HiveMetaStoreUtils.getHiveTable(fetched));
  } catch (NoSuchObjectException e) {
    // A missing table maps to absent rather than an exception.
    return Optional.<HiveTable> absent();
  } catch (TException e) {
    throw new IOException("Unable to get table " + tableName + " in db " + dbName, e);
  }
}
/**
 * Retrieves the partition identified by {@code partitionValues} from
 * {@code dbName.tableName} and converts it to a {@link HivePartition}.
 *
 * @param partitionKeys   unused by this lookup; part of the interface signature
 * @param partitionValues partition value list identifying the partition
 * @return the converted partition, or absent if it does not exist
 * @throws IOException on any other metastore failure
 */
@Override
public Optional<HivePartition> getPartition(String dbName, String tableName, List<Column> partitionKeys,
    List<String> partitionValues) throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    Partition fetched;
    // Time only the metastore round trip, not the conversion.
    try (Timer.Context timedScope = this.metricContext.timer(GET_HIVE_PARTITION).time()) {
      fetched = client.get().getPartition(dbName, tableName, partitionValues);
    }
    return Optional.of(HiveMetaStoreUtils.getHivePartition(fetched));
  } catch (NoSuchObjectException e) {
    // A missing partition maps to absent rather than an exception.
    return Optional.<HivePartition> absent();
  } catch (TException e) {
    throw new IOException(
        "Unable to get partition " + partitionValues + " from table " + tableName + " in db " + dbName, e);
  }
}
/**
 * Iterates over every dataset found by the dataset finder and, for each
 * partition whose directory name looks like a unix timestamp, deletes sibling
 * paths under the partition's parent directory that are not the partition's
 * own data location (subject to the configured grace period).
 *
 * <p>NOTE(review): the iterator's elements are unconditionally cast to
 * {@link ConvertibleHiveDataset} — presumably the finder only yields that
 * type; confirm, otherwise this throws ClassCastException.
 */
@Override
public void run() throws Exception {
  Iterator<HiveDataset> iterator = this.datasetFinder.getDatasetsIterator();
  while (iterator.hasNext()) {
    ConvertibleHiveDataset hiveDataset = (ConvertibleHiveDataset) iterator.next();
    try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {
      // All partitions of the table; the Set de-duplicates.
      Set<Partition> sourcePartitions =
          new HashSet<>(HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String>absent()));
      // Parallel side-effecting deletion: deletePath and listFiles must be
      // thread-safe for this to be correct — assumed, TODO confirm.
      sourcePartitions.parallelStream().filter(partition -> isUnixTimeStamp(partition.getDataLocation().getName()))
          .forEach(partition -> {
            // Delete every sibling of the partition's data directory except
            // the data directory itself.
            Arrays.stream(listFiles(partition.getDataLocation().getParent())).filter(
                fileStatus -> !fileStatus.getPath().toString()
                    .equalsIgnoreCase(partition.getDataLocation().toString())).forEach(fileStatus -> {
              deletePath(fileStatus, this.graceTimeInMillis, true);
            });
          });
    }
  }
}
/**
 * Builds the compaction entity for the work unit's table by reading the
 * primary-key and delta column lists from the table's metastore parameters.
 *
 * <p>Watermark work units carry no table data, so they are skipped entirely.
 *
 * @param state work unit state carrying dataset/compaction configuration
 * @param fs    filesystem handle (not used in this constructor body)
 * @throws IOException   on client pool failures
 * @throws TException    on metastore failures
 * @throws HiveException propagated from the superclass
 */
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs)
    throws IOException, TException, HiveException {
  super(state);
  // Fix: getPropAsBoolean already returns a primitive boolean; the former
  // Boolean.valueOf(...) wrapper was a redundant box/unbox round trip.
  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }
  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);
    // NOTE(review): if the table parameter is absent these strings are null and
    // splitToList throws NPE — confirm the compaction params are always set.
    String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
    List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(primaryKeyString);
    String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
    List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(deltaString);
    Path dataFilesPath = new Path(table.getSd().getLocation());
    compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
  }
}
List<Partition> partitions = HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String> absent()); return Lists.newArrayList(Iterables.filter(Iterables.transform(partitions, new Function<Partition, HiveDatasetVersion>() {
/**
 * This method returns a sorted list of partitions.
 *
 * <p>Borrows a metastore client for the duration of the lookup, fetches every
 * partition of this dataset's table, and returns them through
 * {@code sortPartitions}.
 */
public List<Partition> getPartitionsFromDataset() throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = getClientPool().getClient()) {
    List<Partition> unsorted = HiveUtils.getPartitions(client.get(), getTable(), Optional.<String>absent());
    return sortPartitions(unsorted);
  }
}
for (URI uri : importedBy) { String dbName = new Path(uri).getParent().getName(); Table table = new Table(client.get().getTable(dbName, new Path(uri).getName())); for (org.apache.hadoop.hive.metastore.api.Partition partition : client.get() .listPartitions(dbName, table.getTableName(), maxParts)) { partitions.add(new Partition(table, partition));
/**
 * Fetches the destination table and, when the table is partitioned, all of its
 * partitions from the metastore addressed by the job's configured URI.
 *
 * <p>A missing table ({@link NoSuchObjectException}) is not an error: the pair
 * of absent Optionals accumulated so far is returned. Any other metastore or
 * I/O failure is rethrown as a {@link DataConversionException}.
 *
 * @param dbName    destination database name
 * @param tableName destination table name
 * @param state     work unit state providing the metastore URI and job properties
 * @return pair of (table if found, partitions if table is partitioned)
 * @throws DataConversionException if the metastore lookup fails
 */
private Pair<Optional<Table>, Optional<List<Partition>>> getDestinationTableMeta(String dbName, String tableName,
    WorkUnitState state) throws DataConversionException {
  Optional<Table> table = Optional.<Table>absent();
  Optional<List<Partition>> partitions = Optional.<List<Partition>>absent();
  try {
    HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
        Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      table = Optional.of(client.get().getTable(dbName, tableName));
      // Fix: the former `if (table.isPresent())` guard was dead — Optional.of
      // never yields absent (a null table would already have thrown NPE), so
      // the value is used directly.
      org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
      if (HiveUtils.isPartitioned(qlTable)) {
        partitions = Optional.of(HiveUtils.getPartitions(client.get(), qlTable, Optional.<String>absent()));
      }
    }
  } catch (NoSuchObjectException e) {
    // Table does not exist at the destination: return absent/absent quietly.
    return ImmutablePair.of(table, partitions);
  } catch (IOException | TException e) {
    throw new DataConversionException("Could not fetch destination table metadata", e);
  }
  return ImmutablePair.of(table, partitions);
}
/**
 * Registers the table (and, if present, the partition) described by the given
 * {@link HiveSpec} with the metastore, timing the whole operation.
 *
 * <p>On success a success event is submitted; on any {@link TException} a
 * failure event is submitted and the error is rethrown as an
 * {@link IOException}.
 */
@Override
protected void registerPath(HiveSpec spec) throws IOException {
  try (Timer.Context timedScope = this.metricContext.timer(PATH_REGISTER_TIMER).time();
      AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    Table table = HiveMetaStoreUtils.getTable(spec.getTable());
    // Database must exist before the table can be created or altered.
    createDbIfNotExists(client.get(), table.getDbName());
    createOrAlterTable(client.get(), table, spec);
    Optional<HivePartition> maybePartition = spec.getPartition();
    if (maybePartition.isPresent()) {
      addOrAlterPartition(client.get(), table, HiveMetaStoreUtils.getPartition(maybePartition.get()), spec);
    }
    HiveMetaStoreEventHelper.submitSuccessfulPathRegistration(eventSubmitter, spec);
  } catch (TException e) {
    HiveMetaStoreEventHelper.submitFailedPathRegistration(eventSubmitter, spec, e);
    throw new IOException(e);
  }
}
public HiveConvertExtractor(WorkUnitState state, FileSystem fs) throws IOException, TException, HiveException { super(state); if (Boolean.valueOf(state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY))) { log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY)); return; } if (!(this.hiveDataset instanceof ConvertibleHiveDataset)) { throw new IllegalStateException("HiveConvertExtractor is only compatible with ConvertibleHiveDataset"); } ConvertibleHiveDataset convertibleHiveDataset = (ConvertibleHiveDataset) this.hiveDataset; try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) { Table table = client.get().getTable(this.dbName, this.tableName); SchemaAwareHiveTable schemaAwareHiveTable = new SchemaAwareHiveTable(table, AvroSchemaManager.getSchemaFromUrl(this.hiveWorkUnit.getTableSchemaUrl(), fs)); SchemaAwareHivePartition schemaAwareHivePartition = null; if (this.hiveWorkUnit.getPartitionName().isPresent() && this.hiveWorkUnit.getPartitionSchemaUrl().isPresent()) { Partition partition = client.get().getPartition(this.dbName, this.tableName, this.hiveWorkUnit.getPartitionName().get()); schemaAwareHivePartition = new SchemaAwareHivePartition(table, partition, AvroSchemaManager.getSchemaFromUrl(this.hiveWorkUnit.getPartitionSchemaUrl().get(), fs)); } QueryBasedHiveConversionEntity entity = new QueryBasedHiveConversionEntity(convertibleHiveDataset, schemaAwareHiveTable, Optional.fromNullable(schemaAwareHivePartition)); this.conversionEntities.add(entity); } }