private void checkPartitionedTableCompatibility(Table desiredTargetTable, Table existingTargetTable)
    throws IOException {
  // The desired and existing target tables must share a data location.
  if (!desiredTargetTable.getDataLocation().equals(existingTargetTable.getDataLocation())) {
    throw new HiveTableLocationNotMatchException(desiredTargetTable.getDataLocation(),
        existingTargetTable.getDataLocation());
  }

  // Both tables must agree on whether they are partitioned.
  if (HiveUtils.isPartitioned(desiredTargetTable) != HiveUtils.isPartitioned(existingTargetTable)) {
    throw new IOException(String.format(
        "%s: Desired target table %s partitioned, existing target table %s partitioned. Tables are incompatible.",
        this.dataset.tableIdentifier, HiveUtils.isPartitioned(desiredTargetTable) ? "is" : "is not",
        HiveUtils.isPartitioned(existingTargetTable) ? "is" : "is not"));
  }

  // If partitioned, the partition keys must match exactly.
  if (desiredTargetTable.isPartitioned()
      && !desiredTargetTable.getPartitionKeys().equals(existingTargetTable.getPartitionKeys())) {
    throw new IOException(String.format(
        "%s: Desired target table has partition keys %s, existing target table has partition keys %s. "
            + "Tables are incompatible.",
        this.dataset.tableIdentifier, gson.toJson(desiredTargetTable.getPartitionKeys()),
        gson.toJson(existingTargetTable.getPartitionKeys())));
  }
}
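Every check above funnels through HiveUtils.isPartitioned. A minimal sketch of such a helper, assuming it simply delegates to Hive's ql-layer Table API; the actual Gobblin implementation may differ:

// Hypothetical sketch of the helper used throughout these excerpts.
public static boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table table) {
  // Hive considers a table partitioned iff it declares at least one partition key.
  return table.isPartitioned();
}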
if (HiveUtils.isPartitioned(hiveDataset.getTable())) {
  processPartitionedTable(hiveDataset, client);
} else {
  // Non-partitioned handling elided in this excerpt.
if (!HiveUtils.isPartitioned(hiveDataset.getTable())) {
  throw new IllegalArgumentException(
      "HiveDatasetVersionFinder is only compatible with partitioned hive tables");
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  try {
    this.beginGetWorkunitsTime = System.currentTimeMillis();

    initialize(state);

    EventSubmitter.submit(Optional.of(this.eventSubmitter), EventConstants.CONVERSION_FIND_HIVE_TABLES_EVENT);

    Iterator<HiveDataset> iterator = this.datasetFinder.getDatasetsIterator();

    while (iterator.hasNext()) {
      HiveDataset hiveDataset = iterator.next();
      try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {
        log.debug(String.format("Processing dataset: %s", hiveDataset));

        // Create work units per partition only when the table is partitioned and the feature is enabled.
        if (HiveUtils.isPartitioned(hiveDataset.getTable())
            && state.getPropAsBoolean(HIVE_SOURCE_CREATE_WORKUNITS_FOR_PARTITIONS,
                DEFAULT_HIVE_SOURCE_CREATE_WORKUNITS_FOR_PARTITIONS)) {
          createWorkunitsForPartitionedTable(hiveDataset, client);
        } else {
          createWorkunitForNonPartitionedTable(hiveDataset);
        }
      }
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  int realWorkunits = this.workunits.size();

  this.watermarker.onGetWorkunitsEnd(this.workunits);

  log.info(String.format("Created %s real workunits and %s watermark workunits", realWorkunits,
      this.workunits.size() - realWorkunits));

  return this.workunits;
}
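A hedged usage sketch of the entry point above. The HiveSource host class name is an assumption for illustration, and the property key is referenced through its constant rather than guessed:

// Hypothetical driver: disable per-partition work units so even partitioned
// tables take the createWorkunitForNonPartitionedTable branch.
// HiveSource is an assumed name for the class hosting getWorkunits above.
SourceState state = new SourceState();
state.setProp(HiveSource.HIVE_SOURCE_CREATE_WORKUNITS_FOR_PARTITIONS, Boolean.toString(false));
List<WorkUnit> workUnits = new HiveSource().getWorkunits(state);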
if (table.isPresent()) {
  org.apache.hadoop.hive.ql.metadata.Table qlTable =
      new org.apache.hadoop.hive.ql.metadata.Table(table.get());
  if (HiveUtils.isPartitioned(qlTable)) {
    partitions = Optional.of(HiveUtils.getPartitions(client.get(), qlTable, Optional.<String>absent()));
  }
}
/**
 * Finds all files read by the table and generates {@link CopyEntity}s for duplicating the table.
 * The semantics are as follows:
 * 1. Find all valid {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}s. If the table is
 *    partitioned, the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of the base table
 *    is ignored, and the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of each
 *    partition is processed instead.
 * 2. For each {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}, find all files it refers to.
 * 3. Generate a {@link CopyableFile} for each file referred to by a
 *    {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.
 * 4. If the table is partitioned, create a file set for each partition.
 * 5. Create work units for registering and deregistering partitions/tables, and for deleting
 *    unnecessary files in the target.
 *
 * For computation of target locations, see {@link HiveTargetPathHelper#getTargetPath}.
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor)
    throws IOException {
  if (HiveUtils.isPartitioned(this.dataset.table)) {
    return new PartitionIterator(this.sourcePartitions, configuration, prioritizer, requestor);
  } else {
    FileSet<CopyEntity> fileSet =
        new UnpartitionedTableFileSet(this.dataset.table.getCompleteName(), this.dataset, this);
    return Iterators.singletonIterator(fileSet);
  }
}
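A hedged sketch of consuming the returned iterator. It assumes FileSet exposes its entities via a getFiles() accessor, which is an assumption about the API:

// Hypothetical consumer: each FileSet is one independently schedulable unit
// (one per partition for partitioned tables, a single set otherwise).
Iterator<FileSet<CopyEntity>> fileSets = getCopyEntities(configuration, prioritizer, requestor);
while (fileSets.hasNext()) {
  FileSet<CopyEntity> fileSet = fileSets.next();
  for (CopyEntity copyEntity : fileSet.getFiles()) { // getFiles() is assumed
    // Hand each CopyEntity to the copy planner / work-unit creator here.
  }
}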
checkPartitionedTableCompatibility(this.targetTable, this.existingTargetTable.get());
if (HiveUtils.isPartitioned(this.dataset.table)) {
  this.sourcePartitions = HiveUtils.getPartitionsMap(multiClient.getClient(source_client),
      this.dataset.table, this.partitionFilter, this.hivePartitionExtendedFilter);
}
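The partitionFilter passed above typically carries a metastore partition-filter expression. A hypothetical example value, assuming the filter is a Guava Optional<String> in the Optional.<String>absent() style used elsewhere in these excerpts; the partition column name is made up:

// Hypothetical filter: restrict the copy to partitions from 2024 onward.
// "datepartition" is an illustrative partition column, not taken from the source.
Optional<String> partitionFilter = Optional.of("datepartition >= '2024-01-01-00'");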
// Skip tables that are not partitioned; tableKey has the form "db@table".
if (!HiveUtils.isPartitioned(new org.apache.hadoop.hive.ql.metadata.Table(
    client.get().getTable(tableKey.split("@")[0], tableKey.split("@")[1])))) {
  continue;
}
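The excerpt above splits tableKey twice. A hypothetical helper that parses the "db@table" key once and fails fast on malformed input; the method name and the Guava Preconditions usage are illustrative:

// Hypothetical refactoring: split the "db@table" key a single time.
private static org.apache.hadoop.hive.ql.metadata.Table loadQlTable(IMetaStoreClient client, String tableKey)
    throws org.apache.thrift.TException {
  String[] parts = tableKey.split("@");
  com.google.common.base.Preconditions.checkArgument(parts.length == 2,
      "Expected a key of the form db@table, got: %s", tableKey);
  return new org.apache.hadoop.hive.ql.metadata.Table(client.getTable(parts[0], parts[1]));
}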