/**
 * Gets the number of buckets.
 *
 * @return the number of buckets
 */
public int getNumBuckets() {
  return this.sd.getNumBuckets();
}
/**
 * Returns {@code 0} until the table described by the {@link #getDatabaseName() database_name}
 * {@code .}{@link #getTableName() table_name} has been resolved against the meta store database (when
 * {@link MutatorClient#connect()} exits), at which point this returns the corresponding
 * {@link StorageDescriptor#getNumBuckets() total bucket count}.
 */
public int getTotalBuckets() {
  return table != null ? table.getSd().getNumBuckets() : 0;
}
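// A minimal usage sketch of the contract above, assuming a pre-built MutatorClient and one of its
// AcidTable handles (both hypothetical variables here), not the definitive API flow.
int before = acidTable.getTotalBuckets(); // 0: table metadata not yet resolved
mutatorClient.connect();                  // resolves the table against the meta store
int after = acidTable.getTotalBuckets();  // actual StorageDescriptor bucket count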
public int getNumBuckets() {
  return tTable.getSd().getNumBuckets();
}
public int getBucketCount() {
  return tPartition.getSd().getNumBuckets();
  /*
   * TODO: Keeping this code around for later use when we will support
   * sampling on tables which are not created with a CLUSTERED INTO clause.
   *
   * // read from table meta data
   * int numBuckets = this.table.getNumBuckets();
   * if (numBuckets == -1) {
   *   // table meta data does not have bucket information;
   *   // check if the file system has multiple buckets (files) in this partition
   *   String pathPattern = this.partPath.toString() + "/*";
   *   try {
   *     FileSystem fs = FileSystem.get(this.table.getDataLocation(), Hive.get().getConf());
   *     FileStatus[] srcs = fs.globStatus(new Path(pathPattern), FileUtils.HIDDEN_FILES_PATH_FILTER);
   *     numBuckets = srcs.length;
   *   } catch (Exception e) {
   *     throw new RuntimeException("Cannot get bucket count for table " + this.table.getName(), e);
   *   }
   * }
   * return numBuckets;
   */
}
public static Optional<HiveBucketProperty> fromStorageDescriptor(StorageDescriptor storageDescriptor, String tablePartitionName)
{
    boolean bucketColsSet = storageDescriptor.isSetBucketCols() && !storageDescriptor.getBucketCols().isEmpty();
    boolean numBucketsSet = storageDescriptor.isSetNumBuckets() && storageDescriptor.getNumBuckets() > 0;
    if (!numBucketsSet) {
        // In Hive, a table is considered not bucketed when its bucketCols is set but its numBuckets is not set.
        return Optional.empty();
    }
    if (!bucketColsSet) {
        throw new PrestoException(HIVE_INVALID_METADATA,
                "Table/partition metadata has 'numBuckets' set, but 'bucketCols' is not set: " + tablePartitionName);
    }
    List<SortingColumn> sortedBy = ImmutableList.of();
    if (storageDescriptor.isSetSortCols()) {
        sortedBy = storageDescriptor.getSortCols().stream()
                .map(order -> SortingColumn.fromMetastoreApiOrder(order, tablePartitionName))
                .collect(toImmutableList());
    }
    return Optional.of(new HiveBucketProperty(storageDescriptor.getBucketCols(), storageDescriptor.getNumBuckets(), sortedBy));
}
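// A hedged sketch exercising both branches above with a hand-built Thrift StorageDescriptor
// (the setters and the Order constructor are the standard Hive metastore Thrift API;
// "db.tbl" and "user_id" are placeholder names).
StorageDescriptor sd = new StorageDescriptor();
Optional<HiveBucketProperty> none = HiveBucketProperty.fromStorageDescriptor(sd, "db.tbl"); // empty: numBuckets unset

sd.setBucketCols(ImmutableList.of("user_id"));
sd.setNumBuckets(32);
sd.setSortCols(ImmutableList.of(new Order("user_id", 1))); // 1 = ascending
Optional<HiveBucketProperty> some = HiveBucketProperty.fromStorageDescriptor(sd, "db.tbl"); // 32 buckets by user_id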
protected void prepareBucketingFields() {
  this.isBucketed = table.getSd().getNumBuckets() > 0;
  // For unbucketed tables we have exactly 1 RecordUpdater (until HIVE-19208) for each AbstractRecordWriter,
  // which ends up writing to the file bucket_000000.
  // See also {@link #getBucket(Object)}.
  this.totalBuckets = isBucketed ? table.getSd().getNumBuckets() : 1;
  if (isBucketed) {
    this.bucketIds = getBucketColIDs(table.getSd().getBucketCols(), table.getSd().getCols());
    this.bucketFieldData = new Object[bucketIds.size()];
    this.bucketObjInspectors = getObjectInspectorsForBucketedCols(bucketIds, inputRowObjectInspector);
    this.bucketStructFields = new StructField[bucketIds.size()];
    List<? extends StructField> allFields = inputRowObjectInspector.getAllStructFieldRefs();
    for (int i = 0; i < bucketIds.size(); i++) {
      bucketStructFields[i] = allFields.get(bucketIds.get(i));
    }
  }
}
@Before
public void configureMocks() throws Exception {
  when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME_1)).thenReturn(mockTable1);
  when(mockTable1.getDbName()).thenReturn(DB_NAME);
  when(mockTable1.getTableName()).thenReturn(TABLE_NAME_1);
  when(mockTable1.getSd()).thenReturn(mockSd);
  when(mockTable1.getParameters()).thenReturn(mockParameters);
  when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME_2)).thenReturn(mockTable2);
  when(mockTable2.getDbName()).thenReturn(DB_NAME);
  when(mockTable2.getTableName()).thenReturn(TABLE_NAME_2);
  when(mockTable2.getSd()).thenReturn(mockSd);
  when(mockTable2.getParameters()).thenReturn(mockParameters);
  // Consecutive stubbing: the first getNumBuckets() call returns 1, subsequent calls return 2.
  when(mockSd.getNumBuckets()).thenReturn(1, 2);
  when(mockSd.getOutputFormat()).thenReturn(OrcOutputFormat.class.getName());
  when(mockParameters.get("transactional")).thenReturn(Boolean.TRUE.toString());
  when(mockMetaStoreClient.openTxn(USER)).thenReturn(TRANSACTION_ID);
  when(mockMetaStoreClient.allocateTableWriteId(TRANSACTION_ID, DB_NAME, TABLE_NAME_1)).thenReturn(WRITE_ID1);
  when(mockMetaStoreClient.allocateTableWriteId(TRANSACTION_ID, DB_NAME, TABLE_NAME_2)).thenReturn(WRITE_ID2);

  client = new MutatorClient(mockMetaStoreClient, mockConfiguration, mockLockFailureListener, USER,
      Collections.singletonList(TABLE_1));
}
@Test
public void testCheckUnBucketedTableConnect() throws Exception {
  when(mockSd.getNumBuckets()).thenReturn(0);
  try {
    client.connect();
    fail();
  } catch (ConnectionException e) {
    // expected: cannot stream to an unbucketed table
  }
  assertThat(client.isConnected(), is(false));
}
private void checkTable(IMetaStoreClient metaStoreClient, AcidTable acidTable) throws ConnectionException {
  try {
    LOG.debug("Checking table {}.", acidTable.getQualifiedName());
    Table metaStoreTable = metaStoreClient.getTable(acidTable.getDatabaseName(), acidTable.getTableName());

    if (acidTable.getTableType() == TableType.SINK) {
      Map<String, String> parameters = metaStoreTable.getParameters();
      if (!Boolean.parseBoolean(parameters.get(TRANSACTIONAL_PARAM_KEY))) {
        throw new ConnectionException("Cannot stream to table that is not transactional: '"
            + acidTable.getQualifiedName() + "'.");
      }
      int totalBuckets = metaStoreTable.getSd().getNumBuckets();
      LOG.debug("Table {} has {} buckets.", acidTable.getQualifiedName(), totalBuckets);
      if (totalBuckets <= 0) {
        throw new ConnectionException("Cannot stream to table that has not been bucketed: '"
            + acidTable.getQualifiedName() + "'.");
      }
      String outputFormat = metaStoreTable.getSd().getOutputFormat();
      LOG.debug("Table {} has {} OutputFormat.", acidTable.getQualifiedName(), outputFormat);
      acidTable.setTable(metaStoreTable);
    }
  } catch (NoSuchObjectException e) {
    throw new ConnectionException("Invalid table '" + acidTable.getQualifiedName() + "'", e);
  } catch (TException e) {
    throw new ConnectionException("Error communicating with the meta store", e);
  }
  LOG.debug("Table {} OK.", acidTable.getQualifiedName());
}
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
public Object getFieldValue(_Fields field) {
  switch (field) {
  case COLS:
    return getCols();
  case LOCATION:
    return getLocation();
  case INPUT_FORMAT:
    return getInputFormat();
  case OUTPUT_FORMAT:
    return getOutputFormat();
  case COMPRESSED:
    return isCompressed();
  case NUM_BUCKETS:
    return getNumBuckets();
  case SERDE_INFO:
    return getSerdeInfo();
  case BUCKET_COLS:
    return getBucketCols();
  case SORT_COLS:
    return getSortCols();
  case PARAMETERS:
    return getParameters();
  case SKEWED_INFO:
    return getSkewedInfo();
  case STORED_AS_SUB_DIRECTORIES:
    return isStoredAsSubDirectories();
  }
  throw new IllegalStateException();
}
public StorageDescriptorWrapper(StorageDescriptor storageDescriptor) {
  sd = storageDescriptor;
  location = storageDescriptor.getLocation();
  inputFormat = storageDescriptor.getInputFormat();
  outputFormat = storageDescriptor.getOutputFormat();
  compressed = storageDescriptor.isCompressed();
  numBuckets = storageDescriptor.getNumBuckets();
  serDeInfo = new SerDeInfoWrapper(storageDescriptor.getSerdeInfo());
  if (sd.getSortCols() != null) {
    sortCols = Lists.newArrayList();
    for (Order order : sd.getSortCols()) {
      sortCols.add(new OrderWrapper(order));
    }
  }
  parameters = storageDescriptor.getParameters();
  if (sd.getCols() != null) {
    this.columns = Lists.newArrayList();
    for (FieldSchema fieldSchema : sd.getCols()) {
      this.columns.add(new FieldSchemaWrapper(fieldSchema));
    }
  }
}
JobConf job = new JobConf();
job.set("mapred.input.dir", partitionLocation.toString());
job.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(table.getSd().getNumBuckets()));
job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
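// Hedged follow-up sketch: a consumer of the property written above can read it back as a plain
// int. The -1 "absent" default is an assumption for illustration, not part of the snippet above.
int bucketCount = job.getInt(hive_metastoreConstants.BUCKET_COUNT, -1);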
private static State getStorageProps(StorageDescriptor sd) {
  State storageProps = new State();
  for (Map.Entry<String, String> entry : sd.getParameters().entrySet()) {
    storageProps.setProp(entry.getKey(), entry.getValue());
  }
  if (sd.isSetLocation()) {
    storageProps.setProp(HiveConstants.LOCATION, sd.getLocation());
  }
  if (sd.isSetInputFormat()) {
    storageProps.setProp(HiveConstants.INPUT_FORMAT, sd.getInputFormat());
  }
  if (sd.isSetOutputFormat()) {
    storageProps.setProp(HiveConstants.OUTPUT_FORMAT, sd.getOutputFormat());
  }
  if (sd.isSetCompressed()) {
    storageProps.setProp(HiveConstants.COMPRESSED, sd.isCompressed());
  }
  if (sd.isSetNumBuckets()) {
    storageProps.setProp(HiveConstants.NUM_BUCKETS, sd.getNumBuckets());
  }
  if (sd.isSetBucketCols()) {
    for (String bucketColumn : sd.getBucketCols()) {
      storageProps.appendToListProp(HiveConstants.BUCKET_COLUMNS, bucketColumn);
    }
  }
  if (sd.isSetStoredAsSubDirectories()) {
    storageProps.setProp(HiveConstants.STORED_AS_SUB_DIRS, sd.isStoredAsSubDirectories());
  }
  return storageProps;
}
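// Hedged standalone sketch of why the isSet* guards above matter: in the Thrift-generated
// StorageDescriptor, primitive fields such as numBuckets are tracked as "unset" until assigned,
// so copying them unconditionally would fabricate a bucket count of 0.
StorageDescriptor sd = new StorageDescriptor();
boolean before = sd.isSetNumBuckets(); // false: no value assigned yet
sd.setNumBuckets(16);
boolean after = sd.isSetNumBuckets();  // true
int n = sd.getNumBuckets();            // 16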
private static AddPartitionDesc getBaseAddPartitionDescFromPartition(
    Path fromPath, String dbName, ImportTableDesc tblDesc, Partition partition,
    ReplicationSpec replicationSpec, HiveConf conf) throws MetaException, SemanticException {
  AddPartitionDesc partsDesc = new AddPartitionDesc(dbName, tblDesc.getTableName(),
      EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()),
      partition.getSd().getLocation(), partition.getParameters());
  AddPartitionDesc.OnePartitionDesc partDesc = partsDesc.getPartition(0);
  partDesc.setInputFormat(partition.getSd().getInputFormat());
  partDesc.setOutputFormat(partition.getSd().getOutputFormat());
  partDesc.setNumBuckets(partition.getSd().getNumBuckets());
  partDesc.setCols(partition.getSd().getCols());
  partDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
  partDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
  partDesc.setBucketCols(partition.getSd().getBucketCols());
  partDesc.setSortCols(partition.getSd().getSortCols());
  if (replicationSpec.isInReplicationScope() && tblDesc.isExternal()
      && !replicationSpec.isMigratingToExternalTable()) {
    String newLocation = ReplExternalTables.externalTableLocation(conf, partition.getSd().getLocation());
    LOG.debug("partition {} has data location: {}", partition, newLocation);
    partDesc.setLocation(newLocation);
  } else {
    partDesc.setLocation(new Path(fromPath,
        Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
  }
  return partsDesc;
}
private static AddPartitionDesc getBaseAddPartitionDescFromPartition(
    Path fromPath, String dbname, ImportTableDesc tblDesc, Partition partition)
    throws MetaException, SemanticException {
  AddPartitionDesc partsDesc = new AddPartitionDesc(dbname, tblDesc.getTableName(),
      EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()),
      partition.getSd().getLocation(), partition.getParameters());
  AddPartitionDesc.OnePartitionDesc partDesc = partsDesc.getPartition(0);
  partDesc.setInputFormat(partition.getSd().getInputFormat());
  partDesc.setOutputFormat(partition.getSd().getOutputFormat());
  partDesc.setNumBuckets(partition.getSd().getNumBuckets());
  partDesc.setCols(partition.getSd().getCols());
  partDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
  partDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
  partDesc.setBucketCols(partition.getSd().getBucketCols());
  partDesc.setSortCols(partition.getSd().getSortCols());
  partDesc.setLocation(new Path(fromPath,
      Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
  return partsDesc;
}
/**
 * Converts a storage descriptor to a db-backed storage descriptor. It points the
 * storage descriptor's column descriptor to the one passed as an argument,
 * so it does not create a new MColumnDescriptor object.
 *
 * @param sd the storage descriptor to wrap in a db-backed object
 * @param mcd the db-backed column descriptor
 * @return the db-backed storage descriptor object
 * @throws MetaException
 */
private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd, MColumnDescriptor mcd)
    throws MetaException {
  if (sd == null) {
    return null;
  }
  return new MStorageDescriptor(mcd, sd.getLocation(), sd.getInputFormat(), sd.getOutputFormat(),
      sd.isCompressed(), sd.getNumBuckets(), convertToMSerDeInfo(sd.getSerdeInfo()),
      sd.getBucketCols(), convertToMOrders(sd.getSortCols()), sd.getParameters(),
      (null == sd.getSkewedInfo()) ? null : sd.getSkewedInfo().getSkewedColNames(),
      convertToMStringLists((null == sd.getSkewedInfo()) ? null : sd.getSkewedInfo().getSkewedColValues()),
      covertToMapMStringList((null == sd.getSkewedInfo()) ? null : sd.getSkewedInfo().getSkewedColValueLocationMaps()),
      sd.isStoredAsSubDirectories());
}