public TableLikeStageableTableMetadata(Table referenceTable, String destinationDB, String destinationTableName,
    String targetDataPath) {
  super(destinationTableName, destinationTableName + "_STAGING", destinationDB, targetDataPath,
      getTableProperties(referenceTable), new ArrayList<>(), Optional.of(referenceTable.getNumBuckets()),
      new Properties(), false, Optional.absent(), new ArrayList<>());
}
public TableLikeStageableTableMetadata(Table referenceTable, Config config) {
  super(HiveDataset.resolveTemplate(config.getString(StageableTableMetadata.DESTINATION_TABLE_KEY), referenceTable),
      HiveDataset.resolveTemplate(config.getString(StageableTableMetadata.DESTINATION_TABLE_KEY), referenceTable) + "_STAGING",
      HiveDataset.resolveTemplate(config.getString(StageableTableMetadata.DESTINATION_DB_KEY), referenceTable),
      HiveDataset.resolveTemplate(config.getString(DESTINATION_DATA_PATH_KEY), referenceTable),
      getTableProperties(referenceTable), new ArrayList<>(), Optional.of(referenceTable.getNumBuckets()),
      new Properties(), false, Optional.absent(), new ArrayList<>());
}
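// Minimal usage sketch (not from the source): the four-argument constructor above can be called
// directly with an existing Hive Table handle; the staging table name and bucket count are derived
// from the arguments and the reference table. The variable `sourceTable` and the database, table,
// and path literals below are hypothetical placeholders.
TableLikeStageableTableMetadata metadata = new TableLikeStageableTableMetadata(
    sourceTable,                       // reference Table whose properties and bucket count are copied
    "analytics_db",                    // destination database
    "events_copy",                     // destination table; "events_copy_STAGING" is derived from it
    "/data/analytics/events_copy");    // target data path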
@SuppressWarnings("nls") public FileStatus[] getSortedPaths() { try { // Previously, this got the filesystem of the Table, which could be // different from the filesystem of the partition. FileSystem fs = FileSystem.get(getPath().toUri(), SessionState.getSessionConf()); String pathPattern = getPath().toString(); if (getNumBuckets() > 0) { pathPattern = pathPattern + "/*"; } LOG.info("Path pattern = " + pathPattern); FileStatus srcs[] = fs.globStatus(new Path(pathPattern), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(srcs); for (FileStatus src : srcs) { LOG.info("Got file: " + src.getPath()); } if (srcs.length == 0) { return null; } return srcs; } catch (Exception e) { throw new RuntimeException("Cannot get path ", e); } }
@SuppressWarnings("nls") public FileStatus[] getSortedPaths() { try { // Previously, this got the filesystem of the Table, which could be // different from the filesystem of the partition. FileSystem fs = FileSystem.get(getPath().toUri(), SessionState.getSessionConf()); String pathPattern = getPath().toString(); if (getNumBuckets() > 0) { pathPattern = pathPattern + "/*"; } LOG.info("Path pattern = " + pathPattern); FileStatus srcs[] = fs.globStatus(new Path(pathPattern), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(srcs); for (FileStatus src : srcs) { LOG.info("Got file: " + src.getPath()); } if (srcs.length == 0) { return null; } return srcs; } catch (Exception e) { throw new RuntimeException("Cannot get path ", e); } }
private FileSinkDesc getFileSinkDesc(Path tempDirPath) {
  Table table = mock(Table.class);
  when(table.getNumBuckets()).thenReturn(NUM_BUCKETS);
  FileSinkDesc conf = new FileSinkDesc(tempDirPath, null, false);
  conf.setTable(table);
  return conf;
}
/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 *
 * @return a list of path names corresponding to should-be-created empty buckets.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, Set<Path> filesKept, boolean isBaseDir)
    throws IOException {
  int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols();
  int numBuckets = (conf != null && conf.getTable() != null) ? conf.getTable().getNumBuckets() : 0;
  return removeTempOrDuplicateFiles(
      fs, fileStats, null, dpLevels, numBuckets, hconf, null, 0, false, filesKept, isBaseDir);
}
public Integer splitCount(HiveTableScan scan, RelMetadataQuery mq) {
  Integer splitCount;
  RelOptHiveTable table = (RelOptHiveTable) scan.getTable();
  List<String> bucketCols = table.getHiveTableMD().getBucketCols();
  if (bucketCols != null && !bucketCols.isEmpty()) {
    splitCount = table.getHiveTableMD().getNumBuckets();
  } else {
    splitCount = splitCountRepartition(scan, mq);
    if (splitCount == null) {
      throw new RuntimeException("Could not get split count for table: "
          + scan.getTable().getQualifiedName());
    }
  }
  return splitCount;
}
public boolean checkBucketedTable(Table tbl, ParseContext pGraphContext,
    PrunedPartitionList prunedParts) throws SemanticException {
  final int numBuckets = tbl.getNumBuckets();
  if (numBuckets <= 0) {
    return false;
private boolean checkTable(Table table, List<Integer> bucketPositionsDest,
    List<Integer> sortPositionsDest, List<Integer> sortOrderDest, int numBucketsDest) {
  // The bucketing and sorting positions should exactly match
  int numBuckets = table.getNumBuckets();
  if (numBucketsDest != numBuckets) {
    return false;
  }
  List<Integer> tableBucketPositions = getBucketPositions(table.getBucketCols(), table.getCols());
  List<Integer> sortPositions = getSortPositions(table.getSortCols(), table.getCols());
  List<Integer> sortOrder = getSortOrder(table.getSortCols(), table.getCols());
  return bucketPositionsDest.equals(tableBucketPositions)
      && sortPositionsDest.equals(sortPositions)
      && sortOrderDest.equals(sortOrder);
}
if (isBucketed) {
  bucketColsList.add(table.getBucketCols());
  numBuckets = table.getNumBuckets();
  List<String> sortCols = new ArrayList<String>();
  for (Order colSortOrder : table.getSortCols()) {
private void genPartnCols(String dest, Operator input, QB qb, TableDesc table_desc,
    Table dest_tab, SortBucketRSCtx ctx) throws SemanticException {
  boolean enforceBucketing = false;
  ArrayList<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>();

  if ((dest_tab.getNumBuckets() > 0)) {
    enforceBucketing = true;
    if (updating(dest) || deleting(dest)) {
      partnColsNoConvert = getPartitionColsFromBucketColsForUpdateDelete(input, false);
    } else {
      partnColsNoConvert = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false);
    }
  }

  if ((dest_tab.getSortCols() != null) && (dest_tab.getSortCols().size() > 0)) {
    if (!enforceBucketing) {
      throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName()));
    } else {
      if (!enforceBucketing) {
        partnColsNoConvert = getSortCols(dest, qb, dest_tab, table_desc, input, false);
      }
    }
    enforceBucketing = true;
  }

  if (enforceBucketing) {
    ctx.setPartnCols(partnColsNoConvert);
  }
}
private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab,
    Map<String, String> partSpec, String dest) throws SemanticException {
  List<FieldSchema> parts = dest_tab.getPartitionKeys();
  if (parts == null || parts.isEmpty()) {
    return null; // table is not partitioned
  }
  if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition
    throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest),
        ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
  }
  DynamicPartitionCtx dpCtx = qbm.getDPCtx(dest);
  if (dpCtx == null) {
    dest_tab.validatePartColumnNames(partSpec, false);
    dpCtx = new DynamicPartitionCtx(partSpec,
        conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME),
        conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
    qbm.setDPCtx(dest, dpCtx);
  }
  if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
    throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest),
        ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
  }
  if ((dest_tab.getNumBuckets() > 0)) {
    dpCtx.setNumBuckets(dest_tab.getNumBuckets());
  }
  return dpCtx;
}
private void checkAcidConstraints(QB qb, TableDesc tableDesc, Table table) throws SemanticException {
  String tableName = tableDesc.getTableName();
  if (!qb.getParseInfo().isInsertIntoTable(tableName)) {
    LOG.debug("Couldn't find table " + tableName + " in insertIntoTable");
    throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg());
  }
  /*
  LOG.info("Modifying config values for ACID write");
  conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
  conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
  These props are now enabled elsewhere (see commit diffs). It would be better instead to throw
  if they are not set. For example, if the user has set hive.optimize.reducededuplication=false for
  some reason, we'll run a query contrary to what they wanted... But throwing now would be
  backwards incompatible.
  */
  conf.set(AcidUtils.CONF_ACID_KEY, "true");

  if (table.getNumBuckets() < 1) {
    throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName());
  }
  if (table.getSortCols() != null && table.getSortCols().size() > 0) {
    throw new SemanticException(ErrorMsg.ACID_NO_SORTED_BUCKETS, table.getTableName());
  }
}
private void genPartnCols(String dest, Operator input, QB qb, TableDesc table_desc,
    Table dest_tab, SortBucketRSCtx ctx) throws SemanticException {
  boolean enforceBucketing = false;
  ArrayList<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>();

  if ((dest_tab.getNumBuckets() > 0)) {
    enforceBucketing = true;
    if (updating(dest) || deleting(dest)) {
      partnColsNoConvert = getPartitionColsFromBucketColsForUpdateDelete(input, false);
    } else {
      partnColsNoConvert = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false);
    }
  }

  if ((dest_tab.getSortCols() != null) && (dest_tab.getSortCols().size() > 0)) {
    if (!enforceBucketing && !dest_tab.isIndexTable()) {
      throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName()));
    } else {
      if (!enforceBucketing) {
        partnColsNoConvert = getSortCols(dest, qb, dest_tab, table_desc, input, false);
      }
    }
    enforceBucketing = true;
  }

  if (enforceBucketing) {
    ctx.setPartnCols(partnColsNoConvert);
  }
}
lbLevels = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
int numBuckets = (conf.getTable() != null) ? conf.getTable().getNumBuckets()
    : (dpCtx != null ? dpCtx.getNumBuckets() : 0);
MissingBucketsContext mbc = new MissingBucketsContext(
private void alterPartitionSpecInMemory(Table tbl, Map<String, String> partSpec,
    org.apache.hadoop.hive.metastore.api.Partition tpart, boolean inheritTableSpecs, String partPath)
    throws HiveException, InvalidOperationException {
  LOG.debug("altering partition for table " + tbl.getTableName()
      + " with partition spec : " + partSpec);
  if (inheritTableSpecs) {
    tpart.getSd().setOutputFormat(tbl.getTTable().getSd().getOutputFormat());
    tpart.getSd().setInputFormat(tbl.getTTable().getSd().getInputFormat());
    tpart.getSd().getSerdeInfo().setSerializationLib(tbl.getSerializationLib());
    tpart.getSd().getSerdeInfo().setParameters(tbl.getTTable().getSd().getSerdeInfo().getParameters());
    tpart.getSd().setBucketCols(tbl.getBucketCols());
    tpart.getSd().setNumBuckets(tbl.getNumBuckets());
    tpart.getSd().setSortCols(tbl.getSortCols());
  }
  if (partPath == null || partPath.trim().equals("")) {
    throw new HiveException("new partition path should not be null or empty.");
  }
  tpart.getSd().setLocation(partPath);
}