sb.append(generateDdlColumns(generateDdlRequest.businessObjectFormatEntity, businessObjectFormat)); .add(String.format("`%s` %s", partitionColumn.getName(), getHiveDataType(partitionColumn, generateDdlRequest.businessObjectFormatEntity))); escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getDelimiter(), true)), StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter()) ? "" : String.format(" ESCAPED BY '%s'", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter(), true))))); String.format(" NULL DEFINED AS '%s'\n", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getNullValue())))); String.format("STORED AS %s%s\n", getHiveFileFormat(generateDdlRequest.businessObjectFormatEntity), generateDdlRequest.isPartitioned ? ";\n" : ""));
/**
 * Builds an {@code ALTER TABLE `<table>` REPLACE COLUMNS (...)} statement for the table named in the request. The column section is produced by
 * {@code generateDdlColumns} from the specified business object format entity.
 *
 * @param request the business object format DDL request that supplies the table name
 * @param businessObjectFormatEntity the business object format entity the statement is generated for
 *
 * @return the generated statement, trimmed and terminated with a semicolon
 */
@Override
public String generateReplaceColumnsStatement(BusinessObjectFormatDdlRequest request, BusinessObjectFormatEntity businessObjectFormatEntity)
{
    BusinessObjectFormat format = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(businessObjectFormatEntity);

    // Run the schema columns check before any statement text is assembled.
    assertSchemaColumnsNotEmpty(format, businessObjectFormatEntity);

    // The table name is read before the column list is generated, then both are appended in order.
    String statement = new StringBuilder(34)
        .append("ALTER TABLE `")
        .append(request.getTableName())
        .append("` REPLACE COLUMNS (\n")
        .append(generateDdlColumns(businessObjectFormatEntity, format))
        .toString();

    // Drop surrounding whitespace so the terminating semicolon directly follows the statement text.
    return statement.trim() + ';';
}
/**
 * Converts a configured character value into the value to be used in DDL. UTF-8 encoded strings are supported, and any non-ASCII printable characters are
 * "Hive" escaped using '\(value)'.
 * <p>
 * This overload delegates to the two-argument {@code getDdlCharacterValue} with its second argument set to {@code false}.
 *
 * @param string the configured character value
 *
 * @return the DDL character value
 */
public String getDdlCharacterValue(String string)
{
    return getDdlCharacterValue(string, false);
}
storageUnitEntities = excludeDuplicateBusinessObjectData(storageUnitEntities, generateDdlRequest.storageNames, generateDdlRequest.storageEntities); !CollectionUtils.isEmpty(matchedAvailablePartitionFilters)) notAllowNonAvailableRegisteredSubPartitions(businessObjectFormatKey, matchedAvailablePartitionFilters, availablePartitions, generateDdlRequest.storageNames); List<String> unmatchedPartitionFilter = getFirstUnmatchedPartitionFilter(unmatchedPartitionFilters); throw new ObjectNotFoundException( String.format("Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption, storageUnitEntities);
String pattern = hive13DdlGenerator.getHivePathPattern(autoDiscoverableSubPartitionColumns).pattern(); try hive13DdlGenerator.getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, TEST_S3_KEY_PREFIX, storageFilePaths, businessObjectDataEntity, STORAGE_NAME); fail("Should throw an IllegalArgumentException when storage file does not match the expected Hive sub-directory pattern.");
getHiveDataType(schemaColumn, businessObjectFormatEntity), StringUtils.isNotBlank(schemaColumn.getDescription()) ? String.format(" COMMENT '%s'", escapeSingleQuotes(schemaColumn.getDescription())) : ""));
assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity); generateStandardBaseDdl(generateDdlRequest, sb, businessObjectFormat, ifNotExistsOption); processPartitionFiltersForGenerateDdl(generateDdlRequest, sb, replacements, generateDdlRequest.businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption);
String s3BucketName = getS3BucketName(storageUnitEntity.getStorage(), generateDdlRequest.s3BucketNames); .subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), businessObjectFormat.getSchema().getPartitions().size()); for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName()))
@Test public void testGetHivePartitionsMultiplePathsFound() { // Create a test business object data entity. BusinessObjectDataEntity businessObjectDataEntity = businessObjectDataDaoTestHelper .createBusinessObjectDataEntity(NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, PARTITION_VALUE, DATA_VERSION, true, BDATA_STATUS); List<SchemaColumn> autoDiscoverableSubPartitionColumns = getPartitionColumns(Arrays.asList("Column1", "column2")); List<String> partitionPaths = Arrays.asList("/COLUMN1=111/COLUMN2=222", "/column1=111/COLUMN2=222"); List<String> storageFilePaths = getStorageFilePaths(Arrays.asList(partitionPaths.get(0) + "/file.dat", partitionPaths.get(1) + "/file.dat")); try { hive13DdlGenerator .getHivePartitions(businessObjectDataHelper.getBusinessObjectDataKey(businessObjectDataEntity), autoDiscoverableSubPartitionColumns, TEST_S3_KEY_PREFIX, storageFilePaths, businessObjectDataEntity, STORAGE_NAME); fail("Should throw an IllegalArgumentException when multiple locations exist for the same Hive partition."); } catch (IllegalArgumentException e) { assertEquals(String.format("Found two different locations for the same Hive partition. " + "Storage: {%s}, business object data: {%s}, S3 key prefix: {%s}, path[1]: {%s}, path[2]: {%s}", STORAGE_NAME, businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity), TEST_S3_KEY_PREFIX, partitionPaths.get(0), partitionPaths.get(1)), e.getMessage()); } }
Pattern pattern = getHivePathPattern(autoDiscoverableSubPartitionColumns); for (String storageFile : storageFiles)
@Test public void testEscapeSingleQuotes() { // Create a test vector with key=input and value=output values. LinkedHashMap<String, String> testVector = new LinkedHashMap<>(); testVector.put("some text without single quotes", "some text without single quotes"); testVector.put("'some \\'text\\' with single 'quotes'", "\\'some \\'text\\' with single \\'quotes\\'"); testVector.put("'", "\\'"); testVector.put("''''", "\\'\\'\\'\\'"); testVector.put("'", "\\'"); testVector.put("'\'\\'", "\\'\\'\\'"); // Loop over all entries in the test vector. for (Object set : testVector.entrySet()) { Map.Entry<?, ?> entry = (Map.Entry<?, ?>) set; assertEquals(entry.getValue(), hive13DdlGenerator.escapeSingleQuotes((String) entry.getKey())); } }
/** * Generates the create table Hive 13 DDL as per specified business object format DDL request. * * @param request the business object format DDL request * @param businessObjectFormatEntity the business object format entity * @param customDdlEntity the optional custom DDL entity * * @return the create table Hive DDL */ @Override public String generateCreateTableDdl(BusinessObjectFormatDdlRequest request, BusinessObjectFormatEntity businessObjectFormatEntity, CustomDdlEntity customDdlEntity) { // If the partitionKey="partition", then DDL should return a DDL which treats business object data as a table, not a partition. Boolean isPartitioned = !businessObjectFormatEntity.getPartitionKey().equalsIgnoreCase(NO_PARTITIONING_PARTITION_KEY); // Generate the create table Hive 13 DDL. GenerateDdlRequest generateDdlRequest = new GenerateDdlRequest(); generateDdlRequest.businessObjectFormatEntity = businessObjectFormatEntity; generateDdlRequest.customDdlEntity = customDdlEntity; generateDdlRequest.isPartitioned = isPartitioned; generateDdlRequest.tableName = request.getTableName(); generateDdlRequest.includeDropTableStatement = request.isIncludeDropTableStatement(); generateDdlRequest.includeIfNotExistsOption = request.isIncludeIfNotExistsOption(); return generateCreateTableDdlHelper(generateDdlRequest); }
hive13DdlGenerator.generateReplaceColumnsStatement(businessObjectFormatDdlRequest, businessObjectFormatEntity); Assert.fail("expected IllegalArgumentException, but no exception was thrown");
/**
 * Converts a configured character value into the value to be used in DDL. UTF-8 encoded strings are supported, and any non-ASCII printable characters are
 * "Hive" escaped using '\(value)'.
 * <p>
 * This overload delegates to the two-argument {@code getDdlCharacterValue} with its second argument set to {@code false}.
 *
 * @param string the configured character value
 *
 * @return the DDL character value
 */
public String getDdlCharacterValue(String string)
{
    return getDdlCharacterValue(string, false);
}
storageUnitEntities = excludeDuplicateBusinessObjectData(storageUnitEntities, generateDdlRequest.storageNames, generateDdlRequest.storageEntities); !CollectionUtils.isEmpty(matchedAvailablePartitionFilters)) notAllowNonAvailableRegisteredSubPartitions(businessObjectFormatKey, matchedAvailablePartitionFilters, availablePartitions, generateDdlRequest.storageNames); List<String> unmatchedPartitionFilter = getFirstUnmatchedPartitionFilter(unmatchedPartitionFilters); throw new ObjectNotFoundException( String.format("Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption, storageUnitEntities);
getHiveDataType(schemaColumn, businessObjectFormatEntity), StringUtils.isNotBlank(schemaColumn.getDescription()) ? String.format(" COMMENT '%s'", escapeSingleQuotes(schemaColumn.getDescription())) : ""));
assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity); generateStandardBaseDdl(generateDdlRequest, sb, businessObjectFormat, ifNotExistsOption); processPartitionFiltersForGenerateDdl(generateDdlRequest, sb, replacements, generateDdlRequest.businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption);
String s3BucketName = getS3BucketName(storageUnitEntity.getStorage(), generateDdlRequest.s3BucketNames); .subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), businessObjectFormat.getSchema().getPartitions().size()); for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName()))
expectedHivePartitions = new ArrayList<>(); resultHivePartitions = hive13DdlGenerator .getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, TEST_S3_KEY_PREFIX, storageFilePaths, businessObjectDataEntity, STORAGE_NAME); assertEquals(expectedHivePartitions, resultHivePartitions); expectedHivePartitions = Arrays.asList(HivePartitionDto.builder().withPath("").withPartitionValues(Arrays.asList(PARTITION_VALUE)).build()); resultHivePartitions = hive13DdlGenerator .getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, TEST_S3_KEY_PREFIX, storageFilePaths, businessObjectDataEntity, STORAGE_NAME); assertEquals(expectedHivePartitions, resultHivePartitions); HivePartitionDto.builder().withPath("/column1=aa/column2=bb").withPartitionValues(Arrays.asList(PARTITION_VALUE, "aa", "bb")).build()); resultHivePartitions = hive13DdlGenerator .getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, TEST_S3_KEY_PREFIX, storageFilePaths, businessObjectDataEntity, STORAGE_NAME); assertEquals(expectedHivePartitions, resultHivePartitions);
Pattern pattern = getHivePathPattern(autoDiscoverableSubPartitionColumns); for (String storageFile : storageFiles)