runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) partitioned by (p int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(0,2),(0,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//because 'local' inpath doesn't delete source files runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T partition(p=0)"); runStatementOnDriver("insert into Tstage values(1,2),(1,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' into table T partition(p=1)"); runStatementOnDriver("insert into Tstage values(2,2),(2,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/3'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/3/data' into table T partition(p=1)"); List<String> rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][] { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t2", "t/p=1/delta_0000003_0000003_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4", "t/p=1/delta_0000003_0000003_0000/000000_0"}}; checkExpected(rs, expected, "load data inpath partitioned"); runStatementOnDriver("insert into Tstage values(5,2),(5,4)");
/**
 * Checks that a Load Data statement executed inside an explicit transaction leaves no visible
 * rows behind once that transaction is rolled back: only the rows written by the preceding
 * insert (write id 1) are expected in T.
 */
@Test
public void testAbort() throws Exception {
  boolean vectorized = false;

  // Start from a clean slate.
  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("drop table if exists Tstage");
  runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')");

  // Tstage only exists to produce test data; exporting it yields an ORC data file
  // with the correct schema under <warehouse>/1/data.
  runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')");
  runStatementOnDriver("insert into Tstage values(5,5),(6,6)");
  String exportStatement = "export table Tstage to '" + getWarehouseDir() + "/1'";
  runStatementOnDriver(exportStatement);

  // These rows are written outside the explicit transaction and must survive the rollback.
  runStatementOnDriver("insert into T values(1,2),(3,4)");

  // Load the exported 'data' directory into T inside a transaction, then roll it back.
  runStatementOnDriver("START TRANSACTION");
  String loadStatement = "load data local inpath '" + getWarehouseDir() + "/1/data' into table T";
  runStatementOnDriver(loadStatement);
  runStatementOnDriver("ROLLBACK");

  String query;
  if (vectorized) {
    query = "select ROW__ID, a, b from T order by ROW__ID";
  } else {
    query = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
  }

  // Only the two inserted rows are expected; none of the loaded (5,5),(6,6) rows.
  String[][] expectedRows = new String[][] {
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
          "t/delta_0000001_0000001_0000/bucket_00000"},
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4",
          "t/delta_0000001_0000001_0000/bucket_00000"}
  };
  checkResult(expectedRows, query, vectorized, "load data inpath");
}
void checkResult(String[][] expectedResult, String query, boolean isVectorized,
/**
 * Round-trips a couple of rows through an insert-only transactional ORC table and checks that
 * a plain select returns exactly what was inserted.
 */
@Test
public void testMMOrcTable() throws Exception {
  final int[][] rows = new int[][] {{1, 2}, {3, 4}};

  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true', 'transactional_properties'='insert_only')");

  final String insertStatement = "insert into T " + makeValuesClause(rows);
  runStatementOnDriver(insertStatement);

  // Ordering by b gives a deterministic row order to compare against.
  final List<String> actual = runStatementOnDriver("select a, b from T order by b");
  final List<String> wanted = stringifyValues(rows);
  Assert.assertEquals("", wanted, actual);
}
/** * By default you can't load into bucketed tables. Things will break badly in acid (data loss, etc) * if loaded data is not bucketed properly. This test is to capture that this is still the default. * If the default is changed, Load Data should probably do more validation to ensure data is * properly distributed into files and files are named correctly. * With the availability of new feature to rewrite such "load data" commands into insert-as-select, * the test should let the load data pass. */ @Test public void testValidations() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')"); File createdFile= folder.newFile("myfile.txt"); FileUtils.writeStringToFile(createdFile, "hello world"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); // This will work with the new support of rewriting load into IAS. runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/Tstage' into table T"); }
@Test public void testLoadAcidFile() throws Exception { MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true); runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists T2"); runStatementOnDriver( "create table T (a int, b int) stored as orc"); //This is just a simple way to generate test data runStatementOnDriver("create table T2(a int, b int) stored as orc"); runStatementOnDriver("insert into T values(1,2)"); List<String> rs = runStatementOnDriver("select INPUT__FILE__NAME from T"); Assert.assertEquals(1, rs.size()); Assert.assertTrue("Unexpcted file name", rs.get(0) .endsWith("t/delta_0000001_0000001_0000/bucket_00000")); //T2 is an acid table so this should fail CommandProcessorResponse cpr = runStatementOnDriverNegative( "load data local inpath '" + rs.get(0) + "' into table T2"); Assert.assertEquals("Unexpected error code", ErrorMsg.LOAD_DATA_ACID_FILE.getErrorCode(), cpr.getErrorCode()); } }
/**
 * Runs the shared loadDataNonAcid2AcidConversion scenario, passing {@code false} as its flag.
 * NOTE(review): by analogy with the sibling loadData(boolean isVectorized) helper this is
 * presumably the non-vectorized variant - confirm against the helper's signature.
 */
@Test public void loadDataNonAcid2AcidConversion() throws Exception { loadDataNonAcid2AcidConversion(false); } @Test
/** Runs the shared loadData scenario with {@code isVectorized} set to {@code false}. */
@Test public void loadData() throws Exception { loadData(false); } @Test
/** Runs the shared loadDataUpdate scenario with {@code isVectorized} set to {@code false}. */
@Test public void loadDataUpdate() throws Exception { loadDataUpdate(false); } @Test
/**
 * Convenience overload: forwards all four arguments unchanged to the five-argument
 * checkResult, supplying {@code LOG} as the last argument.
 *
 * @param expectedResult expected rows, passed through as-is
 * @param query          query text, passed through as-is
 * @param isVectorized   vectorization flag, passed through as-is
 * @param msg            message, passed through as-is
 */
void checkResult(String[][] expectedResult, String query, boolean isVectorized, String msg) throws Exception{ checkResult(expectedResult, query, isVectorized, msg, LOG); } @Test
/**
 * Per-test setup: runs setUpInternal() and then turns HIVE_VECTORIZATION_ENABLED off on
 * hiveConf. The *Vectorized tests in this class switch it back on themselves.
 */
@Before public void setUp() throws Exception { setUpInternal(); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); } @Override
/**
 * Turns HIVE_VECTORIZATION_ENABLED on in hiveConf and then runs the shared
 * loadDataNonAcid2AcidConversion scenario, passing {@code true} as its flag.
 */
@Test public void loadDataNonAcid2AcidConversionVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); loadDataNonAcid2AcidConversion(true); } @Test
/**
 * Turns HIVE_VECTORIZATION_ENABLED on in hiveConf and then runs the shared loadData scenario
 * with {@code isVectorized} set to {@code true}.
 */
@Test public void loadDataVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); loadData(true); } @Test
/**
 * Turns HIVE_VECTORIZATION_ENABLED on in hiveConf and then runs the shared loadDataUpdate
 * scenario with {@code isVectorized} set to {@code true}.
 */
@Test public void loadDataUpdateVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); loadDataUpdate(true); } @Test
runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(5,5),(6,6)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("START TRANSACTION"); runStatementOnDriver("insert into T values(1,2),(3,4)"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("COMMIT"); {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/delta_0000001_0000001_0001/000000_0"} }; checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected2 = new String[][] { {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/base_0000001_v0000023/bucket_00000"} }; checkResult(expected2, testQuery, isVectorized, "load data inpath (major)");
runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("insert into Tstage values(2,2),(3,3)"); runStatementOnDriver("insert into Tstage values(4,4),(5,5)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//clean the staging table runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional' = 'true')"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); "t/delta_10000001_10000001_0000/000002_0"}, }; checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("insert into Tstage values(5,6),(7,8)"); runStatementOnDriver("insert into Tstage values(8,8)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite");
private void loadDataUpdate(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver( "create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}}; checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("update T set b = 17 where a = 1"); String[][] expected2 = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0000/bucket_00000"} }; checkResult(expected2, testQuery, isVectorized, "update"); runStatementOnDriver("insert into T values(2,2)"); runStatementOnDriver("delete from T where a = 3"); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf);
private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected1 = new String[][] { {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002_v0000025/bucket_00000"} }; checkResult(expected1, testQuery, isVectorized, "load data inpath (minor)"); runStatementOnDriver("insert into T values(2,2)"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected2 = new String[][] {