/**
 * Creates a proxy backed by a local {@link Hive} client.
 *
 * @param hive the Hive client to wrap; may be {@code null}, in which case the proxy
 *             has no client and no configuration
 */
public HiveProxy(Hive hive) {
  // hasHiveClient records whether a real client was supplied, yet the original code
  // dereferenced 'hive' unconditionally and would NPE in exactly the case the flag
  // is meant to represent. Guard the getConf() call accordingly.
  this.hasHiveClient = hive != null;
  this.conf = this.hasHiveClient ? hive.getConf() : null;
  this.handler = null;
}
/** Builds a checker around the given Hive client, caching its configuration. */
public HiveMetaStoreChecker(Hive hive) {
  // Implicit super() call; no need to spell it out.
  this.hive = hive;
  this.conf = hive.getConf();
}
/**
 * Constructs a proxy over a local {@link Hive} client.
 *
 * @param hive the wrapped client; a {@code null} value means no client is available,
 *             and then no configuration is available either
 */
public HiveProxy(Hive hive) {
  this.hasHiveClient = hive != null;
  // The null-check above implies 'hive' can legitimately be null, but the original
  // code still called hive.getConf() unconditionally, which would throw NPE.
  // Only read the conf when a client actually exists.
  this.conf = this.hasHiveClient ? hive.getConf() : null;
  this.handler = null;
}
public void init(Hive db) { final boolean dummy = db.getConf().get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) .equals("DUMMY"); if (dummy) { // Dummy registry does not cache information and forwards all requests to metastore initialized.set(true); LOG.info("Using dummy materialized views registry"); } else { // We initialize the cache ExecutorService pool = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("HiveMaterializedViewsRegistry-%d") .build()); pool.submit(new Loader(db)); pool.shutdown(); } }
@Override public void run() { try { SessionState ss = new SessionState(db.getConf()); ss.setIsHiveServerQuery(true); // All is served from HS2, we do not need e.g. Tez sessions SessionState.start(ss); final boolean cache = !db.getConf() .get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY"); for (String dbName : db.getAllDatabases()) { for (Table mv : db.getAllMaterializedViewObjects(dbName)) { addMaterializedView(db.getConf(), mv, OpType.LOAD, cache); } } initialized.set(true); LOG.info("Materialized views registry has been initialized"); } catch (HiveException e) { LOG.error("Problem connecting to the metastore when initializing the view registry", e); } } }
private void checkLocalFunctionResources(Hive db, List<ResourceUri> resources) throws HiveException { // If this is a non-local warehouse, then adding resources from the local filesystem // may mean that other clients will not be able to access the resources. // So disallow resources from local filesystem in this case. if (resources != null && resources.size() > 0) { try { String localFsScheme = FileSystem.getLocal(db.getConf()).getUri().getScheme(); String configuredFsScheme = FileSystem.get(db.getConf()).getUri().getScheme(); if (configuredFsScheme.equals(localFsScheme)) { // Configured warehouse FS is local, don't need to bother checking. return; } for (ResourceUri res : resources) { String resUri = res.getUri(); if (ResourceDownloader.isFileUri(resUri)) { throw new HiveException("Hive warehouse is non-local, but " + res.getUri() + " specifies file on local filesystem. " + "Resources on non-local warehouse should specify a non-local scheme/path"); } } } catch (HiveException e) { throw e; } catch (Exception e) { LOG.error("Exception caught in checkLocalFunctionResources", e); throw new HiveException(e); } } }
/**
 * Verifies that the given column's statistics are up to date for every partition
 * and, if so, fetches them from the metastore in a single call.
 *
 * @return the per-partition statistics, or null when any partition's stats are
 *         stale or the metastore returned fewer entries than partitions requested
 */
private Collection<List<ColumnStatisticsObj>> verifyAndGetPartColumnStats(
    Hive hive, Table tbl, String colName, Set<Partition> parts)
    throws TException, LockException {
  final List<String> partNames = new ArrayList<String>(parts.size());
  for (Partition partition : parts) {
    final boolean upToDate = StatsUtils.areColumnStatsUptoDateForQueryAnswering(
        partition.getTable(), partition.getParameters(), colName);
    if (!upToDate) {
      Logger.debug("Stats for part : " + partition.getSpec() + " column " + colName
          + " are not up to date.");
      return null;
    }
    partNames.add(partition.getName());
  }
  final AcidUtils.TableSnapshot snapshot = AcidUtils.getTableSnapshot(hive.getConf(), tbl);
  final String writeIdList = snapshot == null ? null : snapshot.getValidWriteIdList();
  final Map<String, List<ColumnStatisticsObj>> stats =
      hive.getMSC().getPartitionColumnStatistics(tbl.getDbName(), tbl.getTableName(),
          partNames, Lists.newArrayList(colName), writeIdList);
  if (stats.size() != parts.size()) {
    Logger.debug("Received " + stats.size() + " stats for " + parts.size() + " partitions");
    return null;
  }
  return stats.values();
}
/**
 * Issues a kill request for every query id named in the descriptor, on behalf of
 * the current session.
 *
 * @return 0 on success (task-exit-code convention)
 */
private int killQuery(Hive db, KillQueryDesc desc) throws HiveException {
  final SessionState session = SessionState.get();
  for (final String queryId : desc.getQueryIds()) {
    session.getKillQuery().killQuery(queryId, "User invoked KILL QUERY", db.getConf());
  }
  LOG.info("kill query called ({})", desc.getQueryIds());
  return 0;
}
/**
 * Installs a mocked {@link Hive} object that never creates a real meta store client.
 * This avoids the DataNucleus initialization, which makes the test easier to run
 * from an IDE.
 *
 * @param hiveConf configuration the mock reports from {@code getConf()}
 * @throws MetaException declared for API compatibility
 */
private void setupDataNucleusFreeHive(HiveConf hiveConf) throws MetaException {
  final Hive mockedHive = Mockito.mock(Hive.class);
  Mockito.when(mockedHive.getMSC()).thenReturn(null);
  Mockito.when(mockedHive.getConf()).thenReturn(hiveConf);
  Hive.set(mockedHive);
}
/**
 * Populates JDBC connection properties (URL, driver class, credentials, database
 * type) from the metastore configuration when the configured database type is
 * {@code METASTORE}; otherwise leaves the properties untouched.
 */
private static void resolveMetadata(Properties props) throws HiveException, IOException {
  final DatabaseType dbType = DatabaseType.valueOf(
      props.getProperty(JdbcStorageConfig.DATABASE_TYPE.getPropertyName()));
  LOGGER.debug("Resolving db type: {}", dbType.toString());
  if (dbType != DatabaseType.METASTORE) {
    return;
  }
  final HiveConf hconf = Hive.get().getConf();
  props.setProperty(JdbcStorageConfig.JDBC_URL.getPropertyName(),
      getMetastoreConnectionURL(hconf));
  props.setProperty(JdbcStorageConfig.JDBC_DRIVER_CLASS.getPropertyName(),
      getMetastoreDriver(hconf));
  // Credentials are optional; only set them when the metastore config defines them.
  final String user = getMetastoreJdbcUser(hconf);
  if (user != null) {
    props.setProperty(CONFIG_USERNAME, user);
  }
  final String pwd = getMetastoreJdbcPasswd(hconf);
  if (pwd != null) {
    props.setProperty(CONFIG_PWD, pwd);
  }
  props.setProperty(JdbcStorageConfig.DATABASE_TYPE.getPropertyName(),
      getMetastoreDatabaseType(hconf));
}
@Before public void setUp() throws Exception { hive = Hive.get(); hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 15); hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "throw"); msc = new HiveMetaStoreClient(hive.getConf()); checker = new HiveMetaStoreChecker(msc, hive.getConf()); partCols = new ArrayList<FieldSchema>(); partCols.add(new FieldSchema(partDateName, serdeConstants.STRING_TYPE_NAME, "")); partCols.add(new FieldSchema(partCityName, serdeConstants.STRING_TYPE_NAME, "")); parts = new ArrayList<Map<String, String>>(); Map<String, String> part1 = new HashMap<String, String>(); part1.put(partDateName, "2008-01-01"); part1.put(partCityName, "london"); parts.add(part1); Map<String, String> part2 = new HashMap<String, String>(); part2.put(partDateName, "2008-01-02"); part2.put(partCityName, "stockholm"); parts.add(part2); //cleanup just in case something is left over from previous run dropDbTable(); }
@Test(expected = MetastoreException.class) public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsException, IOException, MetastoreException { Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); checker.checkMetastore(catName, dbName, tableName, null, new CheckResult()); }
@Test public void testSkipInvalidPartitionKeyName() throws HiveException, AlreadyExistsException, IOException, MetastoreException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); checker = new HiveMetaStoreChecker(msc, hive.getConf()); Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); createPartitionsDirectoriesOnFS(table, 2); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); }
@Test public void testAdditionalPartitionDirs() throws HiveException, AlreadyExistsException, IOException, MetastoreException { Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), partDateName + "=2017-01-01/" + partCityName + "=paloalto/fakePartCol=fakepartValue"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); //fakePart path partition is added since the defined partition keys are valid assertEquals(1, result.getPartitionsNotInMs().size()); }
/** * Removes all databases and tables from the metastore */ public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) throws HiveException, MetaException, NoSuchObjectException { for (String dbName : hive.getAllDatabases()) { if (dbName.equals("default")) { continue; } try { Path path = getDbPath(hive, wh, dbName); FileSystem whFs = path.getFileSystem(hive.getConf()); whFs.setPermission(path, defaultPerm); } catch (IOException ex) { //ignore } hive.dropDatabase(dbName, true, true, true); } //clean tables in default db for (String tablename : hive.getAllTables("default")) { hive.dropTable("default", tablename, true, true); } }
@Test public void testSkipInvalidOrderForPartitionKeysOnFS() throws AlreadyExistsException, HiveException, IOException, MetastoreException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); checker = new HiveMetaStoreChecker(msc, hive.getConf()); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createInvalidPartitionDirsOnFS(testTable, 2); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 2); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); }
@Test public void testSingleThreadedCheckMetastore() throws HiveException, AlreadyExistsException, IOException, MetastoreException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); }
/** Dropping a table/database must also remove stray directories under their locations. */
@Test
public void testDataDeletion() throws HiveException, IOException, TException {
  final Database database = new Database();
  database.setName(dbName);
  hive.createDatabase(database);

  Table table = new Table(dbName, tableName);
  table.setDbName(dbName);
  table.setInputFormatClass(TextInputFormat.class);
  table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class);
  table.setPartCols(partCols);
  hive.createTable(table);
  table = hive.getTable(dbName, tableName);

  // A stray sibling directory next to the table location.
  final Path strayTable =
      table.getPath().getParent().suffix(Path.SEPARATOR + "faketable");
  fs = strayTable.getFileSystem(hive.getConf());
  fs.mkdirs(strayTable);
  fs.deleteOnExit(strayTable);

  // A stray partition-like directory inside the table location.
  final Path strayPartition =
      new Path(table.getDataLocation().toString(), "fakepartition=fakevalue");
  fs.mkdirs(strayPartition);
  fs.deleteOnExit(strayPartition);

  hive.dropTable(dbName, tableName, true, true);
  assertFalse(fs.exists(strayPartition));
  hive.dropDatabase(dbName);
  assertFalse(fs.exists(strayTable));
}
/** * Tests the case when the number of partition keys are more than the threadpool size. * * @throws HiveException * @throws AlreadyExistsException * @throws IOException */ @Test public void testDeeplyNestedPartitionedTables() throws HiveException, AlreadyExistsException, IOException, MetastoreException { hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 2); int poolSize = 2; // create a deeply nested table which has more partition keys than the pool size Table testTable = createPartitionedTestTable(dbName, tableName, poolSize + 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); }
/** * Tests single threaded implementation for deeply nested partitioned tables * * @throws HiveException * @throws AlreadyExistsException * @throws IOException */ @Test public void testSingleThreadedDeeplyNestedTables() throws HiveException, AlreadyExistsException, IOException, MetastoreException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); int poolSize = 2; // create a deeply nested table which has more partition keys than the pool size Table testTable = createPartitionedTestTable(dbName, tableName, poolSize + 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); }