public static void main(String[] args) throws Exception {
  HiveConf conf = new HiveConf();
  // Merge the Oozie action configuration into the HiveConf before starting the CLI session.
  conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
  conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName());
  conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true);
  SessionState.start(new CliSessionState(conf));
  new CliDriver().processLine(args[0]);
}
/**
 * Create a Context with a given executionId. ExecutionId, together with
 * user name and conf, will determine the temporary directory locations.
 */
private Context(Configuration conf, String executionId) {
  this.conf = conf;
  this.executionId = executionId;
  this.rewrittenStatementContexts = new HashSet<>();

  // Local and non-local tmp locations are configurable; however, they are the same across
  // all external file systems.
  nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
  localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
  scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
  stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
  opContext = new CompilationOpContext();
  viewsTokenRewriteStreams = new HashMap<>();
}
public HiveConf createHiveConf(String metaStoreURI, String hiveMetaStorePrincipal) throws IOException {
  HiveConf hcatConf = new HiveConf();
  hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
  hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hcatConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
  // Enable SASL and point the client at the metastore's Kerberos principal.
  hcatConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
  hcatConf.set(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname, hiveMetaStorePrincipal);
  return hcatConf;
}
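A minimal usage sketch for the helper above, assuming the standard HiveMetaStoreClient constructor that accepts a HiveConf; the URI and principal values are placeholders, and the call site is hypothetical:

// Hypothetical caller; "thrift://metastore-host:9083" and the principal are placeholder values.
HiveConf secureConf = createHiveConf("thrift://metastore-host:9083", "hive/_HOST@EXAMPLE.COM");
HiveMetaStoreClient msClient = new HiveMetaStoreClient(secureConf);
try {
  // Simple smoke test: list databases over the SASL-secured Thrift connection.
  System.out.println(msClient.getAllDatabases());
} finally {
  msClient.close();
}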
private HiveConf createHiveConf(String metaStoreURI, boolean tokenAuthEnabled) {
  if (!tokenAuthEnabled) {
    return null;
  }
  HiveConf hcatConf = new HiveConf();
  hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
  hcatConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
  return hcatConf;
}
public LlapPluginEndpointClientImpl(
    Configuration conf, Token<JobTokenIdentifier> token, int expectedNodes) {
  // A single concurrent request per node is currently hardcoded. The node includes a port number,
  // so different AMs on the same host count as different nodes; we only have one request type,
  // and it is not useful to send more than one in parallel.
  super(LlapPluginEndpointClientImpl.class.getSimpleName(),
      HiveConf.getIntVar(conf, ConfVars.LLAP_PLUGIN_CLIENT_NUM_THREADS), conf, token,
      HiveConf.getTimeVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_TIMEOUT_MS,
          TimeUnit.MILLISECONDS),
      HiveConf.getTimeVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS,
          TimeUnit.MILLISECONDS),
      expectedNodes, 1);
}
private static void setPlanPath(Configuration conf, Path hiveScratchDir) throws IOException {
  if (getPlanPath(conf) == null) {
    // This is the unique conf ID, which is kept in JobConf as part of the plan file name.
    String jobID = UUID.randomUUID().toString();
    Path planPath = new Path(hiveScratchDir, jobID);
    if (!HiveConf.getBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN)) {
      // When the plan is shipped via RPC, creating a directory is unnecessary;
      // only create it when the plan will be written to the scratch directory.
      FileSystem fs = planPath.getFileSystem(conf);
      fs.mkdirs(planPath);
    }
    HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, planPath.toUri().toString());
  }
}
public static String externalTableLocation(HiveConf hiveConf, String location) throws SemanticException {
  String baseDir = hiveConf.get(HiveConf.ConfVars.REPL_EXTERNAL_TABLE_BASE_DIR.varname);
  Path basePath = new Path(baseDir);
  Path currentPath = new Path(location);
  // Prepend the replication base dir to the incoming table location, dropping scheme and authority.
  String targetPathWithoutSchemeAndAuth = basePath.toUri().getPath() + currentPath.toUri().getPath();
  Path dataLocation;
  try {
    dataLocation = PathBuilder.fullyQualifiedHDFSUri(
        new Path(targetPathWithoutSchemeAndAuth),
        basePath.getFileSystem(hiveConf)
    );
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
  }
  LOG.info("Incoming external table location: {} , new location: {}", location, dataLocation.toString());
  return dataLocation.toString();
}
@Override
protected int execute(DriverContext driverContext) {
  try {
    Hive hiveDb = getHive();
    Path dumpRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), getNextDumpDir());
    DumpMetaData dmd = new DumpMetaData(dumpRoot, conf);
    Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR));
    Long lastReplId;
    if (work.isBootStrapDump()) {
      lastReplId = bootStrapDump(dumpRoot, dmd, cmRoot, hiveDb);
    } else {
      lastReplId = incrementalDump(dumpRoot, dmd, cmRoot, hiveDb);
    }
    prepareReturnValues(Arrays.asList(dumpRoot.toUri().toString(), String.valueOf(lastReplId)));
  } catch (Exception e) {
    LOG.error("failed", e);
    setException(e);
    return ErrorMsg.getErrorMsg(e.getMessage()).getErrorCode();
  }
  return 0;
}
@Test
public void testCopyWithDistcp() throws IOException {
  Path copySrc = new Path("copySrc");
  Path copyDst = new Path("copyDst");
  HiveConf conf = new HiveConf(TestFileUtils.class);

  FileSystem mockFs = mock(FileSystem.class);
  when(mockFs.getUri()).thenReturn(URI.create("hdfs:///"));

  // Report a huge file count and total size so the copy falls back to distcp.
  ContentSummary mockContentSummary = mock(ContentSummary.class);
  when(mockContentSummary.getFileCount()).thenReturn(Long.MAX_VALUE);
  when(mockContentSummary.getLength()).thenReturn(Long.MAX_VALUE);
  when(mockFs.getContentSummary(any(Path.class))).thenReturn(mockContentSummary);

  HadoopShims shims = mock(HadoopShims.class);
  when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf)).thenReturn(true);

  Assert.assertTrue(FileUtils.copy(mockFs, copySrc, mockFs, copyDst, false, false, conf, shims));
  verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf);
}
private Path createScratchDir() throws IOException {
  Path parent = new Path(SessionState.get().getHdfsScratchDirURIString(), SPARK_DIR);
  Path sparkDir = new Path(parent, sessionId);
  FileSystem fs = sparkDir.getFileSystem(conf);
  FsPermission fsPermission = new FsPermission(HiveConf.getVar(
      conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION));
  fs.mkdirs(sparkDir, fsPermission);
  fs.deleteOnExit(sparkDir);
  return sparkDir;
}
private List<Path> runRemoveTempOrDuplicateFilesTestCase(String executionEngine, boolean dPEnabled)
    throws Exception {
  Configuration hconf = new HiveConf(this.getClass());
  // Set a bogus default FS to verify that Utilities.removeTempOrDuplicateFiles does not
  // revert to the default scheme information.
  hconf.set("fs.defaultFS", "hdfs://should-not-be-used/");
  hconf.set(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, executionEngine);
  FileSystem localFs = FileSystem.getLocal(hconf);
  DynamicPartitionCtx dpCtx = getDynamicPartitionCtx(dPEnabled);
  Path tempDirPath = setupTempDirWithSingleOutputFile(hconf);
  FileSinkDesc conf = getFileSinkDesc(tempDirPath);

  List<Path> paths = Utilities.removeTempOrDuplicateFiles(localFs, tempDirPath, dpCtx, conf, hconf, false);

  String expectedScheme = tempDirPath.toUri().getScheme();
  String expectedAuthority = tempDirPath.toUri().getAuthority();
  assertPathsMatchSchemeAndAuthority(expectedScheme, expectedAuthority, paths);

  return paths;
}
private static void fileDiff(String datafile, String testdir) throws Exception {
  String testFileDir = conf.get("test.data.files");

  // Built-in assumption that the testdir has only one output file.
  Path di_test = new Path(tmppath, testdir);
  if (!fs.exists(di_test)) {
    throw new RuntimeException(tmpdir + File.separator + testdir + " does not exist");
  }
  if (!ShimLoader.getHadoopShims().isDirectory(fs.getFileStatus(di_test))) {
    throw new RuntimeException(tmpdir + File.separator + testdir + " is not a directory");
  }

  FSDataInputStream fi_test = fs.open((fs.listStatus(di_test))[0].getPath());
  FileInputStream fi_gold = new FileInputStream(new File(testFileDir, datafile));
  if (!Utilities.contentsEqual(fi_gold, fi_test, false)) {
    LOG.error(di_test.toString() + " does not match " + datafile);
    fail(di_test.toString() + " does not match " + datafile);
  }
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  TestHCatClient.startMetaStoreServer();
  hconf = TestHCatClient.getConf();
  hconf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, "");
  hconf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  TEST_PATH = System.getProperty("test.warehouse.dir", "/tmp") + Path.SEPARATOR
      + TestCommands.class.getCanonicalName() + "-" + System.currentTimeMillis();
  Path testPath = new Path(TEST_PATH);
  FileSystem fs = FileSystem.get(testPath.toUri(), hconf);
  fs.mkdirs(testPath);
  driver = DriverFactory.newDriver(hconf);
  SessionState.start(new CliSessionState(hconf));
  client = HCatClient.create(hconf);
}
@Override
public void handle(Context withinContext) throws Exception {
  LOG.info("Processing#{} ALTER_TABLE message : {}", fromEventId(), eventMessageAsJSON);

  Table qlMdTableBefore = new Table(before);
  if (!Utils.shouldReplicate(withinContext.replicationSpec, qlMdTableBefore, withinContext.hiveConf)) {
    return;
  }

  if (Scenario.ALTER == scenario) {
    withinContext.replicationSpec.setIsMetadataOnly(true);
    Table qlMdTableAfter = new Table(after);
    Path metaDataPath = new Path(withinContext.eventRoot, EximUtil.METADATA_NAME);

    // If we are not dumping the table data, we should not dump basic statistics either,
    // since they would not be accurate. Reset them to what they would look like for an
    // empty table.
    if (withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY)) {
      qlMdTableAfter.setStatsStateLikeNewTable();
    }

    EximUtil.createExportDump(
        metaDataPath.getFileSystem(withinContext.hiveConf),
        metaDataPath,
        qlMdTableAfter,
        null,
        withinContext.replicationSpec,
        withinContext.hiveConf);
  }

  DumpMetaData dmd = withinContext.createDmd(this);
  dmd.setPayload(eventMessageAsJSON);
  dmd.write();
}
@Test
public void testSkipInvalidPartitionKeyName()
    throws HiveException, AlreadyExistsException, IOException, MetastoreException {
  hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip");
  checker = new HiveMetaStoreChecker(msc, hive.getConf());
  Table table = createTestTable();
  List<Partition> partitions = hive.getPartitions(table);
  assertEquals(2, partitions.size());

  // Add a fake partition dir on the file system.
  fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf());
  Path fakePart = new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose");
  fs.mkdirs(fakePart);
  fs.deleteOnExit(fakePart);
  createPartitionsDirectoriesOnFS(table, 2);

  CheckResult result = new CheckResult();
  checker.checkMetastore(catName, dbName, tableName, null, result);
  assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs());
  assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs());
  assertEquals(Collections.<CheckResult.PartitionResult> emptySet(), result.getPartitionsNotOnFs());
  // Only the 2 valid partitions should be added.
  assertEquals(2, result.getPartitionsNotInMs().size());
}
Writer(Path dbRoot, HiveConf hiveConf) throws IOException {
  this.hiveConf = hiveConf;
  writePath = new Path(dbRoot, FILE_NAME);
  excludeExternalTables = !hiveConf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES);
  dumpMetadataOnly = hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY);
  if (shouldWrite()) {
    this.writer = FileSystem.get(hiveConf).create(writePath);
  }
}
public static void cleanUpScratchDir(HiveConf hiveConf) {
  if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_START_CLEANUP_SCRATCHDIR)) {
    String hiveScratchDir = hiveConf.get(HiveConf.ConfVars.SCRATCHDIR.varname);
    try {
      Path jobScratchDir = new Path(hiveScratchDir);
      LOG.info("Cleaning scratchDir : " + hiveScratchDir);
      FileSystem fileSystem = jobScratchDir.getFileSystem(hiveConf);
      fileSystem.delete(jobScratchDir, true);
    } catch (Throwable e) {
      // Even if the cleanup throws some exception, startup will continue.
      LOG.warn("Unable to delete scratchDir : " + hiveScratchDir, e);
    }
  }
}
Path exportPath = null;
if (exportPathString != null && exportPathString.length() == 0) {
  exportPath = fs.getHomeDirectory();
} else {
  exportPath = new Path(exportPathString);
}
Path metaPath = new Path(exportPath, name + "." + dateString);
LOG.info("Exporting the metadata of table " + tbl.toString() + " to path " + metaPath.toString());
try {
  fs.mkdirs(metaPath);
} catch (IOException e) {
  throw new MetaException(e.getMessage());
}
Path outFile = new Path(metaPath, name + EximUtil.METADATA_NAME);
try {
  SessionState.getConsole().printInfo("Beginning metadata export");
  EximUtil.createExportDump(fs, outFile, mTbl, null, null,
      new HiveConf(conf, MetaDataExportListener.class));
  if (moveMetadataToTrash) {
    wh.deleteDir(metaPath, true, false, false);
  }
} catch (Exception e) {
  // Assumption: export failures are rethrown as MetaException, mirroring the mkdirs handling above.
  throw new MetaException(e.getMessage());
}
final FileSystem srcFs, tgtFs;
try {
  tgtFs = targetPath.getFileSystem(conf);
} catch (IOException e) {
  LOG.error("Failed to get dest fs", e);
  throw new HiveException(e.getMessage(), e);
}
try {
  srcFs = sourcePath.getFileSystem(conf);
} catch (IOException e) {
  LOG.error("Failed to get src fs", e);
  throw new HiveException(e.getMessage(), e);
}
// If the source exists, rename it to the target; otherwise create an empty target directory.
if (srcFs.exists(sourcePath)) {
  Path deletePath = null;
  // fs.rename fails when intermediate target directories are missing, so pre-create them.
  if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INSERT_INTO_MULTILEVEL_DIRS)) {
    deletePath = createTargetPath(targetPath, tgtFs);
  }
  // Assumption: Hive.moveFile performs the rename step that the original excerpt omits.
  if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) {
    try {
      if (deletePath != null) {
        tgtFs.delete(deletePath, true);
      }
    } catch (IOException e) {
      LOG.info("Unable to delete the path created for facilitating rename: " + deletePath);
    }
    throw new HiveException("Unable to rename: " + sourcePath + " to: " + targetPath);
  }
} else if (!tgtFs.mkdirs(targetPath)) {
  throw new HiveException("Unable to make directory: " + targetPath);
}
private void fireInsertEvent(Table tbl, Map<String, String> partitionSpec, boolean replace,
    List<Path> newFiles) throws HiveException {
  if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML)) {
    LOG.debug("Firing dml insert event");
    if (tbl.isTemporary()) {
      LOG.debug("Not firing dml insert event as " + tbl.getTableName() + " is temporary");
      return;
    }
    try {
      FileSystem fileSystem = tbl.getDataLocation().getFileSystem(conf);
      FireEventRequestData data = new FireEventRequestData();
      InsertEventRequestData insertData = new InsertEventRequestData();
      // ... (the insert event payload is populated and fired against the metastore here;
      // that portion is omitted in this excerpt)
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
}