Refine search
public static boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[params.size()])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
for (Path aSrc : srcs) { totalSrcLgth += BackupUtils.getFilesLength(aSrc.getFileSystem(super.getConf()), aSrc); job = super.execute(); MapReduceBackupCopyJob.this.getConf().getInt("hbase.backup.progressreport.frequency", 500); float lastProgress = progressDone; job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
@Override protected Path createInputFileListing(Job job) throws IOException { if (conf.get(NUMBER_OF_LEVELS_TO_PRESERVE_KEY) == null) { return super.createInputFileListing(job); FileSystem fs = srcFiles.get(0).getFileSystem(conf); for (Path path : srcFiles) { FileStatus fst = fs.getFileStatus(path); totalBytesExpected += fst.getLen(); Text key = getKey(path); cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesExpected); cfg.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString()); cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalRecords); } catch (NoSuchFieldException | SecurityException | IllegalArgumentException
/**
 * Runs DistCp on an empty directory inside a mini DFS cluster and checks that the
 * destination directory was created. The copy is only attempted when the default
 * file-system URI uses the {@code hdfs://} scheme.
 */
public void testEmptyDir() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster(conf, 2, true, null);
    final FileSystem hdfs = cluster.getFileSystem();
    final String namenode = FileSystem.getDefaultUri(conf).toString();
    if (!namenode.startsWith("hdfs://")) {
      // Nothing to exercise; the finally block still shuts the cluster down.
      return;
    }

    final FileSystem sourceFs = FileSystem.get(URI.create(namenode), new Configuration());
    sourceFs.mkdirs(new Path("/empty"));

    final String[] distCpArgs = {
        "-log",
        namenode+"/logs",
        namenode+"/empty",
        namenode+"/dest"
    };
    ToolRunner.run(new DistCp(conf), distCpArgs);

    final FileSystem destFs = FileSystem.get(URI.create(namenode+"/destdat"), conf);
    final boolean destExists = destFs.exists(new Path(namenode+"/dest"));
    assertTrue("Destination directory does not exist.", destExists);

    // Remove everything the test created.
    deldir(hdfs, "/dest");
    deldir(hdfs, "/empty");
    deldir(hdfs, "/logs");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
final String testDst = testRoot + "/" + testDstRel; String nnUri = FileSystem.getDefaultUri(conf).toString(); DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(URI.create(nnUri), conf); fs.mkdirs(new Path(testRoot)); if (createSrcDir) { fs.mkdirs(new Path(testSrc)); fs.mkdirs(new Path(testDst)); nnUri+testDst} : new String[]{"-pub", nnUri+testSrc, nnUri+testDst}; ToolRunner.run(conf, new DistCp(), args);
public void testCopyDfsToDfsUpdateWithSkipCRC() throws Exception { MiniDFSCluster cluster = null; try { Configuration conf = new Configuration(); cluster = new MiniDFSCluster(conf, 2, true, null); final FileSystem hdfs = cluster.getFileSystem(); final String namenode = hdfs.getUri().toString(); FileSystem fs = FileSystem.get(URI.create(namenode), new Configuration()); deldir(hdfs,"/logs"); Path srcPath = new Path("/srcdat", testfilename); Path destPath = new Path("/destdat", testfilename); FSDataOutputStream out = fs.create(srcPath, true); out.writeUTF(srcData); out.close(); ToolRunner.run(new DistCp(conf), new String[] { "-p", "-update", ToolRunner.run(new DistCp(conf), new String[] { "-p", "-update",
MyFile[] files = createFiles(URI.create(nnUri), "/srcdat"); FileStatus[] srcstat = getFileStatus(fs, "/srcdat", files); for(int i = 0; i < srcstat.length; i++) { fs.setOwner(srcstat[i].getPath(), "u" + i, null); ToolRunner.run(new DistCp(conf), new String[]{"-pu", nnUri+"/srcdat", nnUri+"/destdat"}); assertTrue("Source and destination directories do not match.", fs.setOwner(srcstat[i].getPath(), null, "g" + i); ToolRunner.run(new DistCp(conf), new String[]{"-pg", nnUri+"/srcdat", nnUri+"/destdat"}); assertTrue("Source and destination directories do not match.", ToolRunner.run(new DistCp(conf), new String[]{"-pp", nnUri+"/srcdat", nnUri+"/destdat"}); assertTrue("Source and destination directories do not match.", MyFile[] files = createFiles(URI.create(nnUri), "/srcdat"); fs.mkdirs(new Path("/srcdat/tmpf1")); fs.mkdirs(new Path("/srcdat/tmpf2")); FileStatus[] srcstat = getFileStatus(fs, "/srcdat", files); FsPermission[] permissions = new FsPermission[srcstat.length]; ToolRunner.run(new DistCp(conf), new String[]{"-pt", nnUri+"/srcdat", nnUri+"/destdat"});
final String testDst = testRoot + "/" + testDstRel; String nnUri = FileSystem.getDefaultUri(conf).toString(); FileSystem fs = FileSystem.get(URI.create(nnUri), conf); fs.mkdirs(new Path(testRoot)); if (createSrcDir) { fs.mkdirs(new Path(testSrc)); fs.mkdirs(new Path(testDst)); nnUri+testDst} : new String[]{"-pu", nnUri+testSrc, nnUri+testDst}; ToolRunner.run(conf, new DistCp(), args);
/**
 * Exercises the {@code -basedir} option for a DFS-to-DFS copy: files created under
 * {@code /basedir/middle/srcdat} are copied with base directory {@code /basedir} and are
 * then expected under {@code /destdat/middle/srcdat}. The copy is only attempted when the
 * default file-system URI uses the {@code hdfs://} scheme.
 */
public void testBasedir() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster(conf, 2, true, null);
    final FileSystem hdfs = cluster.getFileSystem();
    final String namenode = FileSystem.getDefaultUri(conf).toString();
    if (!namenode.startsWith("hdfs://")) {
      // Nothing to exercise; the finally block still shuts the cluster down.
      return;
    }

    final MyFile[] sourceFiles = createFiles(URI.create(namenode), "/basedir/middle/srcdat");

    final String[] distCpArgs = {
        "-basedir",
        "/basedir",
        namenode+"/basedir/middle/srcdat",
        namenode+"/destdat"
    };
    ToolRunner.run(new DistCp(conf), distCpArgs);

    final boolean copiedIntact = checkFiles(hdfs, "/destdat/middle/srcdat", sourceFiles);
    assertTrue("Source and destination directories do not match.", copiedIntact);

    // Remove everything the test created.
    deldir(hdfs, "/destdat");
    deldir(hdfs, "/basedir");
    deldir(hdfs, "/logs");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
MiniMRCluster mr = null; try { Configuration conf = new Configuration(); dfs = new MiniDFSCluster(conf, 3, true, null); FileSystem fs = dfs.getFileSystem(); final FsShell shell = new FsShell(conf); namenode = fs.getUri().toString(); mr = new MiniMRCluster(3, namenode, 1); MyFile[] files = createFiles(fs.getUri(), "/srcdat"); long totsize = 0; for (MyFile f : files) { job.setLong("distcp.bytes.per.map", totsize / 3); ToolRunner.run(new DistCp(job), new String[] {"-m", "100", "-log", FileStatus[] logs = fs.listStatus(new Path(logdir)); ToolRunner.run(new DistCp(job), new String[] {"-m", "1", "-log", logs = fs.globStatus(new Path(namenode+"/logs/part*")); assertTrue("Unexpected map count, logs.length=" + logs.length, logs.length == 1);
FileSystem dest = getFileSystem(destHdfs); if (!src.equals(dest)) { Path srcPath = src.makeQualified(new Path(jobDirectory.toUri().getPath())); Path destPath = dest.makeQualified(new Path(jobDirectory.toUri().getPath())); Path logPath = new Path(destPath, "logs"); dest.delete(destPath, true); options.preserve(DistCpOptions.FileAttribute.PERMISSION); String[] args = (jobtracker == null) ? new String[0] : new String[] {"-jt", jobtracker}; int res = ToolRunner.run(conf, new DistCp(conf, options), args); if (res != 0) { log.error("The toolrunner failed to execute. Returned with exit code of " + res);
@Override public void handleOutputs(Configuration conf, Path workingPath, int index) throws IOException { FileSystem srcFs = workingPath.getFileSystem(conf); Path src = getSourcePattern(workingPath, index); Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(src), src); FileSystem dstFs = path.getFileSystem(conf); if (!dstFs.exists(path)) { dstFs.mkdirs(path); int maxDistributedCopyTasks = conf.getInt(RuntimeParameters.FILE_TARGET_MAX_DISTCP_TASKS, 1000); LOG.info( "Source and destination are in different file systems, performing distcp of {} files from [{}] to [{}] " options.setBlocking(true); Configuration distCpConf = new Configuration(conf); distCpConf.unset("mapreduce.job.cache.files"); distCpConf.unset("mapreduce.job.classpath.files"); distCpConf.unset("tmpjars"); DistCp distCp = new DistCp(distCpConf, options); if (!distCp.execute().isSuccessful()) { throw new CrunchRuntimeException("Unable to move files through distcp from " + src + " to " + path); Path d = getDestFile(conf, s, path, s.getName().contains("-m-")); srcFs.rename(s, d);
ToolRunner.run(new DistCp(new Configuration()), new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat", "file:///"+TEST_ROOT_DIR+"/destdat"}); Path p = new Path(root, fname); FileSystem.LOG.info("fname=" + fname + ", exists? " + fs.exists(p)); ToolRunner.run(new DistCp(new Configuration()), new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat/"+fname, "file:///"+TEST_ROOT_DIR+"/dest2/"+fname}); DistCp.Arguments distcpArgs = DistCp.Arguments.valueOf(args, conf); assertFalse("Single file update failed to skip copying even though the " + "file exists at destination.", DistCp.setup(conf, job, distcpArgs)); fs.mkdirs(new Path(TEST_ROOT_DIR+"/dest2")); MyFile[] files2 = {createFile(root, fs, 0)}; String sname = files2[0].getName(); ToolRunner.run(new DistCp(new Configuration()), new String[] {"-update", "file:///"+TEST_ROOT_DIR+"/srcdat/"+sname, updateFiles(fs, TEST_ROOT_DIR+"/srcdat", files2, 1); ToolRunner.run(new DistCp(new Configuration()), new String[] {"-update", "file:///"+TEST_ROOT_DIR+"/srcdat/"+sname,
/**
 * Builds the path of the job's meta folder and records it in the configuration.
 *
 * <p>The folder name is {@code PREFIX} followed by a random integer, resolved against the
 * job staging directory. The resulting path is stored under
 * {@link DistCpConstants#CONF_LABEL_META_FOLDER}. This method only computes the path; it
 * does not itself create the directory.
 *
 * @return the meta folder path
 * @throws Exception if the staging directory cannot be determined
 */
private Path createMetaFolderPath() throws Exception {
  final Configuration jobConf = getConf();
  final Cluster cluster = new Cluster(jobConf);
  final Path stagingRoot = JobSubmissionFiles.getStagingDir(cluster, jobConf);

  final String folderName = PREFIX + Integer.toString(rand.nextInt());
  final Path metaFolder = new Path(stagingRoot, folderName);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Meta folder location: " + metaFolder);
  }

  jobConf.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolder.toString());
  return metaFolder;
}
@Test public void testSourceRoot() throws Exception { FileSystem fs = cluster.getFileSystem(); String rootStr = fs.makeQualified(new Path("/")).toString(); String testRoot = "/testdir." + getMethodName(); // Case 1. The target does not exist. Path tgtPath = new Path(testRoot + "/nodir"); String tgtStr = fs.makeQualified(tgtPath).toString(); String[] args = new String[]{rootStr, tgtStr}; Assert.assertThat(ToolRunner.run(conf, new DistCp(), args), is(0)); // Case 2. The target exists. Path tgtPath2 = new Path(testRoot + "/dir"); assertTrue(fs.mkdirs(tgtPath2)); String tgtStr2 = fs.makeQualified(tgtPath2).toString(); String[] args2 = new String[]{rootStr, tgtStr2}; Assert.assertThat(ToolRunner.run(conf, new DistCp(), args2), is(0)); } }
/**
 * Checks that when {@code DistCp.execute()} throws, the job staging directory is left
 * empty.
 *
 * <p>The source path uses the scheme {@code noscheme}, which is presumably not resolvable
 * and is what makes {@code execute()} fail (confirm against the DistCp implementation).
 * Any {@link Exception} raised while preparing the test fails it with the exception's
 * message.
 */
@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
        new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
      // NOTE(review): if execute() returns normally nothing is asserted and the test
      // passes vacuously; consider failing here once the intended contract is confirmed.
    } catch (Throwable t) {
      // Expected value goes first so a failure message reports the values the right
      // way round.
      Assert.assertEquals(0,
          stagingDir.getFileSystem(conf).listStatus(stagingDir).length);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
String localTmp = "file://" + new File(rootPath, "ShellServerIT.tmp").toString(); ts.exec("exporttable -t " + table + " " + exportUri, true); DistCp cp = newDistCp(new Configuration(false)); String import_ = "file://" + new File(rootPath, "ShellServerIT.import").toString(); if (getCluster().getClientConfig().hasSasl()) { FileSystem localFs = FileSystem.getLocal(new Configuration(false)); Path localTmpPath = new Path(localTmp); localFs.mkdirs(localTmpPath); Path importDir = new Path(import_); fs.mkdirs(importDir); new FileReader(new File(exportDir, "distcp.txt")))) { for (String line; (line = reader.readLine()) != null;) { Path exportedFile = new Path(line); assertEquals("Failed to run distcp: " + Arrays.toString(distCpArgs), 0, cp.run(distCpArgs));
/**
 * Exercises {@code DistCp.run} with a simple single-file copy and checks that the target
 * path exists afterwards.
 *
 * @throws Exception if setup or the copy fails
 */
@Test
public void testCleanup() throws Exception {
  final Configuration conf = getConf();

  // Make sure the job staging directory exists before DistCp runs.
  final Cluster cluster = new Cluster(conf);
  final Path stagingDir = JobSubmissionFiles.getStagingDir(cluster, conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);

  final Path source = createFile("tmp.txt");
  final Path target = createFile("target.txt");

  final DistCp distcp = new DistCp(conf, null);
  final String[] copyArgs = new String[] {source.toString(), target.toString()};
  distcp.run(copyArgs);

  Assert.assertTrue(fs.exists(target));
}
protected void distCpDirectory(Path workDir, FileSystem src, FileSystem dest, Configuration distcpConfig, boolean deleteAfterDistCp) throws Exception { Path srcPath = src.makeQualified(workDir); Path destPath = dest.makeQualified(workDir); Path logPath = new Path(destPath, "logs"); dest.delete(destPath, true); options.setBlocking(true); DistCp cp = new DistCp(distcpConfig, options); log.info("Starting distcp from " + srcPath + " to " + destPath + " with configuration: " + options); try { cp.execute(); } catch (Exception e) { throw new RuntimeException("Distcp failed.", e); for (FileStatus destFile : dest.listStatus(destPath)) { destFiles.put(destFile.getPath().getName(), destFile); for (FileStatus srcFile : src.listStatus(srcPath)) { FileStatus destFile = destFiles.get(srcFile.getPath().getName()); if (destFile == null || destFile.getLen() != srcFile.getLen()) { throw new RuntimeException("DistCp failed to copy " + srcFile.getPath());
/**
 * Copies a generated set of files from one local directory to another with DistCp and
 * checks that the destination matches the source, then removes both directories.
 */
public void testCopyFromLocalToLocal() throws Exception {
  final Configuration conf = new Configuration();
  final FileSystem localfs = FileSystem.get(LOCAL_FS, conf);

  final String srcDir = TEST_ROOT_DIR+"/srcdat";
  final String destDir = TEST_ROOT_DIR+"/destdat";

  final MyFile[] sourceFiles = createFiles(LOCAL_FS, srcDir);

  // DistCp gets its own fresh Configuration, independent of the one used for localfs.
  final String[] distCpArgs = {
      "file:///"+TEST_ROOT_DIR+"/srcdat",
      "file:///"+TEST_ROOT_DIR+"/destdat"
  };
  ToolRunner.run(new DistCp(new Configuration()), distCpArgs);

  final boolean copiedIntact = checkFiles(localfs, destDir, sourceFiles);
  assertTrue("Source and destination directories do not match.", copiedIntact);

  deldir(localfs, destDir);
  deldir(localfs, srcDir);
}