public static boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[params.size()])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
@Override public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[0])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
@Override public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[0])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
.withAppend( command.hasOption(DistCpOptionSwitch.APPEND.getSwitch())) .withCRC( command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) .withBlocking(
.withSyncFolder(true) .withAppend(true) .withCRC(true) .build(); fail("Append should fail if skipCrc option is specified");
/**
 * Performs a {@code distcp -update} from a local directory to a remote one.
 *
 * <p>Before the copy, the method logs a description of the operation and
 * lists both the local source tree and the remote destination tree. The copy
 * is configured with delete-missing on, sync-folder on, {@code withCRC(true)}
 * and overwrite off, then completed via {@code buildWithStandardOptions}.
 *
 * @param srcDir local source directory
 * @param destDir remote destination directory
 * @return the completed job
 * @throws Exception on any failure
 */
private Job distCpUpdate(final Path srcDir, final Path destDir)
    throws Exception {
  describe("\nDistcp -update from " + srcDir + " to " + destDir);

  // Record the state of both sides before anything is copied.
  lsR("Local to update", localFS, srcDir);
  lsR("Remote before update", remoteFS, destDir);

  final DistCpOptions.Builder updateOptions =
      new DistCpOptions.Builder(Collections.singletonList(srcDir), destDir)
          .withDeleteMissing(true)
          .withSyncFolder(true)
          .withCRC(true)
          .withOverwrite(false);

  return runDistCp(buildWithStandardOptions(updateOptions));
}
/**
 * Verifies the skip-CRC flag on built options: it is off for a freshly
 * constructed builder, and on once {@code withSyncFolder(true)} and
 * {@code withCRC(true)} have been applied.
 */
@Test
public void testSetSkipCRC() {
  final Path source = new Path("hdfs://localhost:8020/source");
  final Path target = new Path("hdfs://localhost:8020/target/");
  final DistCpOptions.Builder optionsBuilder =
      new DistCpOptions.Builder(Collections.singletonList(source), target);

  // Untouched builder: CRC checks must not be skipped.
  final DistCpOptions defaults = optionsBuilder.build();
  Assert.assertFalse(defaults.shouldSkipCRC());

  // Sync-folder is enabled together with skip-CRC in the same chain.
  final DistCpOptions configured = optionsBuilder
      .withSyncFolder(true)
      .withCRC(true)
      .build();
  Assert.assertTrue(configured.shouldSyncFolder());
  Assert.assertTrue(configured.shouldSkipCRC());
}