public static boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[params.size()])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
@Override public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[0])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }
/**
 * Adjusts dependent options when large-file splitting is requested.
 *
 * Does nothing unless a positive blocks-per-chunk value was supplied. When
 * splitting is on, block size preservation is forced on and append mode is
 * forced off, logging each override.
 */
private void setOptionsForSplitLargeFile() {
  if (blocksPerChunk <= 0) {
    // Splitting not requested; leave the options untouched.
    return;
  }

  final String chunkSwitch = DistCpOptionSwitch.BLOCKS_PER_CHUNK.getSwitch();

  LOG.info("Enabling preserving blocksize since " + chunkSwitch + " is passed.");
  preserve(FileAttribute.BLOCKSIZE);

  LOG.info("Set " + DistCpOptionSwitch.APPEND.getSwitch() + " to false since "
      + chunkSwitch + " is passed.");
  this.append = false;
}
// Permission preservation is off by default on a fresh builder.
Assert.assertFalse(
    builder.build().shouldPreserve(FileAttribute.PERMISSION));

// After preserve(PERMISSION), built options report it as preserved.
builder.preserve(FileAttribute.PERMISSION);
Assert.assertTrue(builder.build().shouldPreserve(FileAttribute.PERMISSION));

// NOTE(review): same call/assert repeated — presumably verifying that
// preserve() is idempotent (a second call does not flip the flag); confirm
// against the test's intent.
builder.preserve(FileAttribute.PERMISSION);
Assert.assertTrue(builder.build().shouldPreserve(FileAttribute.PERMISSION));
// NOTE(review): this fragment starts mid-expression — the lines below appear
// to be the source/target arguments of a builder call opened above this view.
new Path("hdfs://localhost:8020/source/first"),
    new Path("hdfs://localhost:8020/target/"))
    .preserve(FileAttribute.ACL)
    .build();

// Preserving ACL alone must not mark BLOCKSIZE as preserved.
Assert.assertFalse(options.shouldPreserve(FileAttribute.BLOCKSIZE));

// Second builder chain: preserve a full set of attributes at once.
new Path("hdfs://localhost:8020/source/first"),
    new Path("hdfs://localhost:8020/target/"))
    .preserve(FileAttribute.BLOCKSIZE)
    .preserve(FileAttribute.REPLICATION)
    .preserve(FileAttribute.PERMISSION)
    .preserve(FileAttribute.USER)
    .preserve(FileAttribute.GROUP)
    .preserve(FileAttribute.CHECKSUMTYPE)
    .build();
// NOTE(review): fragment starts mid-chain — the builder call that this
// .preserve(...).build() completes is opened above this view.
.preserve(FileAttribute.PERMISSION).build();
// Presumably copies the option values into the job Configuration — confirm
// against DistCpOptions.appendToConf's contract.
options.appendToConf(conf);
// Wrap the finalized options in a DistCpContext for the copy job.
final DistCpContext context = new DistCpContext(options);
@Override public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException { DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) .withSyncFolder(true) .withCRC(true) .preserve(FileAttribute.BLOCKSIZE) .build(); // Creates the command-line parameters for distcp List<String> params = constructDistCpParams(srcPaths, dst, conf); try { conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); // HIVE-13704 states that we should use run() instead of execute() due to a hadoop known issue // added by HADOOP-10459 if (distcp.run(params.toArray(new String[0])) == 0) { return true; } else { return false; } } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); } finally { conf.setBoolean("mapred.mapper.new-api", false); } }