/**
 * Computes the input splits by delegating to the wrapped {@code format}.
 *
 * @param job the job configuration handed to the delegate
 * @param numSplits the split-count argument, forwarded unchanged
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final InputSplit[] delegateSplits = format.getSplits(job, numSplits);
  return delegateSplits;
}
@Override public InputSplit[] getSplits(JobConf job, int splits) throws IOException { // ensure that the table properties were copied assertEquals("val1", job.get("myprop1")); assertEquals("val2", job.get("myprop2")); // ensure that both of the partitions are in the complete list. String[] dirs = job.get("hive.complete.dir.list").split("\t"); assertEquals(2, dirs.length); Arrays.sort(dirs); assertEquals(true, dirs[0].endsWith("/state=CA")); assertEquals(true, dirs[1].endsWith("/state=OR")); return super.getSplits(job, splits); } }
InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits); for (InputSplit is : iss) { result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits); for (InputSplit is : iss) { result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
// Configure the target format with targetJob, set targetPath as the job's input path, then ask the format for its splits (passing 0 as the split-count argument).
targetInputFormat.configure(targetJob); FileInputFormat.setInputPaths(targetJob, targetPath); InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);
/**
 * Checks that, with {@code INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS} enabled, split
 * computation over {@code test:///a1} yields exactly one split.
 *
 * @throws Exception if configuration, split computation, or cleanup fails
 */
@Test
public void testIgnoreDirs() throws Exception {
  Configuration conf = getConfiguration();
  conf.setBoolean(FileInputFormat.INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS, true);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1");
  try {
    // Not read afterwards; kept because the lookup and cast still run as in the
    // original (presumably to make sure the mock file system backs "test:///").
    MockFileSystem mockFs = (MockFileSystem) new Path("test:///").getFileSystem(conf);
    JobConf job = new JobConf(conf);
    TextInputFormat fileInputFormat = new TextInputFormat();
    fileInputFormat.configure(job);
    InputSplit[] splits = fileInputFormat.getSplits(job, 1);
    Assert.assertEquals("Input splits are not correct", 1, splits.length);
  } finally {
    // Close file systems even when an assertion fails, so a failing run does not
    // leave instances open for the tests that follow.
    FileSystem.closeAll();
  }
}
/**
 * Computes splits with the old-API ({@code org.apache.hadoop.mapred}) text input
 * format, writes them out through {@code JobSplitWriter}, and validates the
 * resulting split meta info.
 *
 * @throws Exception if split computation, writing, or validation fails
 */
@Test
public void testMaxBlockLocationsOldSplitsWithErasureCoding() throws Exception {
  JobConf oldApiJob = new JobConf(conf);

  org.apache.hadoop.mapred.TextInputFormat textFormat =
      new org.apache.hadoop.mapred.TextInputFormat();
  textFormat.configure(oldApiJob);

  final org.apache.hadoop.mapred.InputSplit[] oldApiSplits =
      textFormat.getSplits(oldApiJob, 1);

  JobSplitWriter.createSplitFiles(submitDir, conf, fs, oldApiSplits);
  validateSplitMetaInfo();
}
/**
 * Checks that computing splits for {@code test:///a1/a2} produces two splits and
 * triggers exactly one {@code listLocatedStatus} call on the mock file system.
 *
 * @throws Exception if configuration, split computation, or cleanup fails
 */
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  // Caching is left enabled for the "test" scheme; presumably so the instance
  // inspected below is the same one the input format uses.
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");
  try {
    MockFileSystem mockFs = (MockFileSystem) new Path("test:///").getFileSystem(conf);
    Assert.assertEquals("listLocatedStatus already called", 0,
        mockFs.numListLocatedStatusCalls);
    JobConf job = new JobConf(conf);
    TextInputFormat fileInputFormat = new TextInputFormat();
    fileInputFormat.configure(job);
    InputSplit[] splits = fileInputFormat.getSplits(job, 1);
    Assert.assertEquals("Input splits are not correct", 2, splits.length);
    Assert.assertEquals("listLocatedStatuss calls", 1, mockFs.numListLocatedStatusCalls);
  } finally {
    // Always close file systems: if an assertion fails, an instance left open
    // would keep its call counter and could break the "already called" check
    // of a later test.
    FileSystem.closeAll();
  }
}
/**
 * Computes the input splits by delegating to the wrapped {@code format}.
 *
 * @param job the job configuration handed to the delegate
 * @param numSplits the split-count argument, forwarded unchanged
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final InputSplit[] delegateSplits = format.getSplits(job, numSplits);
  return delegateSplits;
}
/**
 * Computes the input splits by delegating to the wrapped {@code format}.
 *
 * @param job the job configuration handed to the delegate
 * @param numSplits the split-count argument, forwarded unchanged
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final InputSplit[] delegateSplits = format.getSplits(job, numSplits);
  return delegateSplits;
}
/**
 * Computes the input splits by delegating to the wrapped {@code format}.
 *
 * @param job the job configuration handed to the delegate
 * @param numSplits the split-count argument, forwarded unchanged
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final InputSplit[] delegateSplits = format.getSplits(job, numSplits);
  return delegateSplits;
}
/**
 * Computes the input splits by delegating to the wrapped {@code format}.
 *
 * @param job the job configuration handed to the delegate
 * @param numSplits the split-count argument, forwarded unchanged
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final InputSplit[] delegateSplits = format.getSplits(job, numSplits);
  return delegateSplits;
}
/**
 * Checks the location info of the first split computed for {@code test:///a1/a2}:
 * two locations are expected, with {@code localhost} both on disk and in memory
 * and {@code otherhost} on disk only.
 *
 * @throws Exception if configuration or split computation fails
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");

  JobConf job = new JobConf(conf);
  TextInputFormat textFormat = new TextInputFormat();
  textFormat.configure(job);

  FileSplit[] fileSplits = (FileSplit[]) textFormat.getSplits(job, 1);
  FileSplit firstSplit = fileSplits[0];

  String[] hosts = firstSplit.getLocations();
  Assert.assertEquals(2, hosts.length);
  SplitLocationInfo[] hostInfos = firstSplit.getLocationInfo();
  Assert.assertEquals(2, hostInfos.length);

  // Pick each host's info by looking at which host name comes first.
  SplitLocationInfo localhostInfo;
  if (hosts[0].equals("localhost")) {
    localhostInfo = hostInfos[0];
  } else {
    localhostInfo = hostInfos[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hosts[0].equals("otherhost")) {
    otherhostInfo = hostInfos[0];
  } else {
    otherhostInfo = hostInfos[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
/**
 * Exercises the gzip codec against an empty input file: the file must still
 * produce one split, and reading that split must yield no records.
 */
@Test
public void testGzipEmpty() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzipCodec = new GzipCodec();
  ReflectionUtils.setConf(gzipCodec, job);

  // Start from a clean work directory containing only the empty gzip file.
  localFs.delete(workDir, true);
  Path emptyGzip = new Path(workDir, "empty.gz");
  writeFile(localFs, emptyGzip, gzipCodec, "");
  FileInputFormat.setInputPaths(job, workDir);

  TextInputFormat textFormat = new TextInputFormat();
  textFormat.configure(job);
  InputSplit[] computedSplits = textFormat.getSplits(job, 100);
  assertEquals("Compressed files of length 0 are not returned from FileInputFormat.getSplits().",
      1, computedSplits.length);

  List<Text> lines = readSplit(textFormat, computedSplits[0], job);
  assertEquals("Compressed empty file length == 0", 0, lines.size());
}
/** * generate splits for this run * * @param input_path * @param job * @return */ private InputSplit[] generateDebugSplits(Path input_path, JobConf job) { long block_size = localFs.getDefaultBlockSize(); log.info("default block size: " + (block_size / 1024 / 1024) + "MB"); // ---- set where we'll read the input files from ------------- FileInputFormat.setInputPaths(job, input_path); // try splitting the file in a variety of sizes TextInputFormat format = new TextInputFormat(); format.configure(job); int numSplits = 1; InputSplit[] splits = null; try { splits = format.getSplits(job, numSplits); } catch (IOException e) { log.error("Error with splits",e); } return splits; }
/** * generate splits for this run * * @param input_path * @param job * @return */ private InputSplit[] generateDebugSplits(Path input_path, JobConf job) { long block_size = localFs.getDefaultBlockSize(); log.info("default block size: " + (block_size / 1024 / 1024) + "MB"); // ---- set where we'll read the input files from ------------- FileInputFormat.setInputPaths(job, input_path); // try splitting the file in a variety of sizes TextInputFormat format = new TextInputFormat(); format.configure(job); int numSplits = 1; InputSplit[] splits = null; try { splits = format.getSplits(job, numSplits); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return splits; }
public void testNumInputs() throws Exception { JobConf job = new JobConf(conf); dfs = newDFSCluster(job); FileSystem fs = dfs.getFileSystem(); System.out.println("FileSystem " + fs.getUri()); Path inputDir = new Path("/foo/"); final int numFiles = 10; String fileNameBase = "part-0000"; for (int i=0; i < numFiles; ++i) { createInputs(fs, inputDir, fileNameBase + String.valueOf(i)); } createInputs(fs, inputDir, "_meta"); createInputs(fs, inputDir, "_temp"); // split it using a file input format TextInputFormat.addInputPath(job, inputDir); TextInputFormat inFormat = new TextInputFormat(); inFormat.configure(job); InputSplit[] splits = inFormat.getSplits(job, 1); assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES, numFiles, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0)); }
private static void doSingleGzipBufferSize(JobConf jConf) throws IOException { TextInputFormat format = new TextInputFormat(); format.configure(jConf); // here's Nth pair of DecompressorStreams: InputSplit[] splits = format.getSplits(jConf, 100); assertEquals("compressed splits == 2", 2, splits.length); FileSplit tmp = (FileSplit) splits[0]; if (tmp.getPath().getName().equals("testCompressThenConcat.txt.gz")) { System.out.println(" (swapping)"); splits[0] = splits[1]; splits[1] = tmp; } List<Text> results = readSplit(format, splits[0], jConf); assertEquals("splits[0] length (num lines)", 84, results.size()); assertEquals("splits[0][0]", "Call me Ishmael. Some years ago--never mind how long precisely--having", results.get(0).toString()); assertEquals("splits[0][42]", "Tell me, does the magnetic virtue of the needles of the compasses of", results.get(42).toString()); results = readSplit(format, splits[1], jConf); assertEquals("splits[1] length (num lines)", 84, results.size()); assertEquals("splits[1][0]", "Call me Ishmael. Some years ago--never mind how long precisely--having", results.get(0).toString()); assertEquals("splits[1][42]", "Tell me, does the magnetic virtue of the needles of the compasses of", results.get(42).toString()); }
public void testNumInputs() throws Exception { JobConf job = new JobConf(conf); dfs = newDFSCluster(job); FileSystem fs = dfs.getFileSystem(); System.out.println("FileSystem " + fs.getUri()); Path inputDir = new Path("/foo/"); final int numFiles = 10; String fileNameBase = "part-0000"; for (int i=0; i < numFiles; ++i) { createInputs(fs, inputDir, fileNameBase + String.valueOf(i)); } createInputs(fs, inputDir, "_meta"); createInputs(fs, inputDir, "_temp"); // split it using a file input format TextInputFormat.addInputPath(job, inputDir); TextInputFormat inFormat = new TextInputFormat(); inFormat.configure(job); InputSplit[] splits = inFormat.getSplits(job, 1); assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES, numFiles, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0)); }
/**
 * Exercises the gzip codec against an empty input file: the file must still
 * produce one split, and reading that split must yield no records.
 */
@Test (timeout=5000)
public void testGzipEmpty() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzipCodec = new GzipCodec();
  ReflectionUtils.setConf(gzipCodec, job);

  // Start from a clean work directory containing only the empty gzip file.
  localFs.delete(workDir, true);
  Path emptyGzip = new Path(workDir, "empty.gz");
  writeFile(localFs, emptyGzip, gzipCodec, "");
  FileInputFormat.setInputPaths(job, workDir);

  TextInputFormat textFormat = new TextInputFormat();
  textFormat.configure(job);
  InputSplit[] computedSplits = textFormat.getSplits(job, 100);
  assertEquals("Compressed files of length 0 are not returned from FileInputFormat.getSplits().",
      1, computedSplits.length);

  List<Text> lines = readSplit(textFormat, computedSplits[0], job);
  assertEquals("Compressed empty file length == 0", 0, lines.size());
}