@Override protected List<FileStatus> listStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); // Explode out directories that match the original FileInputFormat filters // since HFiles are written to directories where the // directory name is the column name for (FileStatus status : super.listStatus(job)) { if (status.isDirectory()) { FileSystem fs = status.getPath().getFileSystem(job.getConfiguration()); for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) { result.add(match); } } else { result.add(status); } } return result; }
/**
 * Lists the fixture prepared by {@code configureTestNestedRecursive} through a
 * {@code TextInputFormat} and checks the result against the expected paths.
 */
@Test
public void testListStatusNestedRecursive() throws IOException {
  Configuration configuration = new Configuration();
  configuration.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expected = configureTestNestedRecursive(configuration, localFs);

  Job job = Job.getInstance(configuration);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  List<FileStatus> actual = inputFormat.listStatus(job);

  verifyFileStatuses(expected, actual, localFs);
}
/**
 * Lists the fixture prepared by {@code configureTestSimple} through a
 * {@code TextInputFormat} and checks the result against the expected paths.
 */
@Test
public void testListStatusSimple() throws IOException {
  Configuration configuration = new Configuration();
  configuration.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expected = configureTestSimple(configuration, localFs);

  Job job = Job.getInstance(configuration);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  List<FileStatus> actual = inputFormat.listStatus(job);

  verifyFileStatuses(expected, actual, localFs);
}
/**
 * Lists the fixture prepared by {@code configureTestNestedNonRecursive} through a
 * {@code TextInputFormat} and checks the result against the expected paths.
 */
@Test
public void testListStatusNestedNonRecursive() throws IOException {
  Configuration configuration = new Configuration();
  configuration.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expected = configureTestNestedNonRecursive(configuration, localFs);

  Job job = Job.getInstance(configuration);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  List<FileStatus> actual = inputFormat.listStatus(job);

  verifyFileStatuses(expected, actual, localFs);
}
/**
 * Checks that listing the fixture prepared by {@code configureTestErrorOnNonExistantDir}
 * fails with an {@code InvalidInputException} whose message names the qualified
 * {@code input2} path under {@code TEST_ROOT_DIR}.
 */
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
  Configuration configuration = new Configuration();
  configuration.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  configureTestErrorOnNonExistantDir(configuration, localFs);

  Job job = Job.getInstance(configuration);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  try {
    inputFormat.listStatus(job);
    Assert.fail("Expecting an IOException for a missing Input path");
  } catch (IOException thrown) {
    Path missingInput = localFs.makeQualified(new Path(TEST_ROOT_DIR, "input2"));
    Assert.assertTrue(thrown instanceof InvalidInputException);
    String expectedMessage = "Input path does not exist: " + missingInput.toString();
    Assert.assertEquals(expectedMessage, thrown.getMessage());
  }
}
/**
 * Returns the parent class's listing for the job unchanged.
 *
 * @param job the job context forwarded to the parent implementation
 * @return exactly what {@code super.listStatus(job)} returns
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  // TODO: consider whether sub-directories should be filtered out here.
  return super.listStatus(job);
}
/**
 * Returns the parent class's listing for the job unchanged.
 *
 * @param job the job context forwarded to the parent implementation
 * @return exactly what {@code super.listStatus(job)} returns
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  // TODO: consider whether sub-directories should be filtered out here.
  return super.listStatus(job);
}
/**
 * Lists the job's input and drops every entry whose reported length is zero.
 * The parent's list is left untouched; a new list is returned.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with a non-zero length
 * @throws IOException if the parent listing fails
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> all = super.listStatus(job);
  List<FileStatus> nonEmpty = new ArrayList<>(all.size());
  for (FileStatus candidate : all) {
    if (candidate.getLen() == 0) {
      // Skip zero-length entries.
      continue;
    }
    nonEmpty.add(candidate);
  }
  return nonEmpty;
}
}
/**
 * Lists the job's input and keeps only the entries whose file name ends with
 * {@code org.apache.avro.mapred.AvroOutputFormat.EXT}.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with the matching suffix
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> avroFiles = new ArrayList<FileStatus>();
  for (FileStatus candidate : super.listStatus(job)) {
    String name = candidate.getPath().getName();
    if (!name.endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) {
      continue;
    }
    avroFiles.add(candidate);
  }
  return avroFiles;
}
/**
 * Lists the job's input and keeps only the entries whose file name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with the matching suffix
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> avroFiles = new ArrayList<FileStatus>();
  for (FileStatus candidate : super.listStatus(job)) {
    boolean hasAvroSuffix = candidate.getPath().getName().endsWith(AvroOutputFormat.EXT);
    if (!hasAvroSuffix) {
      continue;
    }
    avroFiles.add(candidate);
  }
  return avroFiles;
}
}
/**
 * Lists the job's input and keeps only the entries whose file name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with the matching suffix
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> avroFiles = new ArrayList<FileStatus>();
  for (FileStatus candidate : super.listStatus(job)) {
    boolean hasAvroSuffix = candidate.getPath().getName().endsWith(AvroOutputFormat.EXT);
    if (!hasAvroSuffix) {
      continue;
    }
    avroFiles.add(candidate);
  }
  return avroFiles;
}
}
/**
 * Lists the job's input and keeps only the entries whose file name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with the matching suffix
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> avroFiles = new ArrayList<FileStatus>();
  for (FileStatus candidate : super.listStatus(job)) {
    boolean hasAvroSuffix = candidate.getPath().getName().endsWith(AvroOutputFormat.EXT);
    if (!hasAvroSuffix) {
      continue;
    }
    avroFiles.add(candidate);
  }
  return avroFiles;
}
}
/**
 * Lists the job's input and removes every entry whose file name does not end with
 * {@code .dbf} (compared after lower-casing the name). The list returned by the
 * parent is filtered in place and returned.
 *
 * @param job the job context forwarded to the parent implementation
 * @return the parent's list with all non-{@code .dbf} entries removed
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(final JobContext job) throws IOException {
  final List<FileStatus> list = super.listStatus(job);
  // Removing elements inside a for-each over the same list either throws
  // ConcurrentModificationException or silently skips the final element.
  // Walking the indices backwards makes in-place removal safe: removing
  // index i never shifts the elements that are still to be visited.
  for (int i = list.size() - 1; i >= 0; i--) {
    final String name = list.get(i).getPath().getName();
    if (!name.toLowerCase().endsWith(".dbf")) {
      list.remove(i);
    }
  }
  return list;
}
/**
 * Lists the job's input and keeps only the entries whose file name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job context forwarded to the parent implementation
 * @return a new list holding only the entries with the matching suffix
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> avroFiles = new ArrayList<FileStatus>();
  for (FileStatus candidate : super.listStatus(job)) {
    boolean hasAvroSuffix = candidate.getPath().getName().endsWith(AvroOutputFormat.EXT);
    if (!hasAvroSuffix) {
      continue;
    }
    avroFiles.add(candidate);
  }
  return avroFiles;
}
}
/**
 * Passes the parent class's listing and the job's configuration to
 * {@code MapRedUtil.getAllFileRecursively} and returns its result.
 * NOTE(review): the helper presumably expands directories into the files beneath
 * them; confirm against {@code MapRedUtil}.
 *
 * @param job the job context; supplies both the parent listing and the configuration
 * @return whatever {@code MapRedUtil.getAllFileRecursively} returns for the parent listing
 * @throws IOException if the parent listing or the helper fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  return MapRedUtil.getAllFileRecursively(super.listStatus(job), job.getConfiguration());
}
@Override protected List<FileStatus> listStatus(JobContext job )throws IOException { List<FileStatus> files = super.listStatus(job); int len = files.size(); for(int i=0; i < len; ++i) { FileStatus file = files.get(i); if (file.isDirectory()) { // it's a MapFile Path p = file.getPath(); FileSystem fs = p.getFileSystem(job.getConfiguration()); // use the data file files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME))); } } return files; } }
@Override protected List<FileStatus> listStatus(JobContext job )throws IOException { List<FileStatus> files = super.listStatus(job); int len = files.size(); for(int i=0; i < len; ++i) { FileStatus file = files.get(i); if (file.isDirectory()) { // it's a MapFile Path p = file.getPath(); FileSystem fs = p.getFileSystem(job.getConfiguration()); // use the data file files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME))); } } return files; } }
/**
 * Passes the parent class's listing, together with the configuration obtained from
 * {@code ContextUtil.getConfiguration(jobContext)}, to {@code getAllFileRecursively}
 * and returns its result.
 * NOTE(review): the helper presumably expands directories into the files beneath
 * them; confirm against its definition.
 *
 * @param jobContext the job context; supplies both the parent listing and the configuration
 * @return whatever {@code getAllFileRecursively} returns for the parent listing
 * @throws IOException if the parent listing or the helper fails
 */
@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
  return getAllFileRecursively(super.listStatus(jobContext), ContextUtil.getConfiguration(jobContext));
}
/**
 * Passes the parent class's listing, together with the configuration obtained from
 * {@code ContextUtil.getConfiguration(jobContext)}, to {@code getAllFileRecursively}
 * and returns its result.
 * NOTE(review): the helper presumably expands directories into the files beneath
 * them; confirm against its definition.
 *
 * @param jobContext the job context; supplies both the parent listing and the configuration
 * @return whatever {@code getAllFileRecursively} returns for the parent listing
 * @throws IOException if the parent listing or the helper fails
 */
@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
  return getAllFileRecursively(super.listStatus(jobContext), ContextUtil.getConfiguration(jobContext));
}
/**
 * Passes the parent class's listing, together with the configuration obtained from
 * {@code ContextUtil.getConfiguration(jobContext)}, to {@code getAllFileRecursively}
 * and returns its result.
 * NOTE(review): the helper presumably expands directories into the files beneath
 * them; confirm against its definition.
 *
 * @param jobContext the job context; supplies both the parent listing and the configuration
 * @return whatever {@code getAllFileRecursively} returns for the parent listing
 * @throws IOException if the parent listing or the helper fails
 */
@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
  return getAllFileRecursively(super.listStatus(jobContext), ContextUtil.getConfiguration(jobContext));
}