/**
 * Recursively lists every file under {@code filePath} and converts each one to a resource path.
 *
 * <p>A resource path is {@code resPathPrefix} followed by the file's URI path with the URI path
 * of {@code filePath}, plus the single character after it, removed from the front.
 *
 * @param filePath      root location that is listed recursively
 * @param resPathPrefix text prepended to every relative path
 * @return the resource paths, held in a sorted set
 * @throws IOException           propagated from the file system listing
 * @throws IllegalStateException if a listed file's path does not start with the root's path
 */
TreeSet<String> getAllFilePath(Path filePath, String resPathPrefix) throws IOException {
    final String rootPath = filePath.toUri().getPath();
    // Drop the root prefix and the one character following it (presumably the '/' separator).
    final int relativeStart = rootPath.length() + 1;
    final TreeSet<String> resPaths = new TreeSet<>();

    for (RemoteIterator<LocatedFileStatus> files = fs.listFiles(filePath, true); files.hasNext();) {
        final String absolutePath = files.next().getPath().toUri().getPath();
        if (!absolutePath.startsWith(rootPath)) {
            throw new IllegalStateException("File path " + absolutePath + " is supposed to start with " + rootPath);
        }
        resPaths.add(resPathPrefix + absolutePath.substring(relativeStart));
    }
    return resPaths;
}
/**
 * Convenience overload that builds a split from a file status.
 *
 * <p>Delegates to the seven-argument overload with the status' path and block locations,
 * the literal {@code 0}, and the file length passed twice (presumably start offset, split
 * length and file size - confirm against the overload's signature).
 *
 * @param status       file to create the split for
 * @param bucketNumber forwarded unchanged to the overload
 * @param splittable   caller's request; the file is only treated as splittable when this is
 *                     {@code true} and {@code isSplittable} also agrees
 * @return whatever the delegated overload returns
 */
private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber, boolean splittable) {
    // Short-circuit: isSplittable is only consulted when the caller asked for splitting.
    boolean effectivelySplittable = splittable && isSplittable(inputFormat, fileSystem, status.getPath());
    long fileLength = status.getLen();
    return createInternalHiveSplit(
            status.getPath(),
            status.getBlockLocations(),
            0,
            fileLength,
            fileLength,
            bucketNumber,
            effectivelySplittable);
}
/**
 * Recursively walks {@code fileBackupDir} and returns the files accepted by
 * {@code HFile.isHFileFormat}.
 *
 * @param fileBackupDir directory (as a path string) to search
 * @return paths of the accepted files, in listing order
 * @throws IOException propagated from the file system or the format check
 */
private List<Path> getFilesRecursively(String fileBackupDir)
    throws IllegalArgumentException, IOException {
  Path backupDir = new Path(fileBackupDir);
  // The file system is resolved from the directory's own URI with a fresh Configuration.
  FileSystem fileSystem = FileSystem.get(backupDir.toUri(), new Configuration());
  List<Path> hfiles = new ArrayList<>();

  for (RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(backupDir, true);
      files.hasNext();) {
    Path candidate = files.next().getPath();
    if (!HFile.isHFileFormat(fileSystem, candidate)) {
      continue;
    }
    hfiles.add(candidate);
  }
  return hfiles;
}
@Override protected LocatedFileStatus computeNext() { while (true) { while (remoteIterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(remoteIterator); // Ignore hidden files and directories. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } if (status.isDirectory()) { switch (nestedDirectoryPolicy) { case IGNORED: continue; case RECURSE: paths.add(status.getPath()); continue; case FAIL: throw new NestedDirectoryNotAllowedException(); } } return status; } if (paths.isEmpty()) { return endOfData(); } remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst()); } }
public static List<String> listPaths(JavaSparkContext sc, String path, boolean recursive) throws IOException { if (path.endsWith(".blob.core.windows.net/") || path.endsWith(".blob.core.windows.net")) { //Azure library bug: seems that we get an infinite loop if we try to list paths on the // root directory, for some versions of the Azure Hadoop library - deadlocks on fileIter.hasNext() throw new IllegalStateException("Cannot list paths from root directory due to Azure library bug"); } List<String> paths = new ArrayList<>(); Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(URI.create(path), config); RemoteIterator fileIter = hdfs.listFiles(new Path(path), recursive); while (fileIter.hasNext()) { String filePath = ((LocatedFileStatus) fileIter.next()).getPath().toString(); paths.add(filePath); } return paths; }
/**
 * Lists the files under {@code fileUri} and returns each file's path as a URI string.
 *
 * @param fileUri   location to list; must exist on the underlying file system
 * @param recursive passed through to the underlying {@code listFiles} call
 * @return the URI strings of the listed files, in listing order
 * @throws IOException              propagated from the file system
 * @throws IllegalArgumentException if {@code fileUri} does not exist
 */
@Override
public String[] listFiles(URI fileUri, boolean recursive) throws IOException {
  Path root = new Path(fileUri);
  if (!_hadoopFS.exists(root)) {
    throw new IllegalArgumentException("segmentUri is not valid");
  }

  ArrayList<String> collected = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> entries = _hadoopFS.listFiles(root, recursive);
  // A null iterator is tolerated and treated as an empty listing.
  if (entries != null) {
    while (entries.hasNext()) {
      collected.add(entries.next().getPath().toUri().toString());
    }
  }
  return collected.toArray(new String[0]);
}
public static void main(String[] args) throws IOException{ DatasetReaderFromHdfs ds = new DatasetReaderFromHdfs(); int j = 0; while (hdfsIter.hasNext()) { LocatedFileStatus next = hdfsIter.next(); Path path = next.getPath(); String currentPath = path.toUri().getPath(); //String index = getRelativeFilename(currentPath); System.out.println("file name : i = " + j ++ + " path=" + currentPath); } }
public static BackupInfo loadBackupInfo(Path backupRootPath, String backupId, FileSystem fs) throws IOException { Path backupPath = new Path(backupRootPath, backupId); RemoteIterator<LocatedFileStatus> it = fs.listFiles(backupPath, true); while (it.hasNext()) { LocatedFileStatus lfs = it.next(); if (lfs.getPath().getName().equals(BackupManifest.MANIFEST_FILE_NAME)) { // Load BackupManifest BackupManifest manifest = new BackupManifest(fs, lfs.getPath().getParent()); BackupInfo info = manifest.toBackupInfo(); return info; } } return null; }
/**
 * Lists the files directly inside {@code directory} (non-recursive), ordered by
 * {@code ModifTimeComparator}.
 *
 * <p>When {@code olderThan} is positive, only files whose modification time is less than or
 * equal to it are kept; when it is zero or negative, no filtering is applied.
 *
 * @param fs        file system to list
 * @param directory directory whose files are listed
 * @param olderThan modification-time cut-off, or {@code <= 0} to disable filtering
 * @return the paths of the selected files in sorted order
 * @throws IOException propagated from the listing
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory,
    long olderThan) throws IOException {
  final boolean filterByAge = olderThan > 0;
  ArrayList<LocatedFileStatus> selected = new ArrayList<>();

  for (RemoteIterator<LocatedFileStatus> files = fs.listFiles(directory, false);
      files.hasNext();) {
    LocatedFileStatus candidate = files.next();
    if (!filterByAge || candidate.getModificationTime() <= olderThan) {
      selected.add(candidate);
    }
  }

  Collections.sort(selected, new ModifTimeComparator());

  ArrayList<Path> sortedPaths = new ArrayList<>(selected.size());
  for (LocatedFileStatus status : selected) {
    sortedPaths.add(status.getPath());
  }
  return sortedPaths;
}
/** * Process the input stat. * If it is a file, return the file stat. * If it is a directory, traverse the directory if recursive is true; * ignore it if recursive is false. * @param stat input status * @throws IOException if any IO error occurs */ private void handleFileStat(LocatedFileStatus stat) throws IOException { if (stat.isFile()) { // file curFile = stat; } else if (recursive) { // directory itors.push(curItor); curItor = listLocatedStatus(stat.getPath()); } }
public static List<String> getFiles(FileSystem fs, Path rootDir, List<String> files, PathFilter filter) throws IOException { RemoteIterator<LocatedFileStatus> it = fs.listFiles(rootDir, true); while (it.hasNext()) { LocatedFileStatus lfs = it.next(); if (lfs.isDirectory()) { continue; } // apply filter if (filter.accept(lfs.getPath())) { files.add(lfs.getPath().toString()); } } return files; }
/**
 * Deletes every entry directly inside the configured task log directory whose modification
 * time is strictly before {@code timestamp}.
 *
 * <p>Does nothing when the directory does not exist. The thread's interrupt status is checked
 * after each entry; if set, the sweep is aborted with an {@code IOException} wrapping an
 * {@code InterruptedException}.
 *
 * @param timestamp modification-time cut-off; entries older than this are deleted
 * @throws IOException if the configured path is not a directory, the thread was
 *                     interrupted, or the file system reports an error
 */
@Override
public void killOlderThan(long timestamp) throws IOException
{
  Path taskLogDir = new Path(config.getDirectory());
  FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);

  if (!fs.exists(taskLogDir)) {
    return;
  }
  if (!fs.isDirectory(taskLogDir)) {
    throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
  }

  for (RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(taskLogDir); entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    if (entry.getModificationTime() < timestamp) {
      Path stale = entry.getPath();
      log.info("Deleting hdfs task log [%s].", stale.toUri().toString());
      fs.delete(stale, true);
    }
    // Checked once per entry so a long sweep can be cancelled between deletions.
    if (Thread.currentThread().isInterrupted()) {
      throw new IOException(
          new InterruptedException("Thread interrupted. Couldn't delete all tasklogs.")
      );
    }
  }
}
}
void findNext() throws IOException { while (iter.hasNext()) { LocatedFileStatus status = iter.next(); if (filter.accept(status.getPath())) { nextFile = status; return; } } // No more matching files in the iterator nextFile = null; } }
/**
 * Reports whether another entry is available, skipping entries for which
 * {@code isChecksumFile} returns {@code true}.
 *
 * <p>An entry that was found but not yet consumed stays buffered in {@code next}, so
 * repeated calls do not advance the underlying iterator.
 *
 * @return {@code true} if {@code next} holds an entry after the call
 * @throws IOException propagated from the underlying iterator
 */
@Override
public boolean hasNext() throws IOException {
  if (next != null) {
    return true;
  }
  while (iter.hasNext()) {
    LocatedFileStatus candidate = iter.next();
    if (!isChecksumFile(candidate.getPath())) {
      next = candidate;
      return true;
    }
  }
  return false;
}
private void getSortedFileList(Path eventPath, List<LocatedFileStatus> fileStatuses, FileSystem fileSystem) throws IOException { //Add all the files in this directory. No need to sort. RemoteIterator<LocatedFileStatus> iteratorNext = fileSystem.listFiles(eventPath, false); while (iteratorNext.hasNext()) { LocatedFileStatus status = iteratorNext.next(); LOG.info(" files added at getSortedFileList" + status.getPath()); fileStatuses.add(status); } // get all the directories in this path and sort them FileStatus[] eventDirs = fileSystem.listStatus(eventPath, EximUtil.getDirectoryFilter(fileSystem)); if (eventDirs.length == 0) { return; } Arrays.sort(eventDirs, new EventDumpDirComparator()); // add files recursively for each directory for (FileStatus fs : eventDirs) { getSortedFileList(fs.getPath(), fileStatuses, fileSystem); } }
protected void dumpBackupDir() throws IOException { // Dump Backup Dir FileSystem fs = FileSystem.get(conf1); RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(BACKUP_ROOT_DIR), true); while (it.hasNext()) { LOG.debug(Objects.toString(it.next().getPath())); } } }
/**
 * Lists the files directly inside {@code p} (non-recursive) and returns their paths with
 * scheme and authority stripped.
 *
 * @param p directory to list
 * @return the stripped path strings, in listing order
 * @throws IOException propagated from the listing
 */
private List<String> listDir(Path p) throws IOException {
  ArrayList<String> names = new ArrayList<>();
  for (RemoteIterator<LocatedFileStatus> entries = fs.listFiles(p, false); entries.hasNext();) {
    Path fullPath = entries.next().getPath();
    names.add(Path.getPathWithoutSchemeAndAuthority(fullPath).toString());
  }
  return names;
}
static SortedSet<byte []> readKeysToSearch(final Configuration conf) throws IOException, InterruptedException { Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY)); FileSystem fs = FileSystem.get(conf); SortedSet<byte []> result = new TreeSet<>(Bytes.BYTES_COMPARATOR); if (!fs.exists(keysInputDir)) { throw new FileNotFoundException(keysInputDir.toString()); } if (!fs.isDirectory(keysInputDir)) { throw new UnsupportedOperationException("TODO"); } else { RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false); while(iterator.hasNext()) { LocatedFileStatus keyFileStatus = iterator.next(); // Skip "_SUCCESS" file. if (keyFileStatus.getPath().getName().startsWith("_")) continue; result.addAll(readFileToSearch(conf, fs, keyFileStatus)); } } return result; }
/**
 * Recursively lists the WAL directory ({@code HConstants.HREGION_LOGDIR_NAME} under the WAL
 * root) and returns every file that {@code AbstractFSWALProvider.isMetaFile} does not
 * classify as a meta file. Each returned file is also logged at info level.
 *
 * @param c configuration used to locate the WAL root and its file system
 * @return the matching file statuses, in listing order
 * @throws IOException propagated from the file system
 */
private List<FileStatus> getListOfWALFiles(Configuration c) throws IOException {
  Path walDir = new Path(CommonFSUtils.getWALRootDir(c), HConstants.HREGION_LOGDIR_NAME);
  FileSystem walFs = walDir.getFileSystem(c);
  RemoteIterator<LocatedFileStatus> entries = walFs.listFiles(walDir, true);
  List<FileStatus> walFiles = new ArrayList<>();

  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    boolean isRegularWal = entry.isFile() && !AbstractFSWALProvider.isMetaFile(entry.getPath());
    if (!isRegularWal) {
      continue;
    }
    walFiles.add(entry);
    LOG.info(Objects.toString(entry));
  }
  return walFiles;
}
/**
 * Tells whether any file found recursively under {@code tableDir} is recognized by
 * {@code HFileLink.isHFileLink}.
 *
 * @param tableDir directory to search
 * @return {@code true} as soon as one such file is found; {@code false} if none is found or
 *         the directory does not exist
 * @throws IOException propagated from the file system
 */
private boolean hasHFileLink(Path tableDir) throws IOException {
  if (!fs.exists(tableDir)) {
    return false;
  }
  for (RemoteIterator<LocatedFileStatus> files = fs.listFiles(tableDir, true);
      files.hasNext();) {
    LocatedFileStatus candidate = files.next();
    if (candidate.isFile() && HFileLink.isHFileLink(candidate.getPath())) {
      return true;
    }
  }
  return false;
}