/**
 * Determines a {@link Schema} for the data stored under a location.
 * <p>
 * The location is walked with a {@code GetSchema} visitor constructed from
 * {@code name}; whatever that visitor produces is returned.
 *
 * @param name the name handed to the {@code GetSchema} visitor
 * @param fs a FileSystem used to access {@code location}
 * @param location the Path to inspect
 * @return the visitor's result, or {@code null} if {@code location} does not
 *         exist
 * @throws IOException if accessing the file system fails
 */
public static Schema schema(String name, FileSystem fs, Path location)
    throws IOException {
  if (fs.exists(location)) {
    return visit(new GetSchema(name), fs, location);
  }
  // nothing is stored at the location, so there is no schema to report
  return null;
}
/**
 * Determines the {@link Format} of the data stored under a location.
 * <p>
 * The location is walked with a {@code GetFormat} visitor and the visitor's
 * result is returned.
 *
 * @param fs a FileSystem used to access {@code location}
 * @param location the Path to inspect
 * @return the visitor's result, or {@code null} if {@code location} does not
 *         exist
 * @throws IOException if accessing the file system fails
 */
public static Format format(FileSystem fs, Path location) throws IOException {
  Format detected = null;
  if (fs.exists(location)) {
    detected = visit(new GetFormat(), fs, location);
  }
  return detected;
}
/**
 * Applies a visitor to {@code path}, starting with an empty list of followed
 * symbolic links.
 *
 * @param visitor the PathVisitor to apply
 * @param fs a FileSystem used to access {@code path}
 * @param path the Path where the traversal starts
 * @return the value produced by the visitor for {@code path}
 * @throws IOException if accessing the file system fails
 */
private static <T> T visit(PathVisitor<T> visitor, FileSystem fs, Path path)
    throws IOException {
  // no links have been followed yet at the start of a traversal
  List<Path> noLinksFollowed = Lists.newArrayList();
  return visit(visitor, fs, path, noLinksFollowed);
}
private static <T> T visit(PathVisitor<T> visitor, FileSystem fs, Path path, List<Path> followedLinks) throws IOException { if (fs.isFile(path)) { return visitor.file(fs, path); } else if (IS_SYMLINK != null && IS_SYMLINK.<Boolean>invoke(fs.getFileStatus(path))) { Preconditions.checkArgument(!followedLinks.contains(path), "Encountered recursive path structure at link: " + path); followedLinks.add(path); // no need to remove return visit(visitor, fs, fs.getLinkTarget(path), followedLinks); } List<T> children = Lists.newArrayList(); FileStatus[] statuses = fs.listStatus(path, PathFilters.notHidden()); for (FileStatus stat : statuses) { children.add(visit(visitor, fs, stat.getPath())); } return visitor.directory(fs, path, children); }
/**
 * Finds potential datasets by crawling a directory tree.
 * <p>
 * This method looks for any data files and directories that appear to form a
 * dataset. It deliberately ignores information that may be stored in the
 * Hive metastore or in .metadata folders.
 * <p>
 * Recognizes only Avro, Parquet, and JSON data files.
 *
 * @param fs a FileSystem for the root path
 * @param path a root Path that will be searched
 * @return a Collection with a DatasetDescriptor for each potential dataset;
 *         empty if the crawl yields neither a table nor a group of tables
 * @throws IOException if accessing the file system fails
 */
public static Collection<DatasetDescriptor> findPotentialDatasets(
    FileSystem fs, Path path) throws IOException {
  List<DatasetDescriptor> found = Lists.newArrayList();

  Result crawled = visit(new FindDatasets(), fs, path);

  if (crawled instanceof Result.Table) {
    // the whole tree forms a single dataset
    Result.Table single = (Result.Table) crawled;
    found.add(descriptor(fs, single));
    return found;
  }

  if (crawled instanceof Result.Group) {
    // the tree holds several datasets; describe each one
    Result.Group group = (Result.Group) crawled;
    for (Result.Table member : group.tables) {
      found.add(descriptor(fs, member));
    }
  }

  return found;
}
public static PartitionStrategy strategy(FileSystem fs, Path location) throws IOException { if (!fs.exists(location)) { return null; } List<Pair<String, Class<? extends Comparable>>> pairs = visit( new GetPartitionInfo(), fs, location); if (pairs == null || pairs.isEmpty() || pairs.size() <= 1) { return null; } PartitionStrategy.Builder builder = new PartitionStrategy.Builder(); // skip the initial partition because it is the containing directory for (int i = 1; i < pairs.size(); i += 1) { Pair<String, Class<? extends Comparable>> pair = pairs.get(i); builder.provided( pair.first() == null ? "partition_" + i : pair.first(), ProvidedFieldPartitioner.valuesString(pair.second())); } return builder.build(); }