/**
 * Looks up the modification time recorded for a file.
 *
 * @param fileId identifier of the file; must not be {@code null}
 * @return the value reported by {@code FileStatus.getModificationTime()} for the file
 * @throws FileBasedHelperException if the file status cannot be fetched from the file system
 */
@Override
public long getFileMTime(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");
  final Path filePath = new Path(fileId);
  try {
    return fileSystem.getFileStatus(filePath).getModificationTime();
  } catch (IOException ioe) {
    String message = "Failed to retrieve getModificationTime on path: " + filePath + " , fileId: " + fileId;
    throw new FileBasedHelperException(message, ioe);
  }
}
}
/**
 * Looks up the length of a file.
 *
 * @param fileId identifier of the file; must not be {@code null}
 * @return the value reported by {@code FileStatus.getLen()} for the file
 * @throws FileBasedHelperException if the file status cannot be fetched from the file system
 */
@Override
public long getFileSize(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");
  final Path filePath = new Path(fileId);
  try {
    return fileSystem.getFileStatus(filePath).getLen();
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to get metadata on " + fileId, ioe);
  }
}
/**
 * Opens a stream over the contents of a file.
 * <p>
 * When {@code bufferSizeByte} holds a value it is passed to the file system as the buffer size;
 * otherwise the file system's single-argument {@code open} is used.
 * </p>
 *
 * @param fileId identifier of the file; must not be {@code null}
 * @return an open {@link InputStream} for the file
 * @throws FileBasedHelperException if the file cannot be opened
 */
@Override
public InputStream getFileStream(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");
  final Path filePath = new Path(fileId);
  try {
    if (!bufferSizeByte.isPresent()) {
      return fileSystem.open(filePath);
    }
    return fileSystem.open(filePath, bufferSizeByte.get());
  } catch (IOException ioe) {
    String message = "Failed to open files stream on path: " + filePath + " , fileId: " + fileId;
    throw new FileBasedHelperException(message, ioe);
  }
}
/**
 * Creates the file system for the URI configured under
 * {@link ConfigurationKeys#SOURCE_FILEBASED_FS_URI}.
 *
 * @throws FileBasedHelperException if the URI is not configured, is malformed, the file system
 *         cannot be created, or the calling thread is interrupted while creating it
 */
@Override
public void connect() throws FileBasedHelperException {
  String uri = this.state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI);
  try {
    if (Strings.isNullOrEmpty(uri)) {
      throw new FileBasedHelperException(ConfigurationKeys.SOURCE_FILEBASED_FS_URI + " has not been configured");
    }
    this.createFileSystem(uri);
  } catch (IOException e) {
    throw new FileBasedHelperException("Cannot connect to given URI " + uri + " due to " + e.getMessage(), e);
  } catch (URISyntaxException e) {
    throw new FileBasedHelperException("Malformed uri " + uri + " due to " + e.getMessage(), e);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interruption;
    // catching InterruptedException clears it.
    Thread.currentThread().interrupt();
    throw new FileBasedHelperException("Interrupted exception is caught when getting the proxy file system", e);
  }
}
/**
 * Looks up the length of the file at the given path.
 *
 * @param filePath path of the file to inspect
 * @return the length reported by the file system's file status
 * @throws FileBasedHelperException if the file status cannot be fetched
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  try {
    Path target = new Path(filePath);
    return this.getFileSystem().getFileStatus(target).getLen();
  } catch (IOException ioe) {
    String reason =
        String.format("Failed to get size for file at path %s due to error %s", filePath, ioe.getMessage());
    throw new FileBasedHelperException(reason, ioe);
  }
}
}
/**
 * Looks up the modification time of the file at the given path.
 *
 * @param filePath path of the file to inspect
 * @return the modification time reported by the file system's file status
 * @throws FileBasedHelperException if the file status cannot be fetched
 */
@Override
public long getFileMTime(String filePath) throws FileBasedHelperException {
  try {
    Path target = new Path(filePath);
    return this.getFileSystem().getFileStatus(target).getModificationTime();
  } catch (IOException ioe) {
    String reason = String.format(
        "Failed to get last modified time for file at path %s due to error %s", filePath, ioe.getMessage());
    throw new FileBasedHelperException(reason, ioe);
  }
}
/**
 * Returns the size of the file at {@code filePath} as reported by the file system.
 *
 * @param filePath path of the file to inspect
 * @return the file length taken from its file status
 * @throws FileBasedHelperException wrapping the {@link IOException} raised while reading the status
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  final long length;
  try {
    length = this.getFileSystem().getFileStatus(new Path(filePath)).getLen();
  } catch (IOException ioe) {
    throw new FileBasedHelperException(
        String.format("Failed to get size for file at path %s due to error %s", filePath, ioe.getMessage()),
        ioe);
  }
  return length;
}
/**
 * Lists entries under the given path by delegating to {@code lsr}, which fills the result list.
 *
 * @param path path to list
 * @return the entries collected by {@code lsr}
 * @throws FileBasedHelperException if listing fails with an {@link IOException}
 */
@Override
public List<String> ls(String path) throws FileBasedHelperException {
  final List<String> listing = new ArrayList<>();
  try {
    lsr(new Path(path), listing);
    return listing;
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Cannot do ls on path " + path + " due to " + ioe.getMessage(), ioe);
  }
}
/**
 * Recursively lists the files under a folder ID. Folders themselves are never part of the result;
 * only the files found beneath them are. An empty list is returned when the folder has no files
 * or when the file system reports it as not found.
 * If the folder ID is empty (per {@code StringUtils.isEmpty}), the root directory {@code "/"} is listed.
 *
 * @param folderId ID of the folder to list; may be empty to list the root
 * @return file IDs of every file found under the folder
 * @throws FileBasedHelperException if listing fails for a reason other than the folder not being found
 * @see org.apache.gobblin.source.extractor.filebased.FileBasedHelper#ls(java.lang.String)
 */
@Override
public List<String> ls(String folderId) throws FileBasedHelperException {
  final List<String> collected = new ArrayList<>();
  final String effectiveId = StringUtils.isEmpty(folderId) ? "/" : folderId;
  final Path folderPath = new Path(effectiveId);

  final FileStatus[] children;
  try {
    children = fileSystem.listStatus(folderPath);
  } catch (FileNotFoundException notFound) {
    // A missing folder is treated as "nothing to list" rather than an error.
    return collected;
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Falied to list status on path " + folderPath + ", folderID: " + effectiveId, ioe);
  }

  for (FileStatus child : children) {
    if (!child.isDirectory()) {
      collected.add(GoogleDriveFileSystem.toFileId(child.getPath()));
      continue;
    }
    // Descend into sub-folders; only their files end up in the result.
    collected.addAll(ls(GoogleDriveFileSystem.toFileId(child.getPath())));
  }
  return collected;
}
/**
 * Lists the file names found at the given path over a freshly obtained SFTP channel.
 * <p>
 * The channel is disconnected before returning, whether or not the listing succeeds.
 * </p>
 *
 * @param path remote path to list
 * @return the file name of every entry returned by the SFTP {@code ls} command
 * @throws FileBasedHelperException if obtaining the channel or running {@code ls} fails
 */
@Override
public List<String> ls(String path) throws FileBasedHelperException {
  ChannelSftp channel = null;
  try {
    List<String> list = new ArrayList<>();
    channel = getSftpChannel();
    Vector<LsEntry> vector = channel.ls(path);
    for (LsEntry entry : vector) {
      list.add(entry.getFilename());
    }
    return list;
  } catch (SftpException e) {
    throw new FileBasedHelperException("Cannot execute ls command on sftp connection", e);
  } finally {
    // Disconnect on every exit path so a failed ls does not leak the channel.
    if (channel != null) {
      channel.disconnect();
    }
  }
}
/**
 * Opens a {@link DataFileReader} on the specified avro file.
 * <p>
 * When {@link ConfigurationKeys#SHOULD_FS_PROXY_AS_USER} is enabled the file is read through a
 * {@link ProxyFsInput} bound to this helper's file system; otherwise it is read through an
 * {@link FsInput} built from the file system's configuration.
 * </p>
 * <p>
 * Note: It is the caller's responsibility to close the returned {@link DataFileReader}.
 * </p>
 *
 * @param file The path to the avro file to open.
 * @return A {@link DataFileReader} for the specified avro file, or {@code null} (after logging a
 *         warning) if the file does not exist.
 * @throws FileBasedHelperException if there is a problem checking for or opening the specified file.
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  try {
    final Path avroPath = new Path(file);
    if (!this.getFileSystem().exists(avroPath)) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }

    final boolean proxyAsUser = this.getState().getPropAsBoolean(
        ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (!proxyAsUser) {
      return new DataFileReader<>(new FsInput(avroPath, this.getFileSystem().getConf()),
          new GenericDatumReader<GenericRecord>());
    }
    return new DataFileReader<>(new ProxyFsInput(avroPath, this.getFileSystem()),
        new GenericDatumReader<GenericRecord>());
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + ioe.getMessage(), ioe);
  }
}
/**
 * Reads the schema embedded in the specified avro file.
 * <p>
 * The file is opened through a {@link ProxyFsInput} when
 * {@link ConfigurationKeys#SHOULD_FS_PROXY_AS_USER} is enabled and through an {@link FsInput}
 * otherwise. The reader is always closed before returning; a failure to close is logged and
 * not propagated.
 * </p>
 *
 * @param file The path to the avro file.
 * @return The {@link Schema} reported by the file's {@link DataFileReader}.
 * @throws FileBasedHelperException if the file cannot be opened.
 */
public Schema getAvroSchema(String file) throws FileBasedHelperException {
  DataFileReader<GenericRecord> reader = null;
  try {
    final boolean proxyAsUser = this.getState().getPropAsBoolean(
        ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    final Path avroPath = new Path(file);
    if (proxyAsUser) {
      reader = new DataFileReader<>(new ProxyFsInput(avroPath, this.getFileSystem()),
          new GenericDatumReader<GenericRecord>());
    } else {
      reader = new DataFileReader<>(new FsInput(avroPath, this.getFileSystem().getConf()),
          new GenericDatumReader<GenericRecord>());
    }
    return reader.getSchema();
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + ioe.getMessage(), ioe);
  } finally {
    closeQuietly:
    {
      if (reader == null) {
        break closeQuietly;
      }
      try {
        reader.close();
      } catch (IOException closeFailure) {
        // Best effort: the schema has already been read (or the open failure is already propagating).
        LOGGER.error("Failed to close avro file " + file, closeFailure);
      }
    }
  }
}
/** * Returns an {@link InputStream} to the specified file. * <p> * Note: It is the caller's responsibility to close the returned {@link InputStream}. * </p> * * @param path The path to the file to open. * @return An {@link InputStream} for the specified file. * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file. */ @Override public InputStream getFileStream(String path) throws FileBasedHelperException { try { Path p = new Path(path); InputStream in = this.getFileSystem().open(p); // Account for compressed files (e.g. gzip). // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf()); CompressionCodec codec = factory.getCodec(p); return (codec == null) ? in : codec.createInputStream(in); } catch (IOException e) { throw new FileBasedHelperException("Cannot open file " + path + " due to " + e.getMessage(), e); } }
/**
 * Returns the size of the remote file, read via {@code lstat} on a freshly obtained SFTP channel.
 * <p>
 * The channel is disconnected before returning, whether or not the lookup succeeds.
 * </p>
 *
 * @param filePath remote path of the file to inspect
 * @return the size reported by the file's SFTP attributes
 * @throws FileBasedHelperException if obtaining the channel or running {@code lstat} fails
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  ChannelSftp channelSftp = null;
  try {
    channelSftp = getSftpChannel();
    return channelSftp.lstat(filePath).getSize();
  } catch (SftpException e) {
    throw new FileBasedHelperException(
        String.format("Failed to get size for file at path %s due to error %s", filePath, e.getMessage()), e);
  } finally {
    // Disconnect on every exit path so a failed lstat does not leak the channel.
    if (channelSftp != null) {
      channelSftp.disconnect();
    }
  }
}
/**
 * Returns the modification time of the remote file, read via {@code lstat} on a freshly
 * obtained SFTP channel. The channel is always disconnected before returning.
 * <p>
 * The value is the {@code int} returned by the attributes' {@code getMTime()}, widened to
 * {@code long}. NOTE(review): its unit is whatever {@code getMTime()} reports; confirm it
 * matches what callers of this method expect.
 * </p>
 *
 * @param filePath remote path of the file to inspect
 * @return the modification time reported by the file's SFTP attributes
 * @throws FileBasedHelperException if obtaining the channel or running {@code lstat} fails
 */
@Override
public long getFileMTime(String filePath) throws FileBasedHelperException {
  ChannelSftp sftp = null;
  try {
    sftp = getSftpChannel();
    return sftp.lstat(filePath).getMTime();
  } catch (SftpException sftpFailure) {
    String reason = String.format("Failed to get modified timestamp for file at path %s due to error %s",
        filePath, sftpFailure.getMessage());
    throw new FileBasedHelperException(reason, sftpFailure);
  } finally {
    if (sftp != null) {
      sftp.disconnect();
    }
  }
}
/**
 * Executes an SFTP get on a freshly obtained channel and returns an input stream to the file.
 * <p>
 * On success the channel is handed to the returned {@link SftpFsFileInputStream} together with
 * the data stream (presumably so it can be released when the stream is closed; confirm against
 * {@link SftpFsFileInputStream}). If the get fails, the channel is disconnected here before the
 * failure is reported.
 * </p>
 *
 * @param file remote path of the file to download
 * @return an {@link InputStream} over the remote file's contents
 * @throws FileBasedHelperException if obtaining the channel or starting the download fails
 */
@Override
public InputStream getFileStream(String file) throws FileBasedHelperException {
  SftpGetMonitor monitor = new SftpGetMonitor();
  ChannelSftp channel = null;
  try {
    channel = getSftpChannel();
    return new SftpFsFileInputStream(channel.get(file, monitor), channel);
  } catch (SftpException e) {
    // No stream was returned to own the channel, so release it here instead of leaking it.
    if (channel != null) {
      channel.disconnect();
    }
    throw new FileBasedHelperException("Cannot download file " + file + " due to " + e.getMessage(), e);
  }
}
throw new FileBasedHelperException("Cannot connect to SFTP source", e);
/**
 * Returns the length of the file identified by {@code fileId}.
 *
 * @param fileId identifier of the file; must not be {@code null}
 * @return the length taken from the file's {@code FileStatus}
 * @throws FileBasedHelperException wrapping the {@link IOException} raised while reading the status
 */
@Override
public long getFileSize(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");
  final Path location = new Path(fileId);
  final FileStatus metadata;
  try {
    metadata = fileSystem.getFileStatus(location);
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to get metadata on " + fileId, ioe);
  }
  return metadata.getLen();
}
/**
 * Collects the entries under {@code path} by handing an empty list to {@code lsr} to fill.
 *
 * @param path path to list
 * @return the list populated by {@code lsr}
 * @throws FileBasedHelperException wrapping any {@link IOException} raised while listing
 */
@Override
public List<String> ls(String path) throws FileBasedHelperException {
  final List<String> found = new ArrayList<>();
  try {
    final Path root = new Path(path);
    lsr(root, found);
  } catch (IOException ioe) {
    final String message = "Cannot do ls on path " + path + " due to " + ioe.getMessage();
    throw new FileBasedHelperException(message, ioe);
  }
  return found;
}
/**
 * Looks up the size of a remote file using {@code lstat} on a newly obtained SFTP channel.
 * <p>
 * The channel is released in a {@code finally} block, so it is disconnected on both the
 * success and the failure path.
 * </p>
 *
 * @param filePath remote path of the file to inspect
 * @return the size reported by the file's SFTP attributes
 * @throws FileBasedHelperException if obtaining the channel or running {@code lstat} fails
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  ChannelSftp channelSftp = null;
  try {
    channelSftp = getSftpChannel();
    long fileSize = channelSftp.lstat(filePath).getSize();
    return fileSize;
  } catch (SftpException e) {
    throw new FileBasedHelperException(
        String.format("Failed to get size for file at path %s due to error %s", filePath, e.getMessage()), e);
  } finally {
    // Without this, an SftpException from lstat would leave the channel connected.
    if (channelSftp != null) {
      channelSftp.disconnect();
    }
  }
}