/**
 * Looks up the modification time recorded in the {@link FileStatus} of the given file.
 *
 * @param fileId identifier of the file; must not be null
 * @return the value reported by {@link FileStatus#getModificationTime()}
 * @throws FileBasedHelperException if the status lookup fails with an {@link IOException}
 */
@Override
public long getFileMTime(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");

  final Path filePath = new Path(fileId);
  try {
    return fileSystem.getFileStatus(filePath).getModificationTime();
  } catch (IOException ioe) {
    throw new FileBasedHelperException(
        "Failed to retrieve getModificationTime on path: " + filePath + " , fileId: " + fileId, ioe);
  }
}
}
/**
 * Adds the size of the current file, as reported by the file-system helper, to the
 * {@code FileBytesRead} counter.
 * <p>
 * This is best effort: when the helper cannot report a size, the failure is logged and the counter is left
 * untouched.
 * </p>
 */
private void incrementBytesReadCounter() {
  try {
    this.counters.inc(CounterNames.FileBytesRead, this.fsHelper.getFileSize(this.currentFile));
  } catch (FileBasedHelperException | UnsupportedOperationException e) {
    // Both failure modes are handled identically, so they share a single handler.
    LOG.info("Unable to get file size. Will skip increment to bytes counter " + e.getMessage());
    LOG.debug(e.getMessage(), e);
  }
}
}
/**
 * Reads the length recorded in the {@link FileStatus} of the given file.
 *
 * @param fileId identifier of the file; must not be null
 * @return the value reported by {@link FileStatus#getLen()}
 * @throws FileBasedHelperException if the status lookup fails with an {@link IOException}
 */
@Override
public long getFileSize(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");

  final Path filePath = new Path(fileId);
  final FileStatus fileStatus;
  try {
    fileStatus = fileSystem.getFileStatus(filePath);
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to get metadata on " + fileId, ioe);
  }
  return fileStatus.getLen();
}
/**
 * Runs {@code ls} through the file-system helper on the directory configured under
 * {@link ConfigurationKeys#SOURCE_FILEBASED_DATA_DIRECTORY}.
 *
 * @param state the state the data directory property is read from
 * @return the helper's listing, or an empty list when the helper fails (the failure is logged, not rethrown)
 */
@Override
public List<String> getcurrentFsSnapshot(State state) {
  final String dataDirectory = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY);
  try {
    LOGGER.info("Running ls command with input " + dataDirectory);
    return this.fsHelper.ls(dataDirectory);
  } catch (FileBasedHelperException listingFailure) {
    LOGGER.error("Not able to run ls command due to " + listingFailure.getMessage() + " will not pull any files",
        listingFailure);
    // Listing failed: fall back to an empty snapshot.
    return Lists.newArrayList();
  }
}
}
/**
 * Opens an input stream on the given file, passing the configured buffer size to the file system when one is
 * present.
 *
 * @param fileId identifier of the file; must not be null
 * @return the stream returned by the file system's {@code open} call
 * @throws FileBasedHelperException if opening the file fails with an {@link IOException}
 */
@Override
public InputStream getFileStream(String fileId) throws FileBasedHelperException {
  Preconditions.checkNotNull(fileId, "fileId is required");

  final Path target = new Path(fileId);
  try {
    return bufferSizeByte.isPresent()
        ? fileSystem.open(target, bufferSizeByte.get())
        : fileSystem.open(target);
  } catch (IOException ioe) {
    throw new FileBasedHelperException(
        "Failed to open files stream on path: " + target + " , fileId: " + fileId, ioe);
  }
}
/**
 * Opens the given gzip compressed file and exposes its decompressed content as a line iterator.
 * <p>
 * The raw stream obtained from the file-system helper is registered with the extractor's closer. When the
 * extractor is configured to skip the first record and the file has at least one line, that line is consumed
 * before the iterator is returned.
 * </p>
 *
 * @param file the file to download
 * @return an iterator over the lines of the decompressed file
 * @throws IOException if the stream cannot be read, or wrapping a {@link FileBasedHelperException} from the helper
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
  log.info("Beginning to download gzip compressed file: " + file);

  try {
    InputStream rawStream = this.fileBasedExtractor.getCloser()
        .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));
    GZIPInputStream gunzipped = new GZIPInputStream(rawStream);
    Iterator<D> lines =
        (Iterator<D>) IOUtils.lineIterator(gunzipped, ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

    boolean dropFirstRecord = this.fileBasedExtractor.isShouldSkipFirstRecord() && lines.hasNext();
    if (dropFirstRecord) {
      lines.next();
    }
    return lines;
  } catch (FileBasedHelperException helperFailure) {
    throw new IOException(
        "Exception while downloading file " + file + " with message " + helperFailure.getMessage(),
        helperFailure);
  }
}
}
/**
 * Creates the file system for the URI configured under {@link ConfigurationKeys#SOURCE_FILEBASED_FS_URI}.
 *
 * @throws FileBasedHelperException if the URI is not configured, is malformed, the file system cannot be
 *         created, or the calling thread is interrupted while the file system is being created
 */
@Override
public void connect() throws FileBasedHelperException {
  String uri = this.state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI);
  try {
    if (Strings.isNullOrEmpty(uri)) {
      throw new FileBasedHelperException(ConfigurationKeys.SOURCE_FILEBASED_FS_URI + " has not been configured");
    }
    this.createFileSystem(uri);
  } catch (IOException e) {
    throw new FileBasedHelperException("Cannot connect to given URI " + uri + " due to " + e.getMessage(), e);
  } catch (URISyntaxException e) {
    throw new FileBasedHelperException("Malformed uri " + uri + " due to " + e.getMessage(), e);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interruption
    // after it has been translated into a FileBasedHelperException.
    Thread.currentThread().interrupt();
    throw new FileBasedHelperException("Interrupted exception is caught when getting the proxy file system", e);
  }
}
/**
 * Opens the given file and exposes its content as a line iterator.
 * <p>
 * The stream obtained from the file-system helper is registered with the extractor's closer. When the extractor
 * is configured to skip the first record and the file has at least one line, that line is consumed before the
 * iterator is returned.
 * </p>
 *
 * @param file the file to download
 * @return an iterator over the lines of the file
 * @throws IOException if the stream cannot be read, or wrapping a {@link FileBasedHelperException} from the helper
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
  log.info("Beginning to download file: " + file);

  try {
    InputStream rawStream = this.fileBasedExtractor.getCloser()
        .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));
    Iterator<D> lines =
        (Iterator<D>) IOUtils.lineIterator(rawStream, ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

    boolean dropFirstRecord = this.fileBasedExtractor.isShouldSkipFirstRecord() && lines.hasNext();
    if (dropFirstRecord) {
      lines.next();
    }
    return lines;
  } catch (FileBasedHelperException helperFailure) {
    throw new IOException(
        "Exception while downloading file " + file + " with message " + helperFailure.getMessage(),
        helperFailure);
  }
}
}
/**
 * Reads the length reported by the file system's status entry for the given path.
 *
 * @param filePath path of the file to inspect
 * @return the length reported by the file status
 * @throws FileBasedHelperException if the status lookup fails with an {@link IOException}
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  final long length;
  try {
    length = this.getFileSystem().getFileStatus(new Path(filePath)).getLen();
  } catch (IOException ioe) {
    final String reason =
        String.format("Failed to get size for file at path %s due to error %s", filePath, ioe.getMessage());
    throw new FileBasedHelperException(reason, ioe);
  }
  return length;
}
}
/**
 * Adds the size of the current file, as reported by the file-system helper, to the
 * {@code FileBytesRead} counter.
 * <p>
 * This is best effort: when the helper cannot report a size, the failure is logged and the counter is left
 * untouched.
 * </p>
 */
private void incrementBytesReadCounter() {
  try {
    this.counters.inc(CounterNames.FileBytesRead, this.fsHelper.getFileSize(this.currentFile));
  } catch (FileBasedHelperException | UnsupportedOperationException e) {
    // Both failure modes are handled identically, so they share a single handler.
    LOG.info("Unable to get file size. Will skip increment to bytes counter " + e.getMessage());
    LOG.debug(e.getMessage(), e);
  }
}
}
/**
 * Reads the modification time reported by the file system's status entry for the given path.
 *
 * @param filePath path of the file to inspect
 * @return the modification time reported by the file status
 * @throws FileBasedHelperException if the status lookup fails with an {@link IOException}
 */
@Override
public long getFileMTime(String filePath) throws FileBasedHelperException {
  final long modificationTime;
  try {
    modificationTime = this.getFileSystem().getFileStatus(new Path(filePath)).getModificationTime();
  } catch (IOException ioe) {
    final String reason = String.format(
        "Failed to get last modified time for file at path %s due to error %s", filePath, ioe.getMessage());
    throw new FileBasedHelperException(reason, ioe);
  }
  return modificationTime;
}
/**
 * Runs {@code ls} through the file-system helper on the directory configured under
 * {@link ConfigurationKeys#SOURCE_FILEBASED_DATA_DIRECTORY}.
 *
 * @param state the state the data directory property is read from
 * @return the helper's listing, or an empty list when the helper fails (the failure is logged, not rethrown)
 */
@Override
public List<String> getcurrentFsSnapshot(State state) {
  final String dataDirectory = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY);
  try {
    LOGGER.info("Running ls command with input " + dataDirectory);
    return this.fsHelper.ls(dataDirectory);
  } catch (FileBasedHelperException listingFailure) {
    LOGGER.error("Not able to run ls command due to " + listingFailure.getMessage() + " will not pull any files",
        listingFailure);
    // Listing failed: fall back to an empty snapshot.
    return Lists.newArrayList();
  }
}
}
/**
 * Reads the length reported by the file system's status entry for the given path.
 *
 * @param filePath path of the file to inspect
 * @return the length reported by the file status
 * @throws FileBasedHelperException if the status lookup fails with an {@link IOException}
 */
@Override
public long getFileSize(String filePath) throws FileBasedHelperException {
  final long length;
  try {
    length = this.getFileSystem().getFileStatus(new Path(filePath)).getLen();
  } catch (IOException ioe) {
    final String reason =
        String.format("Failed to get size for file at path %s due to error %s", filePath, ioe.getMessage());
    throw new FileBasedHelperException(reason, ioe);
  }
  return length;
}
/**
 * Opens the given file and exposes its content as a line iterator.
 * <p>
 * The stream obtained from the file-system helper is registered with the extractor's closer. When the extractor
 * is configured to skip the first record and the file has at least one line, that line is consumed before the
 * iterator is returned.
 * </p>
 *
 * @param file the file to download
 * @return an iterator over the lines of the file
 * @throws IOException if the stream cannot be read, or wrapping a {@link FileBasedHelperException} from the helper
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
  log.info("Beginning to download file: " + file);

  try {
    InputStream rawStream = this.fileBasedExtractor.getCloser()
        .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));
    Iterator<D> lines =
        (Iterator<D>) IOUtils.lineIterator(rawStream, ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

    boolean dropFirstRecord = this.fileBasedExtractor.isShouldSkipFirstRecord() && lines.hasNext();
    if (dropFirstRecord) {
      lines.next();
    }
    return lines;
  } catch (FileBasedHelperException helperFailure) {
    throw new IOException(
        "Exception while downloading file " + file + " with message " + helperFailure.getMessage(),
        helperFailure);
  }
}
}
/**
 * Lists the given path by delegating to {@code lsr}, which fills the returned list.
 *
 * @param path the path to list
 * @return the entries collected by {@code lsr}
 * @throws FileBasedHelperException if {@code lsr} fails with an {@link IOException}
 */
@Override
public List<String> ls(String path) throws FileBasedHelperException {
  final List<String> collected = new ArrayList<>();
  try {
    lsr(new Path(path), collected);
    return collected;
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Cannot do ls on path " + path + " due to " + ioe.getMessage(), ioe);
  }
}
/**
 * Opens the given gzip compressed file and exposes its decompressed content as a line iterator.
 * <p>
 * The raw stream obtained from the file-system helper is registered with the extractor's closer. When the
 * extractor is configured to skip the first record and the file has at least one line, that line is consumed
 * before the iterator is returned.
 * </p>
 *
 * @param file the file to download
 * @return an iterator over the lines of the decompressed file
 * @throws IOException if the stream cannot be read, or wrapping a {@link FileBasedHelperException} from the helper
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
  log.info("Beginning to download gzip compressed file: " + file);

  try {
    InputStream rawStream = this.fileBasedExtractor.getCloser()
        .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));
    GZIPInputStream gunzipped = new GZIPInputStream(rawStream);
    Iterator<D> lines =
        (Iterator<D>) IOUtils.lineIterator(gunzipped, ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

    boolean dropFirstRecord = this.fileBasedExtractor.isShouldSkipFirstRecord() && lines.hasNext();
    if (dropFirstRecord) {
      lines.next();
    }
    return lines;
  } catch (FileBasedHelperException helperFailure) {
    throw new IOException(
        "Exception while downloading file " + file + " with message " + helperFailure.getMessage(),
        helperFailure);
  }
}
}
/**
 * Lists the files under the given folder ID recursively. Folders themselves are not included in the result.
 * If there are no files under the folder ID, or the folder cannot be found, an empty list is returned.
 * If the folder ID is empty (as determined by {@code StringUtils.isEmpty}), files under the root directory
 * ({@code "/"}) are listed.
 * {@inheritDoc}
 * @see org.apache.gobblin.source.extractor.filebased.FileBasedHelper#ls(java.lang.String)
 *
 * @param folderId ID of the folder to list
 * @return the file IDs of all non-directory entries found under the folder
 * @throws FileBasedHelperException if listing the folder fails with an {@link IOException} other than
 *         {@link FileNotFoundException}
 */
@Override
public List<String> ls(String folderId) throws FileBasedHelperException {
  List<String> result = new ArrayList<>();
  // Fall back to the root directory when no folder ID was supplied.
  final String effectiveFolderId = StringUtils.isEmpty(folderId) ? "/" : folderId;

  Path p = new Path(effectiveFolderId);
  final FileStatus[] statusList;
  try {
    statusList = fileSystem.listStatus(p);
  } catch (FileNotFoundException e) {
    // A missing folder is reported as "no files" rather than as an error.
    return result;
  } catch (IOException e) {
    throw new FileBasedHelperException(
        "Failed to list status on path " + p + ", folderID: " + effectiveFolderId, e);
  }

  for (FileStatus status : statusList) {
    String entryId = GoogleDriveFileSystem.toFileId(status.getPath());
    if (status.isDirectory()) {
      // Descend into sub-folders; only their files end up in the result.
      result.addAll(ls(entryId));
    } else {
      result.add(entryId);
    }
  }
  return result;
}
/**
 * Lists the file names found at the given path over a freshly obtained SFTP channel.
 * <p>
 * The channel is disconnected before this method returns, whether the listing succeeds or fails.
 * </p>
 *
 * @param path the remote path to list
 * @return the file name of every entry returned by the SFTP {@code ls} command
 * @throws FileBasedHelperException if obtaining the channel or running {@code ls} fails with an
 *         {@link SftpException}
 */
@Override
public List<String> ls(String path) throws FileBasedHelperException {
  try {
    ChannelSftp channel = getSftpChannel();
    try {
      List<String> list = new ArrayList<>();
      Vector<LsEntry> vector = channel.ls(path);
      for (LsEntry entry : vector) {
        list.add(entry.getFilename());
      }
      return list;
    } finally {
      // Release the channel even when ls throws; previously a failed listing left it connected.
      channel.disconnect();
    }
  } catch (SftpException e) {
    throw new FileBasedHelperException("Cannot execute ls command on sftp connection", e);
  }
}
/**
 * Opens a {@link DataFileReader} on the specified avro file.
 * <p>
 * The input is a {@link ProxyFsInput} when {@link ConfigurationKeys#SHOULD_FS_PROXY_AS_USER} is enabled in the
 * state, and an {@link FsInput} built from the file system's configuration otherwise.
 * </p>
 * <p>
 * Note: It is the caller's responsibility to close the returned {@link DataFileReader}.
 * </p>
 *
 * @param file The path to the avro file to open.
 * @return A {@link DataFileReader} for the specified avro file, or {@code null} (after logging a warning) when
 *         the file does not exist.
 * @throws FileBasedHelperException if checking for or opening the file fails with an {@link IOException}.
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  try {
    final boolean present = this.getFileSystem().exists(new Path(file));
    if (!present) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }

    final boolean proxyAsUser = this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (proxyAsUser) {
      ProxyFsInput proxiedInput = new ProxyFsInput(new Path(file), this.getFileSystem());
      return new DataFileReader<>(proxiedInput, new GenericDatumReader<GenericRecord>());
    }

    FsInput directInput = new FsInput(new Path(file), this.getFileSystem().getConf());
    return new DataFileReader<>(directInput, new GenericDatumReader<GenericRecord>());
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + ioe.getMessage(),
        ioe);
  }
}
/**
 * Reads the schema of the specified avro file.
 * <p>
 * A {@link DataFileReader} is opened on the file (through a {@link ProxyFsInput} when
 * {@link ConfigurationKeys#SHOULD_FS_PROXY_AS_USER} is enabled, through an {@link FsInput} otherwise) and is
 * closed again before this method returns. A failure to close the reader is logged and not rethrown.
 * </p>
 *
 * @param file The path to the avro file.
 * @return The {@link Schema} reported by the reader.
 * @throws FileBasedHelperException if opening the file fails with an {@link IOException}.
 */
public Schema getAvroSchema(String file) throws FileBasedHelperException {
  DataFileReader<GenericRecord> reader = null;
  try {
    final boolean proxyAsUser = this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (!proxyAsUser) {
      FsInput directInput = new FsInput(new Path(file), this.getFileSystem().getConf());
      reader = new DataFileReader<>(directInput, new GenericDatumReader<GenericRecord>());
    } else {
      ProxyFsInput proxiedInput = new ProxyFsInput(new Path(file), this.getFileSystem());
      reader = new DataFileReader<>(proxiedInput, new GenericDatumReader<GenericRecord>());
    }
    return reader.getSchema();
  } catch (IOException ioe) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + ioe.getMessage(),
        ioe);
  } finally {
    // The reader stays null when its construction failed, in which case there is nothing to close.
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException closeFailure) {
        LOGGER.error("Failed to close avro file " + file, closeFailure);
      }
    }
  }
}