@Override
public HdfsResource getResource() {
    return _hdfsUri == null ? null : new HdfsResource(_hdfsUri);
}
@Override
public boolean isExists() {
    final FileSystem fs = getHadoopFileSystem();
    try {
        return fs.exists(getHadoopPath());
    } catch (Exception e) {
        throw wrapException(e);
    } finally {
        FileHelper.safeClose(fs);
    }
}
public Configuration getHadoopConfiguration() {
    final Configuration conf = new Configuration();
    if (_hostname != null && _port > 0) {
        conf.set("fs.defaultFS", getScheme() + "://" + _hostname + ":" + _port);
    }
    final File hadoopConfigurationDirectory = getHadoopConfigurationDirectoryToUse();
    if (hadoopConfigurationDirectory != null) {
        addResourceIfExists(conf, hadoopConfigurationDirectory, "core-site.xml");
        addResourceIfExists(conf, hadoopConfigurationDirectory, "hdfs-site.xml");
    }
    return conf;
}
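// A minimal usage sketch (an assumption, not part of the original source): the
// Configuration built by getHadoopConfiguration() is typically handed to
// FileSystem.get(...), which resolves the "fs.defaultFS" value set above.
// The namenode address below is hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class HadoopConfigurationUsageSketch {
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode.example.com:8020"); // hypothetical host:port
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println("Default filesystem: " + fs.getUri());
        }
    }
}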
final String hadoopJobResultFileName = SparkRunner.DEFAULT_RESULT_PATH + "/" + jobName + SparkRunner.RESULT_FILE_EXTENSION;
// hadoopJobFileName (the HDFS path of the job XML) is defined earlier in the surrounding method
final String uri = HadoopUtils.getFileSystem().getUri().resolve(hadoopJobFileName).toString();
final HdfsResource analysisJobResource = new HdfsResource(uri);
final OutputStream jobWriter = analysisJobResource.write();
new JaxbJobWriter(configuration).write(analysisJob, jobWriter);
jobWriter.close();
final SparkRunner sparkRunner = new SparkRunner(configurationFile.getAbsolutePath(),
        analysisJobResource.getFilepath(), hadoopJobResultFileName);
public URI copyFileToHdfs(final File file, final String hdfsPath, final boolean overwrite) {
    final HdfsResource hdfsResource = createResource(hdfsPath);
    final URI uri = hdfsResource.getHadoopPath().toUri();
    final boolean exists = hdfsResource.isExists();
    if (!overwrite && exists) {
        // no need to copy
        logger.debug("Skipping file-copy to {} because file already exists", hdfsPath);
        return uri;
    }
    if (exists) {
        logger.info("Overwriting file on HDFS: {}", hdfsPath);
    } else {
        logger.debug("Copying file to HDFS: {}", hdfsPath);
    }
    hdfsResource.write(out -> {
        // try-with-resources ensures the input stream is closed even if the copy fails
        try (FileInputStream in = new FileInputStream(file)) {
            FileHelper.copy(in, out);
        }
    });
    return uri;
}
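// Hypothetical usage sketch of copyFileToHdfs(...) above. The enclosing helper class is
// not shown in this excerpt, so it is referenced here through a minimal stand-in
// interface; the file names and HDFS paths are illustrative assumptions.
import java.io.File;
import java.net.URI;

public class CopyToHdfsUsageSketch {
    // stand-in for the excerpt's enclosing helper class
    interface HdfsCopier {
        URI copyFileToHdfs(File file, String hdfsPath, boolean overwrite);
    }

    public static URI upload(HdfsCopier copier) {
        final File localFile = new File("/tmp/cleansing-job.xml"); // hypothetical local file
        // overwrite == false: an existing HDFS file is kept and its URI is returned as-is
        return copier.copyFileToHdfs(localFile, "/datacleaner/jobs/cleansing-job.xml", false);
    }
}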
private void getResultFileFromCluster(TenantContext tenantContext, ExecutionLogger executionLogger,
        String hadoopResultFileName, String jobName) {
    try {
        final HdfsResource resultsResource = new HdfsResource(
                HadoopUtils.getFileSystem().getUri().resolve(hadoopResultFileName).toString());
        if (resultsResource.isExists()) {
            final RepositoryFolder repositoryResultFolder = tenantContext.getResultFolder();
            final String fileName = HadoopJobExecutionUtils.getUrlReadyJobName(jobName)
                    + FileFilters.ANALYSIS_RESULT_SER.getExtension();
            final Resource resourceFile = repositoryResultFolder.createFile(fileName, null).toResource();
            logger.info("Writing the result to {}", resourceFile.getQualifiedPath());
            FileHelper.copy(resultsResource, resourceFile);
        } else {
            final String message = "An error has occurred while running the job. The result was not persisted "
                    + "on Hadoop. Please check the Hadoop and/or DataCleaner logs.";
            executionLogger.setStatusFailed(null, null, new Exception(message));
        }
    } catch (Exception e) {
        executionLogger.setStatusFailed(null, null, e);
    }
}
public boolean isDirectory(final URI path) {
    final Resource resource = getResourceToUse(path);
    if (!resource.isExists()) {
        return false;
    }
    if (resource instanceof FileResource) {
        return ((FileResource) resource).getFile().isDirectory();
    }
    if (resource instanceof HdfsResource) {
        final FileSystem fileSystem = ((HdfsResource) resource).getHadoopFileSystem();
        final Path hadoopPath = ((HdfsResource) resource).getHadoopPath();
        try {
            return fileSystem.isDirectory(hadoopPath);
        } catch (final IOException e) {
            throw new IllegalStateException(e);
        }
    }
    // actually we don't know, but most likely it's not a directory
    return false;
}
private File getHadoopConfigurationDirectoryToUse() {
    File candidate = getDirectoryIfExists(null, _hadoopConfDir);
    if ("true".equals(System.getProperty(SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED))) {
        candidate = getDirectoryIfExists(candidate, System.getProperty("YARN_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getProperty("HADOOP_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getenv("YARN_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getenv("HADOOP_CONF_DIR"));
    }
    return candidate;
}
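// Illustrative sketch (an assumption, not from the original source): the lookup above
// is opt-in, and getDirectoryIfExists(...) is assumed to keep the first existing
// candidate, giving this precedence: the explicitly configured _hadoopConfDir, then the
// YARN_CONF_DIR/HADOOP_CONF_DIR system properties, then the same-named environment
// variables. Enabling the lookup from a host application could look like:
System.setProperty(SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED, "true");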
@Override
public long getLastModified() {
    final FileSystem fs = getHadoopFileSystem();
    try {
        return fs.getFileStatus(getHadoopPath()).getModificationTime();
    } catch (Exception e) {
        throw wrapException(e);
    } finally {
        FileHelper.safeClose(fs);
    }
}
public HdfsResource createResource(final String hdfsPath) {
    return new HdfsResource(_defaultFs.resolve(hdfsPath).toString());
}
@Override
public OutputStream write() throws ResourceException {
    final FileSystem fs = getHadoopFileSystem();
    try {
        final FSDataOutputStream out = fs.create(getHadoopPath(), true);
        // the returned stream takes ownership of 'fs' and closes it with the stream
        return new HdfsFileOutputStream(out, fs);
    } catch (IOException e) {
        // we can close 'fs' in case of an exception
        FileHelper.safeClose(fs);
        throw wrapException(e);
    }
}
public Resource getResourceToUse(final URI path) {
    if (path == null) {
        return null;
    }
    if (_hadoopConfiguration == null) {
        if ("hdfs".equals(path.getScheme())) {
            return new HdfsResource(path.toString());
        }
        return new FileResource(path.toString());
    }
    return new HadoopResource(path, _hadoopConfiguration, HadoopResource.DEFAULT_CLUSTERREFERENCE);
}
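// Hypothetical usage sketch of getResourceToUse(...): with no Hadoop Configuration set,
// the resource type is chosen by URI scheme. The URIs below are illustrative.
final Resource local = getResourceToUse(URI.create("file:///tmp/input.csv"));                // -> FileResource
final Resource remote = getResourceToUse(URI.create("hdfs://namenode:8020/data/input.csv")); // -> HdfsResource
// With a non-null _hadoopConfiguration, any URI is instead wrapped in a HadoopResource
// bound to HadoopResource.DEFAULT_CLUSTERREFERENCE.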
@Override
public OutputStream append() throws ResourceException {
    final FileSystem fs = getHadoopFileSystem();
    try {
        final FSDataOutputStream out = fs.append(getHadoopPath());
        // the returned stream takes ownership of 'fs' and closes it with the stream
        return new HdfsFileOutputStream(out, fs);
    } catch (IOException e) {
        // we can close 'fs' in case of an exception
        FileHelper.safeClose(fs);
        throw wrapException(e);
    }
}
@Override
public Resource create(ResourceProperties properties) throws UnsupportedResourcePropertiesException {
    final Object hadoopConfDirProperty = properties.toMap().get(PROPERTY_HADOOP_CONF_DIR);
    final String hadoopConfDir = hadoopConfDirProperty == null ? null : hadoopConfDirProperty.toString();
    return new HdfsResource(properties.getUri().toString(), hadoopConfDir);
}
@Override
public long getSize() {
    final FileSystem fs = getHadoopFileSystem();
    try {
        // a file reports its own length; a directory reports the summed length of its contents
        final FileStatus fileStatus = fs.getFileStatus(getHadoopPath());
        if (fileStatus.isFile()) {
            return fileStatus.getLen();
        }
        return fs.getContentSummary(getHadoopPath()).getLength();
    } catch (Exception e) {
        throw wrapException(e);
    } finally {
        FileHelper.safeClose(fs);
    }
}
@Override
public InputStream read() throws ResourceException {
    final FileSystem fs = getHadoopFileSystem();
    try {
        final Path hadoopPath = getHadoopPath();
        // return a wrapper InputStream which manages the 'fs' closeable
        if (fs.getFileStatus(hadoopPath).isFile()) {
            final InputStream in = fs.open(hadoopPath);
            return new HdfsFileInputStream(in, fs);
        }
        return new HdfsDirectoryInputStream(hadoopPath, fs);
    } catch (Exception e) {
        // we can close 'fs' in case of an exception
        FileHelper.safeClose(fs);
        throw wrapException(e);
    }
}
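// Hypothetical round-trip sketch for the read()/write() methods above (package names
// assumed from Apache MetaModel; the HDFS URI is illustrative). Because the returned
// wrapper streams own the FileSystem handle, closing the stream also releases the
// filesystem, so plain try-with-resources is sufficient.
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.HdfsResource;

public class HdfsResourceRoundTripSketch {
    public static void main(String[] args) {
        final HdfsResource resource = new HdfsResource("hdfs://namenode:8020/tmp/example.txt");
        try (OutputStream out = resource.write()) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
        try (InputStream in = resource.read()) {
            System.out.println(FileHelper.readInputStreamAsString(in, "UTF-8"));
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }
}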