/**
 * Obtains the source {@link FileSystem}.
 *
 * <p>The file system is looked up through {@link FileSystem#get} using a configuration
 * produced by {@code HadoopUtils.newConfiguration()}.
 *
 * @return the source {@link FileSystem}
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
private static FileSystem getSourceFs() throws IOException {
  FileSystem sourceFs = FileSystem.get(HadoopUtils.newConfiguration());
  return sourceFs;
}
/**
 * Creates the converter and initializes its {@code fs} field from a configuration
 * produced by {@code HadoopUtils.newConfiguration()}.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the file system cannot be obtained
 */
public AbstractAvroToOrcConverter() {
  try {
    FileSystem defaultFs = FileSystem.get(HadoopUtils.newConfiguration());
    this.fs = defaultFs;
  } catch (IOException ioe) {
    // Constructors here do not declare checked exceptions, so rethrow unchecked with the cause attached.
    throw new RuntimeException(ioe);
  }
}
/**
 * Builds a Hadoop {@link Configuration} from the given {@link Properties}.
 *
 * <p>Starts from {@code newConfiguration()} and copies every string-valued property
 * (as reported by {@link Properties#stringPropertyNames()}) into it.
 *
 * @param properties the properties to copy into the configuration
 * @return a new {@link Configuration} containing the given properties
 */
public static Configuration getConfFromProperties(Properties properties) {
  Configuration hadoopConf = newConfiguration();
  for (String key : properties.stringPropertyNames()) {
    String value = properties.getProperty(key);
    hadoopConf.set(key, value);
  }
  return hadoopConf;
}
private void calculateDatasetURN(){ EndPoint e = this.copyRoute.getCopyTo(); if (e instanceof HadoopFsEndPoint) { HadoopFsEndPoint copyTo = (HadoopFsEndPoint) e; Configuration conf = HadoopUtils.newConfiguration(); try { FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf); this.datasetURN = copyToFs.makeQualified(copyTo.getDatasetPath()).toString(); } catch (IOException e1) { // ignored } } else { this.datasetURN = e.toString(); } }
/**
 * Creates a dataset backed by the given Hadoop file system end point.
 *
 * <p>The dataset root is the end point's dataset path qualified against the end point's
 * {@link FileSystem}; if the file system cannot be obtained, the unqualified dataset path
 * is used instead.
 *
 * @param endPoint the end point this dataset lives on
 */
public HadoopFsEndPointDataset(HadoopFsEndPoint endPoint) {
  this.endPoint = endPoint;
  Configuration hadoopConf = HadoopUtils.newConfiguration();
  try {
    FileSystem endPointFs = FileSystem.get(this.endPoint.getFsURI(), hadoopConf);
    this.qualifiedDatasetRoot = endPointFs.makeQualified(this.endPoint.getDatasetPath());
  } catch (IOException ioe) {
    // Not fatal: keep the dataset path as given when it cannot be qualified.
    this.qualifiedDatasetRoot = this.endPoint.getDatasetPath();
  }
}
/**
 * A helper utility for data/filesystem availability checking.
 *
 * <p>Never throws for I/O problems: any {@link IOException} raised while obtaining the file
 * system or checking the path is logged (with its cause) and reported as "not available".
 *
 * @param path the path to be checked; for file system availability checking, just use "/"
 * @return {@code true} if the path exists on this end point's file system; {@code false} if it
 *         does not exist or the file system could not be reached
 */
public boolean isPathAvailable(Path path) {
  try {
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem fs = FileSystem.get(this.getFsURI(), conf);
    if (fs.exists(path)) {
      return true;
    }
    log.warn("Skipped the problematic FileSystem " + this.getFsURI());
    return false;
  } catch (IOException ioe) {
    // Pass the exception to the logger so the reason for skipping is not lost.
    log.warn("Skipped the problematic FileSystem " + this.getFsURI(), ioe);
    return false;
  }
}
/**
 * Returns the single record of this extractor: a {@link FileAwareInputStream} wrapping a
 * metered stream opened on the file's path.
 *
 * <p>The record is produced only once; every later call returns {@code null}. The
 * configuration comes from {@code state} when it is set, otherwise from
 * {@code HadoopUtils.newConfiguration()}.
 *
 * @param reuse not used by this implementation
 * @return the record on the first call, {@code null} afterwards
 * @throws DataRecordException declared by the overridden method
 * @throws IOException if the file system cannot be obtained or the file cannot be opened
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (this.recordRead) {
    return null;
  }

  Configuration conf;
  if (this.state == null) {
    conf = HadoopUtils.newConfiguration();
  } else {
    conf = HadoopUtils.getConfFromState(this.state);
  }

  FileSystem originFs = this.file.getOrigin().getPath().getFileSystem(conf);
  // Mark as read before opening the stream, exactly once per extractor.
  this.recordRead = true;

  FileAwareInputStream record = new FileAwareInputStream(this.file,
      MeteredInputStream.builder().in(originFs.open(this.file.getFileStatus().getPath())).build());
  return record;
}
/**
 * Provides a Hadoop {@link Configuration} built from the given state.
 *
 * <p>Values under {@code encryptedPath} can additionally be decrypted. The
 * {@code encryptedPath} prefix is removed from the full path of each such config key, leaving
 * only the child path as the key. If the same key also exists as a plain child path, the
 * stripped (decrypted) one has higher priority.
 *
 * <p>Example:
 * <ul>
 *   <li>encryptedPath: {@code writer.fs.encrypted}</li>
 *   <li>before: {@code writer.fs.encrypted.secret}</li>
 *   <li>after: {@code secret}</li>
 * </ul>
 *
 * <p>Common use case: the job properties hold an encrypted credential, but the file system
 * needs the decrypted value.
 *
 * @param state the state whose properties are copied into the configuration
 * @param encryptedPath optional; if present, config on this path is decrypted
 *        (see {@code ConfigUtils.resolveEncrypted})
 * @return the Hadoop {@link Configuration}
 */
public static Configuration getConfFromState(State state, Optional<String> encryptedPath) {
  Config parsed = ConfigFactory.parseProperties(state.getProperties());
  Config effective = encryptedPath.isPresent()
      ? ConfigUtils.resolveEncrypted(parsed, encryptedPath)
      : parsed;

  Configuration hadoopConf = newConfiguration();
  for (Entry<String, ConfigValue> setting : effective.entrySet()) {
    String key = setting.getKey();
    String value = setting.getValue().unwrapped().toString();
    hadoopConf.set(key, value);
  }
  return hadoopConf;
}
/**
 * Creates the publisher and wires up its collaborators from {@code state}: the Avro schema
 * manager, metrics/event submission, the writer {@link FileSystem}, the Hive JDBC connector,
 * the source watermarker, and the Hive metastore client pool.
 *
 * <p>The writer file system is resolved against
 * {@code ConfigurationKeys.WRITER_FILE_SYSTEM_URI} when that property is set, otherwise the
 * default file system is used. Both file systems created here use
 * {@code HadoopUtils.newConfiguration()} so they are configured consistently.
 *
 * @param state the job/task state to read settings from
 * @throws IOException if a file system cannot be obtained
 * @throws RuntimeException wrapping the {@link SQLException} if the Hive JDBC connector cannot be created
 */
public HiveConvertPublisher(State state) throws IOException {
  super(state);
  this.avroSchemaManager = new AvroSchemaManager(FileSystem.get(HadoopUtils.newConfiguration()), state);
  this.metricContext = Instrumented.getMetricContext(state, HiveConvertPublisher.class);
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, EventConstants.CONVERSION_NAMESPACE).build();

  // Use the project's configuration factory rather than a bare Configuration, matching the
  // file system created for the schema manager above.
  Configuration conf = HadoopUtils.newConfiguration();
  Optional<String> uri = Optional.fromNullable(this.state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI));
  if (uri.isPresent()) {
    this.fs = FileSystem.get(URI.create(uri.get()), conf);
  } else {
    this.fs = FileSystem.get(conf);
  }

  try {
    this.hiveJdbcConnector = HiveJdbcConnector.newConnectorWithProps(state.getProperties());
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }

  this.watermarker = GobblinConstructorUtils.invokeConstructor(
      HiveSourceWatermarkerFactory.class,
      state.getProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY,
          HiveSource.DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS)).createFromState(state);
  this.pool = HiveMetastoreClientPool.get(state.getProperties(),
      Optional.fromNullable(state.getProperties().getProperty(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
}
/**
 * Obtains a proxied writer {@link FileSystem} for {@code uri} using a token read from a
 * sequence file.
 *
 * <p>The user to proxy as comes from {@code ConfigurationKeys.FS_PROXY_AS_USER_NAME} and the
 * token file location from {@code ConfigurationKeys.FS_PROXY_AS_USER_TOKEN_FILE}.
 *
 * @param state the state holding the proxy user name and token file location
 * @param uri the URI of the file system to obtain
 * @return the proxied {@link FileSystem}
 * @throws IOException if no token is found for the user, or wrapping the
 *         {@link ExecutionException} raised while building the proxied file system
 */
private static FileSystem getWriterFsUsingToken(State state, URI uri) throws IOException {
  try {
    String user = state.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME);
    Path tokenFile = new Path(state.getProp(ConfigurationKeys.FS_PROXY_AS_USER_TOKEN_FILE));
    Optional<Token<?>> token = ProxiedFileSystemUtils.getTokenFromSeqFile(user, tokenFile);

    if (token.isPresent()) {
      return ProxiedFileSystemCache.fromToken()
          .userNameToken(token.get())
          .userNameToProxyAs(state.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME))
          .fsURI(uri)
          .conf(HadoopUtils.newConfiguration())
          .build();
    }
    throw new IOException("No token found for user " + user);
  } catch (ExecutionException ee) {
    throw new IOException(ee);
  }
}
/**
 * Creates a task runner and wires up its file system, Helix manager, services, container
 * metrics, and the Helix task state model factory.
 *
 * <p>The Helix manager is created through {@code HelixManagerFactory.getZKHelixManager} as an
 * {@code InstanceType.PARTICIPANT} using the cluster name and ZooKeeper connection string
 * read from {@code config}. The application work directory is {@code appWorkDirOptional}
 * when present, otherwise it is derived via {@code GobblinClusterUtils.getAppWorkDirPath}.
 *
 * @param applicationName the application name
 * @param helixInstanceName the Helix instance name of this runner
 * @param applicationId the application id
 * @param taskRunnerId the id of this task runner
 * @param config the cluster configuration
 * @param appWorkDirOptional optional explicit application work directory
 * @throws Exception if any of the components cannot be created
 */
public GobblinTaskRunner(String applicationName, String helixInstanceName, String applicationId,
    String taskRunnerId, Config config, Optional<Path> appWorkDirOptional) throws Exception {
  this.helixInstanceName = helixInstanceName;
  this.config = config;
  this.taskRunnerId = taskRunnerId;

  Configuration hadoopConf = HadoopUtils.newConfiguration();
  this.fs = buildFileSystem(this.config, hadoopConf);

  String zkConnectionString = config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
  LOGGER.info("Using ZooKeeper connection string: " + zkConnectionString);

  String helixClusterName = config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
  this.helixManager = HelixManagerFactory.getZKHelixManager(
      helixClusterName, helixInstanceName, InstanceType.PARTICIPANT, zkConnectionString);

  Properties properties = ConfigUtils.configToProperties(config);

  TaskExecutor taskExecutor = new TaskExecutor(properties);
  TaskStateTracker taskStateTracker = new GobblinHelixTaskStateTracker(properties, this.helixManager);

  Path appWorkDir;
  if (appWorkDirOptional.isPresent()) {
    appWorkDir = appWorkDirOptional.get();
  } else {
    appWorkDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, applicationName, applicationId);
  }

  List<Service> services = Lists.newArrayList(taskExecutor, taskStateTracker, new JMXReportingService());
  services.addAll(getServices());
  this.serviceManager = new ServiceManager(services);

  this.containerMetrics = buildContainerMetrics(this.config, properties, applicationName, this.taskRunnerId);

  // Register the task factory for the Helix task state model.
  GobblinHelixTaskFactory gobblinTaskFactory = new GobblinHelixTaskFactory(
      this.containerMetrics, taskExecutor, taskStateTracker, this.fs, appWorkDir, config);
  Map<String, TaskFactory> taskFactoryMap = Maps.newHashMap();
  taskFactoryMap.put(GOBBLIN_TASK_FACTORY_NAME, gobblinTaskFactory);

  this.taskStateModelFactory = new TaskStateModelFactory(this.helixManager, taskFactoryMap);
  this.helixManager.getStateMachineEngine().registerStateModelFactory("Task", this.taskStateModelFactory);
}
// Resolve the file systems of the copy-from and copy-to end points from their URIs.
// Both lookups use the same Configuration instance created by HadoopUtils.newConfiguration().
Configuration conf = HadoopUtils.newConfiguration(); FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf); FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);
/**
 * Obtains a proxied writer {@link FileSystem} for {@code uri} using a super user keytab.
 *
 * <p>Requires {@code ConfigurationKeys.FS_PROXY_AS_USER_NAME},
 * {@code ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS} and
 * {@code ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION} to be present in {@code state}.
 * The required properties are validated before the reference file system is created, so a
 * missing property does not leave a freshly created {@link FileSystem} instance unclosed.
 *
 * @param state the state holding the proxy user, super user and keytab location
 * @param uri the URI of the file system to obtain
 * @return the proxied {@link FileSystem}
 * @throws IllegalArgumentException if a required property is missing
 * @throws IOException if the reference file system cannot be created, or wrapping the
 *         {@link ExecutionException} raised while building the proxied file system
 */
private static FileSystem getWriterFsUsingKeytab(State state, URI uri) throws IOException {
  Preconditions.checkArgument(state.contains(ConfigurationKeys.FS_PROXY_AS_USER_NAME),
      "Missing required property " + ConfigurationKeys.FS_PROXY_AS_USER_NAME);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS),
      "Missing required property " + ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION),
      "Missing required property " + ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION);

  String user = state.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME);
  String superUser = state.getProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS);
  Path keytabLocation = new Path(state.getProp(ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION));

  // Created only after validation: newInstance returns a new, uncached file system object.
  // Uses the project's configuration factory, consistent with the proxied file system below.
  FileSystem fs = FileSystem.newInstance(uri, HadoopUtils.newConfiguration());
  try {
    return ProxiedFileSystemCache.fromKeytab()
        .userNameToProxyAs(user)
        .fsURI(uri)
        .superUserKeytabLocation(keytabLocation)
        .superUserName(superUser)
        .conf(HadoopUtils.newConfiguration())
        .referenceFS(fs)
        .build();
  } catch (ExecutionException e) {
    throw new IOException(e);
  }
}