/**
 * Compare an existing state and a new {@link State} to ensure that the existing {@link State} contains all
 * entries in the new {@link State}, and update {@link #result} accordingly.
 */
protected void checkExistingIsSuperstate(State existingState, State newState) {
  checkExistingIsSuperset(existingState.getProperties().entrySet(), newState.getProperties().entrySet());
}
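// Conceptually, the "superstate" check above amounts to entry-set containment between the two
// property sets. A minimal, self-contained sketch of that idea (illustration only; the real
// checkExistingIsSuperset records its outcome in #result rather than returning a boolean):
import java.util.Properties;

class SuperstateSketch {
  static boolean isSuperstate(Properties existing, Properties updated) {
    // existing is a "superstate" when it contains every key/value entry of updated.
    return existing.entrySet().containsAll(updated.entrySet());
  }
}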
@Override
public Properties getProperties() {
  // Merge in increasing order of precedence: job-level, then work-unit-level, then
  // task-level properties (later putAll calls overwrite earlier entries).
  Properties props = new Properties();
  props.putAll(this.jobState.getProperties());
  props.putAll(this.workUnit.getProperties());
  props.putAll(super.getProperties());
  return props;
}
public State(State otherState) {
  this.commonProperties = otherState.getCommonProperties();
  this.specProperties = new Properties();
  this.specProperties.putAll(otherState.getProperties());
  // Drop spec properties that duplicate a common property with an equal value,
  // so shared entries are not stored twice.
  for (Object key : this.commonProperties.keySet()) {
    if (this.specProperties.containsKey(key) && this.commonProperties.get(key).equals(this.specProperties.get(key))) {
      this.specProperties.remove(key);
    }
  }
}
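// To make the deduplication above concrete, here is a minimal, runnable sketch of the same
// loop over plain java.util.Properties (illustration only, not the State API):
import java.util.Properties;

class DedupSketch {
  public static void main(String[] args) {
    Properties common = new Properties();
    common.setProperty("fs.uri", "hdfs://namenode:8020");

    Properties spec = new Properties();
    spec.setProperty("fs.uri", "hdfs://namenode:8020"); // duplicates a common property
    spec.setProperty("job.name", "demo");

    // Mirror of the copy constructor's loop: drop spec entries already present in
    // common with an equal value.
    for (Object key : common.keySet()) {
      if (spec.containsKey(key) && common.get(key).equals(spec.get(key))) {
        spec.remove(key);
      }
    }
    System.out.println(spec); // prints {job.name=demo}
  }
}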
@SuppressWarnings("unchecked") public T withProps(State props) { this.props = new State(props.getProperties()); return (T) this; }
public static Comparator<Requestor<SimpleDatasetRequest>> createRequestorComparator(State state) throws IOException {
  TreeMap<Integer, Pattern> tiers = Maps.newTreeMap();
  Matcher matcher;
  for (Map.Entry<Object, Object> entry : state.getProperties().entrySet()) {
    if (entry.getKey() instanceof String && entry.getValue() instanceof String
        && (matcher = TIER_PATTERN.matcher((String) entry.getKey())).matches()) {
      int tier = Integer.parseInt(matcher.group(1));
      String regex = (String) entry.getValue();
      tiers.put(tier, Pattern.compile(regex));
    }
  }
  return new SimpleDatasetHierarchicalPrioritizer.TierComparator(tiers);
}
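// For illustration, the tier configuration is a set of properties whose keys carry a numeric
// tier index extracted by TIER_PATTERN. The key prefix below is hypothetical; the real pattern
// is defined alongside this method. A runnable sketch of just the parsing step:
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class TierParseSketch {
  // Hypothetical pattern; the real TIER_PATTERN lives in the prioritizer class.
  private static final Pattern TIER_PATTERN = Pattern.compile("compaction\\.prioritization\\.tier\\.(\\d+)");

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("compaction.prioritization.tier.0", ".*critical.*"); // lowest tier index
    props.setProperty("compaction.prioritization.tier.1", ".*batch.*");

    // Same extraction as createRequestorComparator: tier index -> compiled regex,
    // kept ordered by tier via the TreeMap.
    TreeMap<Integer, Pattern> tiers = new TreeMap<>();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      Matcher matcher = TIER_PATTERN.matcher((String) entry.getKey());
      if (matcher.matches()) {
        tiers.put(Integer.parseInt(matcher.group(1)), Pattern.compile((String) entry.getValue()));
      }
    }
    System.out.println(tiers.keySet()); // prints [0, 1]
  }
}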
private void setTopicsFromConfigStore(State state) {
  Set<String> blacklistTopicsFromConfigStore = new HashSet<>();
  Set<String> whitelistTopicsFromConfigStore = new HashSet<>();
  ConfigStoreUtils.setTopicsFromConfigStore(state.getProperties(), blacklistTopicsFromConfigStore,
      whitelistTopicsFromConfigStore, MRCompactor.COMPACTION_BLACKLIST, MRCompactor.COMPACTION_WHITELIST);
  this.blacklist.addAll(DatasetFilterUtils.getPatternsFromStrings(new ArrayList<>(blacklistTopicsFromConfigStore)));
  this.whitelist.addAll(DatasetFilterUtils.getPatternsFromStrings(new ArrayList<>(whitelistTopicsFromConfigStore)));
}
/**
 * Constructor.
 *
 * @param properties job configuration properties
 * @param previousDatasetStatesByUrns a {@link Map} from dataset URNs to the {@link SourceState}s of the previous job run
 * @param previousWorkUnitStates an {@link Iterable} of {@link WorkUnitState}s of the previous job run
 */
public SourceState(State properties, Map<String, ? extends SourceState> previousDatasetStatesByUrns,
    Iterable<WorkUnitState> previousWorkUnitStates) {
  super.addAll(properties.getProperties());
  this.previousDatasetStatesByUrns = ImmutableMap.copyOf(previousDatasetStatesByUrns);
  for (WorkUnitState workUnitState : previousWorkUnitStates) {
    this.previousWorkUnitStates.add(new ImmutableWorkUnitState(workUnitState));
  }
}
/**
 * Starts metric reporting and appends the given metrics file suffix to the current value of
 * {@link ConfigurationKeys#METRICS_FILE_SUFFIX}.
 */
public void startMetricReportingWithFileSuffix(State state, String metricsFileSuffix) {
  Properties metricsReportingProps = new Properties();
  metricsReportingProps.putAll(state.getProperties());

  String oldMetricsFileSuffix =
      state.getProp(ConfigurationKeys.METRICS_FILE_SUFFIX, ConfigurationKeys.DEFAULT_METRICS_FILE_SUFFIX);
  if (Strings.isNullOrEmpty(oldMetricsFileSuffix)) {
    oldMetricsFileSuffix = metricsFileSuffix;
  } else {
    oldMetricsFileSuffix += "." + metricsFileSuffix;
  }
  metricsReportingProps.setProperty(ConfigurationKeys.METRICS_FILE_SUFFIX, oldMetricsFileSuffix);
  startMetricReporting(metricsReportingProps);
}
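// The suffix-chaining behavior above is easy to miss: an existing suffix is extended with a
// "." separator rather than replaced. A tiny runnable sketch of just that rule:
import com.google.common.base.Strings;

class SuffixChainSketch {
  static String chain(String existing, String added) {
    // Mirrors startMetricReportingWithFileSuffix: append with "." unless empty.
    return Strings.isNullOrEmpty(existing) ? added : existing + "." + added;
  }

  public static void main(String[] args) {
    System.out.println(chain("", "attempt2"));       // prints attempt2
    System.out.println(chain("hourly", "attempt2")); // prints hourly.attempt2
  }
}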
public DataSource build() {
  validate();
  Properties properties = new Properties();
  if (this.state != null) {
    // Copy into a fresh Properties object rather than mutating the State's backing
    // Properties through the setProperty calls below.
    properties.putAll(this.state.getProperties());
  }
  properties.setProperty(DataSourceProvider.CONN_URL, this.url);
  properties.setProperty(DataSourceProvider.USERNAME, this.userName);
  properties.setProperty(DataSourceProvider.PASSWORD, this.passWord);
  properties.setProperty(DataSourceProvider.CONN_DRIVER, this.driver);

  if (!StringUtils.isEmpty(this.cryptoKeyLocation)) {
    properties.setProperty(ConfigurationKeys.ENCRYPT_KEY_LOC, this.cryptoKeyLocation);
  }
  if (this.maxIdleConnections != null) {
    properties.setProperty(DataSourceProvider.MAX_IDLE_CONNS, this.maxIdleConnections.toString());
  }
  if (this.maxActiveConnections != null) {
    properties.setProperty(DataSourceProvider.MAX_ACTIVE_CONNS, this.maxActiveConnections.toString());
  }
  if (this.useStrongEncryption != null) {
    properties.setProperty(ConfigurationKeys.ENCRYPT_USE_STRONG_ENCRYPTOR, this.useStrongEncryption.toString());
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Building DataSource with properties " + properties);
  }
  return new DataSourceProvider(properties).get();
}
/**
 * Build a {@link DataWriter}.
 *
 * @return the built {@link DataWriter}
 * @throws IOException if there is anything wrong building the writer
 */
@Override
public DataWriter<GenericRecord> build() throws IOException {
  State state = this.destination.getProperties();
  Properties taskProps = state.getProperties();
  return new KafkaDataWriter<>(taskProps);
}
private GobblinMetrics initializeMetrics() {
  ImmutableList.Builder<Tag<?>> tags = ImmutableList.builder();
  tags.addAll(this.tags);
  tags.addAll(Tag.fromMap(ClusterNameTags.getClusterNameTags()));

  GobblinMetrics gobblinMetrics =
      GobblinMetrics.get(this.state.getProp(ConfigurationKeys.JOB_NAME_KEY), null, tags.build());
  gobblinMetrics.startMetricReporting(this.state.getProperties());
  return gobblinMetrics;
}
/**
 * A private method that creates a state store config.
 *
 * @return a filled-out config that can be passed on to a state store
 */
Config getStateStoreConfig(State state) {
  // Select and prefix-strip all properties prefixed by WATERMARK_STORAGE_CONFIG_PREFIX.
  // stringPropertyNames() returns a snapshot of the keys, so adding entries inside the loop is safe.
  Properties properties = state.getProperties();
  for (String key : properties.stringPropertyNames()) {
    if (key.startsWith(WATERMARK_STORAGE_CONFIG_PREFIX)) {
      properties.setProperty(key.substring(WATERMARK_STORAGE_CONFIG_PREFIX.length()), properties.getProperty(key));
    }
  }

  Config config = ConfigFactory.parseProperties(properties);

  // Defaults
  if (!config.hasPath(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY)) {
    config = config.withValue(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY,
        ConfigValueFactory.fromAnyRef("/streamingWatermarks"));
  }
  return config;
}
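// A runnable sketch of the prefix-stripping step in isolation (the prefix value here is
// hypothetical; the real one is WATERMARK_STORAGE_CONFIG_PREFIX):
import java.util.Properties;

class PrefixStripSketch {
  private static final String PREFIX = "streaming.watermarkStateStore."; // hypothetical value

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty(PREFIX + "state.store.type", "zk");
    props.setProperty("unrelated.key", "untouched");

    // Copy each prefixed entry to its prefix-stripped name so it can be read as
    // state-store config; stringPropertyNames() snapshots the keys, so adding
    // entries inside the loop is safe.
    for (String key : props.stringPropertyNames()) {
      if (key.startsWith(PREFIX)) {
        props.setProperty(key.substring(PREFIX.length()), props.getProperty(key));
      }
    }
    System.out.println(props.getProperty("state.store.type")); // prints zk
  }
}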
/**
 * Whether data should be committed by the job (as opposed to being committed by the tasks).
 * Data should be committed by the job if {@link ConfigurationKeys#JOB_COMMIT_POLICY_KEY} is set to "full",
 * {@link ConfigurationKeys#PUBLISH_DATA_AT_JOB_LEVEL} is set to true, or a job-level data publisher is
 * specified via {@link ConfigurationKeys#JOB_DATA_PUBLISHER_TYPE}.
 */
private static boolean shouldCommitDataInJob(State state) {
  boolean jobCommitPolicyIsFull =
      JobCommitPolicy.getCommitPolicy(state.getProperties()) == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS;
  boolean publishDataAtJobLevel = state.getPropAsBoolean(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
      ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL);
  boolean jobDataPublisherSpecified =
      !Strings.isNullOrEmpty(state.getProp(ConfigurationKeys.JOB_DATA_PUBLISHER_TYPE));
  return jobCommitPolicyIsFull || publishDataAtJobLevel || jobDataPublisherSpecified;
}
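// As a concrete illustration, any ONE of the settings below makes shouldCommitDataInJob(state)
// return true. A hedged sketch: the import paths assume a recent Gobblin package layout, and
// the publisher class name is hypothetical.
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;

class JobCommitSketch {
  public static void main(String[] args) {
    State state = new State();
    state.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full"); // COMMIT_ON_FULL_SUCCESS
    // state.setProp(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL, Boolean.TRUE);
    // state.setProp(ConfigurationKeys.JOB_DATA_PUBLISHER_TYPE, "com.example.MyPublisher");
  }
}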
public HiveConvertPublisher(State state) throws IOException {
  super(state);
  this.avroSchemaManager = new AvroSchemaManager(FileSystem.get(HadoopUtils.newConfiguration()), state);
  this.metricContext = Instrumented.getMetricContext(state, HiveConvertPublisher.class);
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, EventConstants.CONVERSION_NAMESPACE).build();

  Configuration conf = new Configuration();
  Optional<String> uri = Optional.fromNullable(this.state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI));
  if (uri.isPresent()) {
    this.fs = FileSystem.get(URI.create(uri.get()), conf);
  } else {
    this.fs = FileSystem.get(conf);
  }

  try {
    this.hiveJdbcConnector = HiveJdbcConnector.newConnectorWithProps(state.getProperties());
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }

  this.watermarker = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class,
      state.getProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY,
          HiveSource.DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS)).createFromState(state);

  this.pool = HiveMetastoreClientPool.get(state.getProperties(),
      Optional.fromNullable(state.getProperties().getProperty(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
}
@Override
public DataWriter<QueryBasedHiveConversionEntity> build() throws IOException {
  try {
    return new HiveQueryExecutionWriter(
        HiveJdbcConnector.newConnectorWithProps(this.destination.getProperties().getProperties()),
        this.destination.getProperties());
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }
}
private DatasetStateStore getDatasetStateStore() throws IOException {
  if (this.stateStore == null) {
    ClassAliasResolver<DatasetStateStore.Factory> resolver =
        new ClassAliasResolver<>(DatasetStateStore.Factory.class);
    String stateStoreType =
        this.props.getProp(ConfigurationKeys.STATE_STORE_TYPE_KEY, ConfigurationKeys.DEFAULT_STATE_STORE_TYPE);
    try {
      DatasetStateStore.Factory stateStoreFactory = resolver.resolveClass(stateStoreType).newInstance();
      this.stateStore = stateStoreFactory.createStateStore(ConfigFactory.parseProperties(props.getProperties()));
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  return this.stateStore;
}
/**
 * Create a {@link KafkaWrapper} based on the given type of Kafka API and list of Kafka brokers.
 *
 * @param state A {@link State} object that should contain a list of comma-separated Kafka brokers
 * in property "kafka.brokers". It may optionally specify whether to use the new Kafka API by setting
 * use.new.kafka.api=true.
 */
public static KafkaWrapper create(State state) {
  Preconditions.checkNotNull(state.getProp(ConfigurationKeys.KAFKA_BROKERS),
      "Need to specify at least one Kafka broker.");

  KafkaWrapper.Builder builder = new KafkaWrapper.Builder();
  if (state.getPropAsBoolean(USE_NEW_KAFKA_API, DEFAULT_USE_NEW_KAFKA_API)) {
    builder = builder.withNewKafkaAPI();
  }
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  return builder.withBrokers(state.getPropAsList(ConfigurationKeys.KAFKA_BROKERS))
      .withConfig(config)
      .build();
}
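// A hedged usage sketch based on the Javadoc above; both property names come from that
// Javadoc, and the State import path assumes a recent Gobblin package layout (the
// KafkaWrapper import is omitted since it depends on the module layout):
import org.apache.gobblin.configuration.State;

class KafkaWrapperUsageSketch {
  public static void main(String[] args) {
    State state = new State();
    state.setProp("kafka.brokers", "broker1:9092,broker2:9092"); // required, comma-separated
    // state.setProp("use.new.kafka.api", "true");               // optional, per the Javadoc
    KafkaWrapper kafka = KafkaWrapper.create(state);
  }
}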
private Pair<Optional<Table>, Optional<List<Partition>>> getDestinationTableMeta(String dbName, String tableName,
    WorkUnitState state) throws DataConversionException {
  Optional<Table> table = Optional.<Table>absent();
  Optional<List<Partition>> partitions = Optional.<List<Partition>>absent();

  try {
    HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
        Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      table = Optional.of(client.get().getTable(dbName, tableName));
      if (table.isPresent()) {
        org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
        if (HiveUtils.isPartitioned(qlTable)) {
          partitions = Optional.of(HiveUtils.getPartitions(client.get(), qlTable, Optional.<String>absent()));
        }
      }
    }
  } catch (NoSuchObjectException e) {
    // The destination table does not exist yet; return absent values.
    return ImmutablePair.of(table, partitions);
  } catch (IOException | TException e) {
    throw new DataConversionException("Could not fetch destination table metadata", e);
  }
  return ImmutablePair.of(table, partitions);
}
public HiveBaseExtractor(WorkUnitState state) throws IOException {
  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    return;
  }
  this.hiveWorkUnit = new HiveWorkUnit(state.getWorkunit());
  this.hiveDataset = hiveWorkUnit.getHiveDataset();
  this.dbName = hiveDataset.getDbAndTable().getDb();
  this.tableName = hiveDataset.getDbAndTable().getTable();
  this.pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
      Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
}
private void initRequestAllocator(State state) {
  try {
    ResourceEstimator estimator = GobblinConstructorUtils.<ResourceEstimator>invokeLongestConstructor(
        new ClassAliasResolver(ResourceEstimator.class).resolveClass(
            state.getProp(ConfigurationKeys.COMPACTION_ESTIMATOR,
                SimpleDatasetRequest.SimpleDatasetCountEstimator.class.getName())));

    RequestAllocatorConfig.Builder<SimpleDatasetRequest> configBuilder =
        RequestAllocatorConfig.builder(estimator).allowParallelization(1).withLimitedScopeConfig(
            ConfigBuilder.create()
                .loadProps(state.getProperties(), ConfigurationKeys.COMPACTION_PRIORITIZATION_PREFIX).build());

    // Without a prioritizer alias, fall back to simple greedy allocation.
    if (!state.contains(ConfigurationKeys.COMPACTION_PRIORITIZER_ALIAS)) {
      allocator = new GreedyAllocator<>(configBuilder.build());
      return;
    }

    Comparator<SimpleDatasetRequest> prioritizer = GobblinConstructorUtils.<Comparator>invokeLongestConstructor(
        new ClassAliasResolver(Comparator.class).resolveClass(
            state.getProp(ConfigurationKeys.COMPACTION_PRIORITIZER_ALIAS)), state);

    configBuilder.withPrioritizer(prioritizer);

    if (prioritizer instanceof HierarchicalPrioritizer) {
      allocator = new HierarchicalAllocator.Factory().createRequestAllocator(configBuilder.build());
    } else {
      allocator = RequestAllocatorUtils.inferFromConfig(configBuilder.build());
    }
  } catch (RuntimeException e) {
    throw e;
  } catch (Exception e) {
    throw new RuntimeException("Cannot initialize allocator", e);
  }
}