/**
 * Walk the remaining tables and return a {@link HiveDataset} for the first one that is not blacklisted.
 *
 * <p>Blacklisted tables are skipped silently. A table whose lookup or dataset creation throws is logged, reported
 * with a {@code DATASET_ERROR} event (when an event submitter is present) and skipped as well, so one bad table
 * does not stop the iteration.
 *
 * @return the next dataset, or the result of {@code endOfData()} once the table iterator is exhausted
 */
@Override
protected HiveDataset computeNext() {
  while (this.tables.hasNext()) {
    DbAndTable candidate = this.tables.next();

    try (AutoReturnableObject<IMetaStoreClient> pooledClient = HiveDatasetFinder.this.clientPool.getClient()) {
      Table hiveTable = pooledClient.get().getTable(candidate.getDb(), candidate.getTable());
      Config tableConfig = getDatasetConfig(hiveTable);

      boolean blacklisted = ConfigUtils.getBoolean(tableConfig, HIVE_DATASET_IS_BLACKLISTED_KEY,
          DEFAULT_HIVE_DATASET_IS_BLACKLISTED_KEY);

      if (!blacklisted) {
        // Announce the discovery before handing the dataset to the caller.
        if (HiveDatasetFinder.this.eventSubmitter.isPresent()) {
          SlaEventSubmitter.builder()
              .datasetUrn(candidate.toString())
              .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get())
              .eventName(DATASET_FOUND)
              .build()
              .submit();
        }
        return createHiveDataset(hiveTable, tableConfig);
      }
      // Blacklisted: fall out of the try block (returning the client) and move on to the next table.
    } catch (Throwable failure) {
      String message = String.format("Failed to create HiveDataset for table %s.%s", candidate.getDb(),
          candidate.getTable());
      log.error(message, failure);

      if (HiveDatasetFinder.this.eventSubmitter.isPresent()) {
        SlaEventSubmitter.builder()
            .datasetUrn(candidate.toString())
            .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get())
            .eventName(DATASET_ERROR)
            .additionalMetadata(FAILURE_CONTEXT, failure.toString())
            .build()
            .submit();
      }
    }
  }
  return endOfData();
}
};
/**
 * Emit the sla event that marks a {@link gobblin.data.management.copy.CopyableFile} as published.
 *
 * <p>Dataset urn, partition, completeness percentage, record count, previous publish timestamp and dedupe status
 * are read from <code>workUnitState</code> under the corresponding {@link SlaEventKeys}. Origin and upstream
 * timestamps, target path, source path and size in bytes are taken from <code>cf</code>.
 *
 * @see SlaEventSubmitter#submit()
 *
 * @param eventSubmitter submitter the event is sent through
 * @param cf the published file
 * @param workUnitState state holding the required {@link SlaEventKeys} properties
 */
static void submitSuccessfulFilePublish(EventSubmitter eventSubmitter, CopyableFile cf,
    WorkUnitState workUnitState) {
  // Values carried by the work unit state.
  String urn = workUnitState.getProp(SlaEventKeys.DATASET_URN_KEY);
  String partitionName = workUnitState.getProp(SlaEventKeys.PARTITION_KEY);
  String completeness = workUnitState.getProp(SlaEventKeys.COMPLETENESS_PERCENTAGE_KEY);
  String records = workUnitState.getProp(SlaEventKeys.RECORD_COUNT_KEY);
  String previousPublish = workUnitState.getProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY);
  String dedupe = workUnitState.getProp(SlaEventKeys.DEDUPE_STATUS_KEY);

  // Values derived from the copied file itself.
  String originTs = Long.toString(cf.getOriginTimestamp());
  String upstreamTs = Long.toString(cf.getUpstreamTimestamp());
  String targetPath = cf.getDestination().toString();
  String sourcePath = cf.getOrigin().getPath().toString();
  String sizeInBytes = Long.toString(cf.getOrigin().getLen());

  SlaEventSubmitter.builder()
      .eventSubmitter(eventSubmitter)
      .eventName(FILE_PUBLISHED_EVENT_NAME)
      .datasetUrn(urn)
      .partition(partitionName)
      .originTimestamp(originTs)
      .upstreamTimestamp(upstreamTs)
      .completenessPercentage(completeness)
      .recordCount(records)
      .previousPublishTimestamp(previousPublish)
      .dedupeStatus(dedupe)
      .additionalMetadata(TARGET_PATH, targetPath)
      .additionalMetadata(SOURCE_PATH, sourcePath)
      .additionalMetadata(SIZE_IN_BYTES, sizeInBytes)
      .build()
      .submit();
}
}
/** * Get an {@link SlaEventSubmitterBuilder} that has dataset urn, partition, record count, previous publish timestamp * and dedupe status set. * The caller MUST set eventSubmitter, eventname before submitting. */ public static SlaEventSubmitterBuilder getEventSubmitterBuilder(Dataset dataset, Optional<Job> job, FileSystem fs) { SlaEventSubmitterBuilder builder = SlaEventSubmitter.builder().datasetUrn(dataset.getUrn()) .partition(dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, "")) .dedupeStatus(getOutputDedupeStatus(dataset.jobProps())); long previousPublishTime = getPreviousPublishTime(dataset, fs); long upstreamTime = dataset.jobProps().getPropAsLong(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, -1l); long recordCount = getRecordCount(job); // Previous publish only exists when this is a recompact job if (previousPublishTime != -1l) { builder.previousPublishTimestamp(Long.toString(previousPublishTime)); } // Upstream time is the logical time represented by the compaction input directory if (upstreamTime != -1l) { builder.upstreamTimestamp(Long.toString(upstreamTime)); } if (recordCount != -1l) { builder.recordCount(Long.toString(recordCount)); } return builder; }
/**
 * Emit the sla event that marks a dataset partition as published.
 *
 * @param eventSubmitter submitter the event is sent through
 * @param datasetAndPartition supplies the dataset urn and the partition reported in the event
 * @param originTimestamp origin timestamp to report
 * @param upstreamTimestamp upstream timestamp to report
 * @param additionalMetadata extra key/value pairs attached to the event
 */
static void submitSuccessfulDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition, String originTimestamp, String upstreamTimestamp,
    Map<String, String> additionalMetadata) {
  String urn = datasetAndPartition.getDataset().getDatasetURN();
  String partitionName = datasetAndPartition.getPartition();

  SlaEventSubmitter.builder()
      .eventSubmitter(eventSubmitter)
      .eventName(DATASET_PUBLISHED_EVENT_NAME)
      .datasetUrn(urn)
      .partition(partitionName)
      .originTimestamp(originTimestamp)
      .upstreamTimestamp(upstreamTimestamp)
      .additionalMetadata(additionalMetadata)
      .build()
      .submit();
}