/**
 * A {@link Dataset} that can be registered in Hive.
 */
@Alpha
public interface HiveRegistrableDataset extends Dataset {

  /**
   * Returns the {@link HiveSpec}s describing this dataset, which
   * {@link org.apache.gobblin.hive.HiveRegister} can use to register the dataset in Hive.
   *
   * @return the list of {@link HiveSpec}s for this dataset
   * @throws IOException if the specs cannot be obtained
   */
  List<HiveSpec> getHiveSpecs() throws IOException;
}
/**
 * An empty marker type representing a request to shut down the cluster manager
 * (the ApplicationMaster), meant to be used with a
 * {@link com.google.common.eventbus.EventBus}.
 *
 * <p>The class carries no state; only its type is meaningful.
 *
 * @author Yinan Li
 */
@Alpha
public class ClusterManagerShutdownRequest {
}
/**
 * A {@link HiveSpec} that carries a set of {@link Predicate}s gating the registration:
 * the Hive registration will be skipped if any of the {@link Predicate}s returns false.
 */
@Alpha
public interface HiveSpecWithPredicates extends HiveSpec {

  /**
   * Returns the {@link Predicate}s to evaluate for this spec. If any of them returns false,
   * the Hive registration will be skipped.
   *
   * @return a {@link Collection} of {@link Predicate}s over {@link HiveRegister}
   */
  Collection<Predicate<HiveRegister>> getPredicates();
}
/**
 * An {@link Extractor} contract for streaming / continuous extractors.
 *
 * @param <S> first type argument, forwarded unchanged to {@link Extractor}
 * @param <D> second type argument, forwarded unchanged to {@link Extractor}
 */
@Alpha
public interface StreamingExtractor<S, D> extends Extractor<S, D> {

  /**
   * Initializes the extractor so that it is ready to read records.
   *
   * @param watermarkStorage watermark storage from which previously committed watermarks
   *                         can be retrieved
   * @throws IOException typically if there was a failure in retrieving watermarks
   */
  void start(WatermarkStorage watermarkStorage) throws IOException;
}
/**
 * A checked {@link Exception} thrown by an {@link ApplicationLauncher}.
 */
@Alpha
public class ApplicationException extends Exception {

  private static final long serialVersionUID = -7131035635096992762L;

  /**
   * Creates an exception with a detail message and the underlying cause.
   *
   * @param message the detail message
   * @param cause the cause of this exception
   */
  public ApplicationException(String message, Throwable cause) {
    super(message, cause);
  }
}
/**
 * A {@link HiveSpec} that additionally declares activities which should be executed
 * prior to the Hive registration.
 */
@Alpha
public interface HiveSpecWithPreActivities extends HiveSpec {

  /**
   * Returns the {@link Activity}s that should be executed prior to the Hive registration.
   *
   * @return a {@link Collection} of pre-registration {@link Activity}s
   */
  Collection<Activity> getPreActivities();
}
/**
 * A {@link HiveSpec} that additionally declares activities which should be executed
 * after the registration is complete.
 */
@Alpha
public interface HiveSpecWithPostActivities extends HiveSpec {

  /**
   * Returns the {@link Activity}s that should be executed after the registration is complete.
   *
   * @return a {@link Collection} of post-registration {@link Activity}s
   */
  Collection<Activity> getPostActivities();
}
/**
 * A provider of dataset-aware {@link GlobalMetadata}.
 */
@Alpha
public interface DatasetAwareMetadataProvider {

  /**
   * Looks up the {@link GlobalMetadata} related to a dataset.
   *
   * @param datasetUrnSource the source used for retrieving the dataset urn
   * @return the given dataset's related {@link GlobalMetadata}
   */
  GlobalMetadata getGlobalMetadataForDataset(String datasetUrnSource);
}
/**
 * A callback interface for listening to a {@link Compactor}.
 */
@Alpha
public interface CompactorListener {

  /**
   * Called once the compaction for a {@link Dataset} has been completed.
   *
   * @param dataset the {@link Dataset} whose compaction completed
   * @throws Exception if the listener fails while handling the notification
   */
  void onDatasetCompactionCompletion(Dataset dataset) throws Exception;
}
/**
 * A factory that creates or generates the {@link TopologySpec}s to be used.
 */
@Alpha
public interface TopologySpecFactory {

  /**
   * Creates or generates {@link TopologySpec}s.
   *
   * @return a {@link Collection} of {@link TopologySpec}s
   */
  Collection<TopologySpec> getTopologies();
}
/**
 * Holder of the {@link org.apache.gobblin.metrics.MetricContext} tag-name constants used by a
 * Gobblin cluster. Some shared constants have been moved to
 * {@link TimingEvent.FlowEventConstants}.
 *
 * @author Yinan Li
 */
@Alpha
public class GobblinClusterMetricTagNames {

  /** Tag name for the application name. */
  public static final String APPLICATION_NAME = "application.name";

  /** Tag name for the application id. */
  public static final String APPLICATION_ID = "application.id";

  /** Tag name for the Helix instance name. */
  public static final String HELIX_INSTANCE_NAME = "helix.instance.name";

  /** Tag name for the task runner id. */
  public static final String TASK_RUNNER_ID = "task.runner.id";
}
/**
 * A generic contract for any class that can be configured using a {@link Config}.
 */
@Alpha
public interface Configurable {

  /**
   * Returns the configuration.
   *
   * @return the {@link Config} of this object
   */
  Config getConfig();

  /**
   * Returns the configuration as a properties collection, for backwards compatibility.
   *
   * @return the configuration expressed as {@link Properties}
   */
  Properties getConfigAsProperties();
}
/**
 * A task that can be executed for a given input.
 */
@Alpha
public interface Activity {

  /**
   * Executes the task using the supplied {@link HiveRegister}.
   *
   * @param register the {@link HiveRegister} the task is given
   * @return true if the execution is successful; false otherwise
   * @throws IOException if the execution fails with an I/O error
   */
  boolean execute(HiveRegister register) throws IOException;
}
/**
 * Entry point for obtaining builders of the various {@link ObjectStoreOperation} types.
 */
@Alpha
public class ObjectStoreOperationBuilder {

  /**
   * Gets a builder used to build a delete operation.
   *
   * @return a new {@link ObjectStoreDeleteOperation.Builder}
   */
  public static ObjectStoreDeleteOperation.Builder deleteBuilder() {
    return new ObjectStoreDeleteOperation.Builder();
  }
}
/**
 * Metadata associated with a record. It currently carries only the record's offset,
 * which is set once at construction and never changes afterwards.
 */
@Alpha
public class RecordMetadata {

  /** Offset of the record inside of its wrapping batch; final because nothing reassigns it. */
  private final long offset;

  /**
   * Creates metadata for a record located at the given offset.
   *
   * @param offset the offset of the record inside of its wrapping batch
   */
  public RecordMetadata(long offset) {
    this.offset = offset;
  }

  /**
   * Get the offset of current record inside of its wrapping batch.
   *
   * @return the offset supplied at construction
   */
  public long getOffset() {
    return this.offset;
  }
}
/**
 * Lets any Gobblin construct declare whether it is safe to run multiple speculative attempts
 * of it.
 *
 * <p>For example, if a {@link org.apache.gobblin.writer.DataWriter} implements
 * {@link SpeculativeAttemptAwareConstruct} and returns true from
 * {@link #isSpeculativeAttemptSafe()}, then multiple attempts of that
 * {@link org.apache.gobblin.writer.DataWriter} should not cause conflict among them.
 */
@Alpha
public interface SpeculativeAttemptAwareConstruct {

  /**
   * Tells whether multiple speculative attempts are safe.
   *
   * <p>To avoid inheritance issues (a subclass silently inheriting a "safe" answer), the
   * suggested pattern is {@code return this.getClass() == MyClass.class;}.
   *
   * @return true if it is safe to have multiple speculative attempts; false otherwise
   */
  boolean isSpeculativeAttemptSafe();
}
/**
 * Identifies one specific execution of a {@link JobSpec}.
 */
@Alpha
public interface JobExecution {

  /**
   * @return the URI of the job being executed
   */
  URI getJobSpecURI();

  /**
   * @return the version of the JobSpec being launched
   */
  String getJobSpecVersion();

  /**
   * @return the millisecond timestamp when the job was launched
   */
  long getLaunchTimeMillis();

  /**
   * @return the id of this execution, unique for the given JobExecutionLauncher
   */
  String getExecutionId();
}
/**
 * A policy that generates {@link HiveSpec}s for a {@link Path}.
 *
 * @author Ziyang Liu
 */
@Alpha
public interface HiveRegistrationPolicy {

  /** Configuration key {@code mapreduce.job.input.path.empty}. */
  String MAPREDUCE_JOB_INPUT_PATH_EMPTY_KEY = "mapreduce.job.input.path.empty";

  /**
   * Returns the {@link HiveSpec}s for a {@link Path}, which
   * {@link org.apache.gobblin.hive.HiveRegister} can use to register the given {@link Path}.
   *
   * @param path the {@link Path} to generate {@link HiveSpec}s for
   * @return a collection of {@link HiveSpec}s for the path
   * @throws IOException if the specs cannot be generated
   */
  Collection<HiveSpec> getHiveSpecs(Path path) throws IOException;
}
/**
 * Defines the operations for managing a collection of {@link JobExecutionStateListener}s.
 */
@Alpha
public interface JobExecutionStateListenerContainer {

  /**
   * Registers a listener with this container.
   *
   * @param listener the listener to register
   */
  void registerStateListener(JobExecutionStateListener listener);

  /**
   * Like {@link #registerStateListener(JobExecutionStateListener)}, but the container will
   * only create a weak reference to the listener. The implementation will automatically
   * remove the listener registration once the listener object gets GCed.
   *
   * <p>Note that weak listeners cannot be removed using
   * {@link #unregisterStateListener(JobExecutionStateListener)}.
   *
   * @param listener the listener to register weakly
   */
  void registerWeakStateListener(JobExecutionStateListener listener);

  /**
   * Removes the registration of a listener previously added through
   * {@link #registerStateListener(JobExecutionStateListener)}.
   *
   * @param listener the listener to unregister
   */
  void unregisterStateListener(JobExecutionStateListener listener);
}
/**
 * Represents the status of a flow.
 *
 * <p>The all-arguments constructor and the getters are generated from the
 * {@code @AllArgsConstructor} and {@code @Getter} annotations; the constructor takes the
 * fields in their declaration order.
 */
@Alpha
@Getter
@AllArgsConstructor
public class FlowStatus {

  /** Name of the flow. */
  private final String flowName;

  /** Group of the flow. */
  private final String flowGroup;

  /** Execution id of the flow. */
  private final long flowExecutionId;

  /** Iterator over the {@link JobStatus} entries held by this flow status. */
  private final Iterator<JobStatus> jobStatusIterator;
}