/**
 * Get an instance of {@link SingleTaskDataPublisher}.
 *
 * @param dataPublisherClass A concrete class that extends {@link SingleTaskDataPublisher}.
 * @param state A {@link State} used to instantiate the {@link SingleTaskDataPublisher}.
 * @return A {@link SingleTaskDataPublisher} instance.
 * @throws IllegalArgumentException if {@code dataPublisherClass} is not assignable to
 *         {@link SingleTaskDataPublisher}.
 * @throws ReflectiveOperationException propagated from {@code DataPublisher.getInstance}.
 */
public static SingleTaskDataPublisher getInstance(Class<? extends DataPublisher> dataPublisherClass, State state)
    throws ReflectiveOperationException {
  // Use the template overload so the message is only formatted when the check actually fails,
  // instead of paying for String.format on every successful call.
  Preconditions.checkArgument(SingleTaskDataPublisher.class.isAssignableFrom(dataPublisherClass),
      "Cannot instantiate %s since it does not extend %s", dataPublisherClass.getSimpleName(),
      SingleTaskDataPublisher.class.getSimpleName());

  // The cast is backed by the assignability check above.
  return (SingleTaskDataPublisher) DataPublisher.getInstance(dataPublisherClass, state);
}
} // closes the enclosing class, whose header lies outside this snippet
/** * Is the publisher cacheable in the SharedResourcesBroker? * @param publisher * @return true if cacheable, else false */ public static boolean isPublisherCacheable(DataPublisher publisher) { // only threadsafe publishers are cacheable. non-threadsafe publishers are marked immediately for invalidation return publisher.supportsCapability(Capability.THREADSAFE, Collections.EMPTY_MAP); }
/**
 * Commits the output data of a dataset by handing its task states to the publisher.
 *
 * <p>A {@link Throwable} raised by {@code publisher.publish} is not rethrown: it is logged and
 * passed to {@code setTaskFailureException} together with the task states.
 *
 * @param taskStates the task states to publish
 * @param publisher the publisher that performs the commit
 */
private void commitDataset(Collection<TaskState> taskStates, DataPublisher publisher) {
  try {
    publisher.publish(taskStates);
  } catch (Throwable publishFailure) {
    // Intentionally broad: every kind of publish failure is recorded on the task states.
    log.error("Failed to commit dataset", publishFailure);
    setTaskFailureException(taskStates, publishFailure);
  }
}
/**
 * Publishes both the metadata and the output data of the given states.
 *
 * <p>The order is chosen by {@code shouldPublishMetadataFirst()}: when it returns {@code true}, the
 * metadata is published via {@link DataPublisher#publishMetadata(Collection)} before the data is
 * published via {@link DataPublisher#publishData(Collection)}; otherwise the data goes first.
 *
 * @param states is a {@link Collection} of {@link WorkUnitState}s.
 * @throws IOException if there is a problem with publishing the metadata or the data.
 */
public void publish(Collection<? extends WorkUnitState> states) throws IOException {
  // Evaluate the ordering decision exactly once.
  final boolean metadataFirst = shouldPublishMetadataFirst();

  if (metadataFirst) {
    publishMetadata(states);
  }
  publishData(states);
  if (!metadataFirst) {
    publishMetadata(states);
  }
}
if (publisher.canBeSkipped()) { log.warn(publisher.getClass() + " will be skipped."); } else { "Cannot persist state upon cancellation because publisher has unfinished work and cannot be skipped."); } else if (this.isMultithreaded && !publisher.isThreadSafe()) { log.warn(String.format( "Gobblin is set up to parallelize publishing, however the publisher %s is not thread-safe. "
/**
 * Publishes both the metadata and the output data of the given states.
 *
 * <p>The order is chosen by {@code shouldPublishMetadataFirst()}: when it returns {@code true}, the
 * metadata is published via {@link DataPublisher#publishMetadata(Collection)} before the data is
 * published via {@link DataPublisher#publishData(Collection)}; otherwise the data goes first.
 *
 * @param states is a {@link Collection} of {@link WorkUnitState}s.
 * @throws IOException if there is a problem with publishing the metadata or the data.
 */
public void publish(Collection<? extends WorkUnitState> states) throws IOException {
  // Evaluate the ordering decision exactly once.
  final boolean metadataFirst = shouldPublishMetadataFirst();

  if (metadataFirst) {
    publishMetadata(states);
  }
  publishData(states);
  if (!metadataFirst) {
    publishMetadata(states);
  }
}
if (publisher.canBeSkipped()) { log.warn(publisher.getClass() + " will be skipped."); } else { "Cannot persist state upon cancellation because publisher has unfinished work and cannot be skipped."); } else if (this.isMultithreaded && !publisher.isThreadSafe()) { log.warn(String.format( "Gobblin is set up to parallelize publishing, however the publisher %s is not thread-safe. "
void checkForUnpublishedWUHandling(String datasetUrn, JobState.DatasetState datasetState, Class<? extends DataPublisher> dataPublisherClass, Closer closer) throws ReflectiveOperationException, IOException { if (UnpublishedHandling.class.isAssignableFrom(dataPublisherClass)) { // pass in jobstate to retrieve properties DataPublisher publisher = closer.register(DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState())); log.info(String.format("Calling publisher to handle unpublished work units for dataset %s of job %s.", datasetUrn, this.jobContext.getJobId())); ((UnpublishedHandling) publisher).handleUnpublishedWorkUnits(datasetState.getTaskStatesAsWorkUnitStates()); } }
@Test public void testGetNonThreadSafePublisher() throws IOException { SharedResourcesBroker broker = SharedResourcesBrokerFactory.<SimpleScopeType>createDefaultTopLevelBroker(ConfigFactory.empty(), SimpleScopeType.GLOBAL.defaultScopeInstance()); DataPublisher publisher1 = DataPublisherFactory.get(TestNonThreadsafeDataPublisher.class.getName(), null, broker); DataPublisher publisher2 = DataPublisherFactory.get(TestNonThreadsafeDataPublisher.class.getName(), null, broker); // should get different publishers Assert.assertNotEquals(publisher1, publisher2); // Check capabilities Assert.assertTrue(publisher1.supportsCapability(DataPublisher.REUSABLE, Collections.EMPTY_MAP)); Assert.assertFalse(publisher1.supportsCapability(Capability.THREADSAFE, Collections.EMPTY_MAP)); }
/**
 * Commits the output data of a dataset by handing its task states to the publisher.
 *
 * <p>A {@link Throwable} raised by {@code publisher.publish} is not rethrown: it is logged and
 * passed to {@code setTaskFailureException} together with the task states.
 *
 * @param taskStates the task states to publish
 * @param publisher the publisher that performs the commit
 */
private void commitDataset(Collection<TaskState> taskStates, DataPublisher publisher) {
  try {
    publisher.publish(taskStates);
  } catch (Throwable publishFailure) {
    // Intentionally broad: every kind of publish failure is recorded on the task states.
    log.error("Failed to commit dataset", publishFailure);
    setTaskFailureException(taskStates, publishFailure);
  }
}
/**
 * Instantiates the dataset's configured publisher, publishes the task states with it, and returns
 * the publisher's commit sequence builder.
 *
 * <p>The publisher class name is read from {@code ConfigurationKeys.DATA_PUBLISHER_TYPE} on the
 * dataset state, falling back to {@code ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE}. The
 * publisher is registered with a {@link Closer} that is closed when the try block exits.
 *
 * <p>On any {@link Throwable} the failure is logged, recorded on the dataset's task states via
 * {@code setTaskFailureException}, and rethrown through {@code Throwables.propagate}.
 *
 * @param datasetState dataset state supplying the publisher type and, on failure, the task states
 * @param taskStates the task states to publish
 * @return the value of {@code getCommitSequenceBuilder()} on the publisher
 */
@SuppressWarnings("unchecked")
private Optional<CommitSequence.Builder> generateCommitSequenceBuilder(JobState.DatasetState datasetState,
    Collection<TaskState> taskStates) {
  try (Closer closer = Closer.create()) {
    String publisherTypeName = datasetState.getProp(ConfigurationKeys.DATA_PUBLISHER_TYPE,
        ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE);
    Class<? extends CommitSequencePublisher> dataPublisherClass =
        (Class<? extends CommitSequencePublisher>) Class.forName(publisherTypeName);

    DataPublisher created = DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState());
    CommitSequencePublisher publisher = (CommitSequencePublisher) closer.register(created);

    publisher.publish(taskStates);
    return publisher.getCommitSequenceBuilder();
  } catch (Throwable failure) {
    log.error("Failed to generate commit sequence", failure);
    setTaskFailureException(datasetState.getTaskStates(), failure);
    throw Throwables.propagate(failure);
  }
}
/** * Is the publisher cacheable in the SharedResourcesBroker? * @param publisher * @return true if cacheable, else false */ public static boolean isPublisherCacheable(DataPublisher publisher) { // only threadsafe publishers are cacheable. non-threadsafe publishers are marked immediately for invalidation return publisher.supportsCapability(Capability.THREADSAFE, Collections.EMPTY_MAP); }
@Override public SharedResourceFactoryResponse<DataPublisher> createResource(SharedResourcesBroker<S> broker, ScopedConfigView<S, DataPublisherKey> config) throws NotConfiguredException { try { DataPublisherKey key = config.getKey(); String publisherClassName = key.getPublisherClassName(); State state = key.getState(); Class<? extends DataPublisher> dataPublisherClass = (Class<? extends DataPublisher>) Class .forName(publisherClassName); log.info("Creating data publisher with class {} in scope {}. ", publisherClassName, config.getScope().toString()); DataPublisher publisher = DataPublisher.getInstance(dataPublisherClass, state); // If the publisher is threadsafe then it is shareable, so return it as a resource instance that may be cached // by the broker. // Otherwise, it is not shareable, so return it as an immediately invalidated resource that will only be returned // once from the broker. if (isPublisherCacheable(publisher)) { return new ResourceInstance<>(publisher); } else { return new ImmediatelyInvalidResourceEntry<>(publisher); } } catch (ReflectiveOperationException e) { throw new RuntimeException(e); } }
/**
 * Get an instance of {@link SingleTaskDataPublisher}.
 *
 * @param dataPublisherClass A concrete class that extends {@link SingleTaskDataPublisher}.
 * @param state A {@link State} used to instantiate the {@link SingleTaskDataPublisher}.
 * @return A {@link SingleTaskDataPublisher} instance.
 * @throws IllegalArgumentException if {@code dataPublisherClass} is not assignable to
 *         {@link SingleTaskDataPublisher}.
 * @throws ReflectiveOperationException propagated from {@code DataPublisher.getInstance}.
 */
public static SingleTaskDataPublisher getInstance(Class<? extends DataPublisher> dataPublisherClass, State state)
    throws ReflectiveOperationException {
  // Use the template overload so the message is only formatted when the check actually fails,
  // instead of paying for String.format on every successful call.
  Preconditions.checkArgument(SingleTaskDataPublisher.class.isAssignableFrom(dataPublisherClass),
      "Cannot instantiate %s since it does not extend %s", dataPublisherClass.getSimpleName(),
      SingleTaskDataPublisher.class.getSimpleName());

  // The cast is backed by the assignability check above.
  return (SingleTaskDataPublisher) DataPublisher.getInstance(dataPublisherClass, state);
}
} // closes the enclosing class, whose header lies outside this snippet
void checkForUnpublishedWUHandling(String datasetUrn, JobState.DatasetState datasetState, Class<? extends DataPublisher> dataPublisherClass, Closer closer) throws ReflectiveOperationException, IOException { if (UnpublishedHandling.class.isAssignableFrom(dataPublisherClass)) { // pass in jobstate to retrieve properties DataPublisher publisher = closer.register(DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState())); log.info(String.format("Calling publisher to handle unpublished work units for dataset %s of job %s.", datasetUrn, this.jobContext.getJobId())); ((UnpublishedHandling) publisher).handleUnpublishedWorkUnits(datasetState.getTaskStatesAsWorkUnitStates()); } }
/**
 * Instantiates the dataset's configured publisher, publishes the task states with it, and returns
 * the publisher's commit sequence builder.
 *
 * <p>The publisher class name is read from {@code ConfigurationKeys.DATA_PUBLISHER_TYPE} on the
 * dataset state, falling back to {@code ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE}. The
 * publisher is registered with a {@link Closer} that is closed when the try block exits.
 *
 * <p>On any {@link Throwable} the failure is logged, recorded on the dataset's task states via
 * {@code setTaskFailureException}, and rethrown through {@code Throwables.propagate}.
 *
 * @param datasetState dataset state supplying the publisher type and, on failure, the task states
 * @param taskStates the task states to publish
 * @return the value of {@code getCommitSequenceBuilder()} on the publisher
 */
@SuppressWarnings("unchecked")
private Optional<CommitSequence.Builder> generateCommitSequenceBuilder(JobState.DatasetState datasetState,
    Collection<TaskState> taskStates) {
  try (Closer closer = Closer.create()) {
    String publisherTypeName = datasetState.getProp(ConfigurationKeys.DATA_PUBLISHER_TYPE,
        ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE);
    Class<? extends CommitSequencePublisher> dataPublisherClass =
        (Class<? extends CommitSequencePublisher>) Class.forName(publisherTypeName);

    DataPublisher created = DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState());
    CommitSequencePublisher publisher = (CommitSequencePublisher) closer.register(created);

    publisher.publish(taskStates);
    return publisher.getCommitSequenceBuilder();
  } catch (Throwable failure) {
    log.error("Failed to generate commit sequence", failure);
    setTaskFailureException(datasetState.getTaskStates(), failure);
    throw Throwables.propagate(failure);
  }
}
@Override public SharedResourceFactoryResponse<DataPublisher> createResource(SharedResourcesBroker<S> broker, ScopedConfigView<S, DataPublisherKey> config) throws NotConfiguredException { try { DataPublisherKey key = config.getKey(); String publisherClassName = key.getPublisherClassName(); State state = key.getState(); Class<? extends DataPublisher> dataPublisherClass = (Class<? extends DataPublisher>) Class .forName(publisherClassName); log.info("Creating data publisher with class {} in scope {}. ", publisherClassName, config.getScope().toString()); DataPublisher publisher = DataPublisher.getInstance(dataPublisherClass, state); // If the publisher is threadsafe then it is shareable, so return it as a resource instance that may be cached // by the broker. // Otherwise, it is not shareable, so return it as an immediately invalidated resource that will only be returned // once from the broker. if (isPublisherCacheable(publisher)) { return new ResourceInstance<>(publisher); } else { return new ImmediatelyInvalidResourceEntry<>(publisher); } } catch (ReflectiveOperationException e) { throw new RuntimeException(e); } }