/** * Get the {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit}. * * @return {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit} */ public Extract getExtract() { return new Extract(this.workUnit.getExtract()); }
/** * Constructor. * * @param state a {@link SourceState} the properties of which will be copied into this {@link WorkUnit} instance * @param extract an {@link Extract} * * @deprecated Properties in {@link SourceState} should not be added to a {@link WorkUnit}. Having each * {@link WorkUnit} contain a copy of {@link SourceState} is a waste of memory. Use {@link #create(Extract)}. */ @Deprecated public WorkUnit(SourceState state, Extract extract) { // Values should only be null for deserialization if (state != null) { super.addAll(state); } if (extract != null) { this.extract = extract; } else { this.extract = new Extract(null, null, null, null); } }
@Override public List<WorkUnit> getWorkunits(SourceState state) { Config rootCfg = ConfigUtils.propertiesToConfig(state.getProperties()); Config cfg = rootCfg.hasPath(CONFIG_NAMESPACE) ? rootCfg.getConfig(CONFIG_NAMESPACE) : ConfigFactory.empty(); int numHellos = cfg.hasPath(NUM_HELLOS_KEY) ? cfg.getInt(NUM_HELLOS_KEY) : DEFAULT_NUM_HELLOS; Extract extract = new Extract(TableType.APPEND_ONLY, HelloWorldSource.class.getPackage().getName(), HelloWorldSource.class.getSimpleName()); List<WorkUnit> wus = new ArrayList<>(numHellos); for (int i = 1; i <= numHellos; ++i) { WorkUnit wu = new WorkUnit(extract); wu.setProp(HELLO_ID_FULL_KEY, i); wus.add(wu); } return wus; }
@Override public List<WorkUnit> getWorkunits(SourceState state) { List<WorkUnit> workUnits = Lists.newArrayList(); if (!state.contains(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL)) { return workUnits; } // Create a single snapshot-type extract for all files Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "ExampleNamespace"), "ExampleTable"); String filesToPull = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL); for (String file : Splitter.on(',').omitEmptyStrings().split(filesToPull)) { // Create one work unit for each file to pull WorkUnit workUnit = WorkUnit.create(extract); workUnit.setProp(SOURCE_FILE_KEY, file); workUnits.add(workUnit); } return workUnits; }
/** * Returns a unique {@link Extract} instance. * Any two calls of this method from the same {@link ExtractFactory} instance guarantees to * return {@link Extract}s with different IDs. * * @param type {@link TableType} * @param namespace dot separated namespace path * @param table table name * @return a unique {@link Extract} instance */ public synchronized Extract getUniqueExtract(TableType type, String namespace, String table) { Extract newExtract = new Extract(type, namespace, table); while (this.createdInstances.contains(newExtract)) { if (Strings.isNullOrEmpty(newExtract.getExtractId())) { newExtract.setExtractId(this.dtf.print(new DateTime())); } else { DateTime extractDateTime = this.dtf.parseDateTime(newExtract.getExtractId()); newExtract.setExtractId(this.dtf.print(extractDateTime.plusSeconds(1))); } } this.createdInstances.add(newExtract); return newExtract; } }
/** * Create a new properly populated {@link Extract} instance. * * <p> * This method should always return a new unique {@link Extract} instance. * </p> * * @param type {@link gobblin.source.workunit.Extract.TableType} * @param namespace namespace of the table this extract belongs to * @param table name of the table this extract belongs to * @return a new unique {@link Extract} instance * * @Deprecated Use {@link gobblin.source.extractor.extract.AbstractSource#createExtract( * gobblin.source.workunit.Extract.TableType, String, String)} */ @Deprecated public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) { Extract extract = new Extract(this, type, namespace, table); while (EXTRACT_SET.contains(extract)) { if (Strings.isNullOrEmpty(extract.getExtractId())) { extract.setExtractId(DTF.print(new DateTime())); } else { DateTime extractDateTime = DTF.parseDateTime(extract.getExtractId()); extract.setExtractId(DTF.print(extractDateTime.plusSeconds(1))); } } EXTRACT_SET.add(extract); return extract; }
String extractId = fileSet.getName().replace(':', '_'); Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, CopyConfiguration.COPY_PREFIX, extractId); List<WorkUnit> workUnitsForPartition = Lists.newArrayList(); for (CopyEntity copyEntity : fileSet.getFiles()) {