/**
 * Constructor.
 *
 * <p>
 *   Passes the table type (after conversion through {@code adaptTableType}), the namespace and the
 *   table name straight to the superclass constructor.
 * </p>
 *
 * @param type table type; converted with {@code adaptTableType} before being handed to the superclass
 * @param namespace namespace forwarded unchanged to the superclass
 * @param table table name forwarded unchanged to the superclass
 */
public Extract(TableType type, String namespace, String table) { super(adaptTableType(type), namespace, table); }
/**
 * Get the {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit}.
 *
 * <p>
 *   Each call constructs a new {@link Extract} from the value returned by the wrapped work unit's
 *   {@code getExtract()}; the work unit's own instance is not returned directly.
 * </p>
 *
 * @return {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit}
 */
public Extract getExtract() { return new Extract(this.workUnit.getExtract()); }
/**
 * Creates an {@link Extract} that is distinct from every one previously handed out by this instance.
 *
 * <p>
 *   A fresh {@link Extract} is built from the arguments. While an equal instance is already recorded
 *   in {@code createdInstances}, the extract ID is replaced: a missing or empty ID becomes the current
 *   time, otherwise the existing ID is parsed and moved forward by one second. The final instance is
 *   recorded before it is returned.
 * </p>
 *
 * @param type {@link TableType}
 * @param namespace dot separated namespace path
 * @param table table name
 * @return an {@link Extract} not equal to any instance previously returned by this method
 */
public synchronized Extract getUniqueExtract(TableType type, String namespace, String table) {
  Extract candidate = new Extract(type, namespace, table);
  while (this.createdInstances.contains(candidate)) {
    // Collision with an earlier extract: pick the next timestamp-based ID and check again.
    DateTime nextStamp;
    if (Strings.isNullOrEmpty(candidate.getExtractId())) {
      nextStamp = new DateTime();
    } else {
      nextStamp = this.dtf.parseDateTime(candidate.getExtractId()).plusSeconds(1);
    }
    candidate.setExtractId(this.dtf.print(nextStamp));
  }
  this.createdInstances.add(candidate);
  return candidate;
}
}
/**
 * Builds the writer output file path for this {@link Extract}.
 *
 * <p>
 *   The path has the form {@code <namespace with dots replaced by slashes>/<table>/<extractId>_<mode>},
 *   where {@code mode} is {@code full} when {@link #getIsFull()} is true and {@code append} otherwise.
 * </p>
 *
 * @return writer output file path corresponding to this {@link Extract}
 */
public String getOutputFilePath() {
  StringBuilder path = new StringBuilder();
  // Every '.' in the namespace becomes a directory separator.
  path.append(this.getNamespace().replaceAll("\\.", "/"));
  path.append("/").append(this.getTable());
  path.append("/").append(this.getExtractId());
  path.append("_");
  if (this.getIsFull()) {
    path.append("full");
  } else {
    path.append("append");
  }
  return path.toString();
}
/**
 * Computes the hash code from the concatenation of the namespace, table name and extract ID.
 *
 * @return hash code of the concatenated identifying string
 */
@Override
public int hashCode() {
  String identity = this.getNamespace() + this.getTable() + this.getExtractId();
  return identity.hashCode();
}
table.setNamespace(extract.getNamespace()); table.setName(extract.getTable()); if (extract.hasType()) { table.setType(TableTypeEnum.valueOf(extract.getType().name()));
if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) { this.previousTableState.addAll(pre);
/**
 * Tells whether an extract table type has been set on this {@link Extract}, i.e. whether
 * {@link ConfigurationKeys#EXTRACT_TABLE_TYPE_KEY} is present.
 *
 * @return <code>true</code> if the table type key is present, <code>false</code> otherwise.
 */
public boolean hasType() {
  final boolean typeDefined = this.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY);
  return typeDefined;
}
/**
 * Compares this {@link WorkUnit} with another object.
 *
 * <p>
 *   Two work units are equal when their extracts are both <code>null</code> or equal to each other,
 *   and the superclass comparison also succeeds.
 * </p>
 *
 * @param object the object to compare against
 * @return <code>true</code> if {@code object} is a {@link WorkUnit} equal to this one
 */
@Override
public boolean equals(Object object) {
  if (!(object instanceof WorkUnit)) {
    return false;
  }
  WorkUnit that = (WorkUnit) object;

  if (this.extract == null) {
    // A missing extract only matches another missing extract.
    if (that.extract != null) {
      return false;
    }
  } else if (!this.extract.equals(that.extract)) {
    return false;
  }

  return super.equals(that);
}
/**
 * Constructor.
 *
 * <p>
 *   Copies every property of {@code state} into this instance when {@code state} is non-null, and
 *   stores {@code extract}; when {@code extract} is <code>null</code> an {@link Extract} built from
 *   all-null arguments is stored instead.
 * </p>
 *
 * @param state a {@link SourceState} the properties of which will be copied into this {@link WorkUnit} instance
 * @param extract an {@link Extract}
 *
 * @deprecated Properties in {@link SourceState} should not be added to a {@link WorkUnit}. Having each
 * {@link WorkUnit} contain a copy of {@link SourceState} is a waste of memory. Use {@link #create(Extract)}.
 */
@Deprecated
public WorkUnit(SourceState state, Extract extract) {
  // Values should only be null for deserialization
  if (state != null) {
    super.addAll(state);
  }
  this.extract = (extract != null) ? extract : new Extract(null, null, null, null);
}
/**
 * Create a new properly populated {@link Extract} instance.
 *
 * <p>
 *   The returned instance is not equal to any instance previously recorded in {@code EXTRACT_SET}.
 *   On a collision the extract ID is replaced: a missing or empty ID becomes the current time,
 *   otherwise the existing ID is parsed and advanced by one second, until no equal instance is found.
 * </p>
 *
 * @param type {@link gobblin.source.workunit.Extract.TableType}
 * @param namespace namespace of the table this extract belongs to
 * @param table name of the table this extract belongs to
 * @return a new unique {@link Extract} instance
 *
 * @deprecated Use {@link gobblin.source.extractor.extract.AbstractSource#createExtract(
 * gobblin.source.workunit.Extract.TableType, String, String)}
 */
@Deprecated
public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) {
  Extract candidate = new Extract(this, type, namespace, table);
  while (EXTRACT_SET.contains(candidate)) {
    // Already handed out: move to the next timestamp-based ID and re-check.
    DateTime nextStamp;
    if (Strings.isNullOrEmpty(candidate.getExtractId())) {
      nextStamp = new DateTime();
    } else {
      nextStamp = DTF.parseDateTime(candidate.getExtractId()).plusSeconds(1);
    }
    candidate.setExtractId(DTF.print(nextStamp));
  }
  EXTRACT_SET.add(candidate);
  return candidate;
}
/**
 * Compares this {@link Extract} with another object.
 *
 * <p>
 *   The other object must be an {@link Extract}, pass the superclass comparison, and have the same
 *   namespace, table name and extract ID.
 * </p>
 *
 * @param object the object to compare against
 * @return <code>true</code> if {@code object} is an {@link Extract} equal to this one
 */
@Override
public boolean equals(Object object) {
  if (!(object instanceof Extract)) {
    return false;
  }
  Extract that = (Extract) object;

  if (!super.equals(that)) {
    return false;
  }
  if (!this.getNamespace().equals(that.getNamespace())) {
    return false;
  }
  if (!this.getTable().equals(that.getTable())) {
    return false;
  }
  return this.getExtractId().equals(that.getExtractId());
}
/**
 * Creates the work units for this source.
 *
 * <p>
 *   The number of work units is read from {@code NUM_HELLOS_KEY} under {@code CONFIG_NAMESPACE},
 *   falling back to {@code DEFAULT_NUM_HELLOS} when absent. All work units share one
 *   {@code APPEND_ONLY} {@link Extract}, and each is tagged with its 1-based index under
 *   {@code HELLO_ID_FULL_KEY}.
 * </p>
 *
 * @param state the {@link SourceState} whose properties supply the configuration
 * @return the list of created {@link WorkUnit}s
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  Config jobConfig = ConfigUtils.propertiesToConfig(state.getProperties());

  Config sourceConfig;
  if (jobConfig.hasPath(CONFIG_NAMESPACE)) {
    sourceConfig = jobConfig.getConfig(CONFIG_NAMESPACE);
  } else {
    sourceConfig = ConfigFactory.empty();
  }

  int helloCount;
  if (sourceConfig.hasPath(NUM_HELLOS_KEY)) {
    helloCount = sourceConfig.getInt(NUM_HELLOS_KEY);
  } else {
    helloCount = DEFAULT_NUM_HELLOS;
  }

  // One extract is shared by every work unit.
  String extractNamespace = HelloWorldSource.class.getPackage().getName();
  String extractTable = HelloWorldSource.class.getSimpleName();
  Extract sharedExtract = new Extract(TableType.APPEND_ONLY, extractNamespace, extractTable);

  List<WorkUnit> result = new ArrayList<>(helloCount);
  int helloId = 1;
  while (helloId <= helloCount) {
    WorkUnit unit = new WorkUnit(sharedExtract);
    unit.setProp(HELLO_ID_FULL_KEY, helloId);
    result.add(unit);
    ++helloId;
  }
  return result;
}
/**
 * Constructor taking a {@link SourceState}.
 *
 * <p>
 *   Passes the state, the table type (after conversion through {@code adaptTableType}), the namespace
 *   and the table name straight to the superclass constructor.
 * </p>
 *
 * @param state a {@link SourceState} forwarded unchanged to the superclass
 * @param type table type; converted with {@code adaptTableType} before being handed to the superclass
 * @param namespace namespace forwarded unchanged to the superclass
 * @param table table name forwarded unchanged to the superclass
 */
// NOTE(review): marked @Deprecated without naming a replacement; presumably a constructor that takes
// no SourceState is preferred -- confirm and add a Javadoc @deprecated tag.
@Deprecated public Extract(SourceState state, TableType type, String namespace, String table) { super(state, adaptTableType(type), namespace, table); }
@Override public List<WorkUnit> getWorkunits(SourceState state) { List<WorkUnit> workUnits = Lists.newArrayList(); if (!state.contains(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL)) { return workUnits; } // Create a single snapshot-type extract for all files Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "ExampleNamespace"), "ExampleTable"); String filesToPull = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL); for (String file : Splitter.on(',').omitEmptyStrings().split(filesToPull)) { // Create one work unit for each file to pull WorkUnit workUnit = WorkUnit.create(extract); workUnit.setProp(SOURCE_FILE_KEY, file); workUnits.add(workUnit); } return workUnits; }
/**
 * Constructor.
 *
 * <p>
 *   Passes the table type (after conversion through {@code adaptTableType}), the namespace and the
 *   table name straight to the superclass constructor.
 * </p>
 *
 * @param type table type; converted with {@code adaptTableType} before being handed to the superclass
 * @param namespace namespace forwarded unchanged to the superclass
 * @param table table name forwarded unchanged to the superclass
 */
public Extract(TableType type, String namespace, String table) { super(adaptTableType(type), namespace, table); }
String extractId = fileSet.getName().replace(':', '_'); Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, CopyConfiguration.COPY_PREFIX, extractId); List<WorkUnit> workUnitsForPartition = Lists.newArrayList(); for (CopyEntity copyEntity : fileSet.getFiles()) {
/**
 * Constructor taking a {@link SourceState}.
 *
 * <p>
 *   Passes the state, the table type (after conversion through {@code adaptTableType}), the namespace
 *   and the table name straight to the superclass constructor.
 * </p>
 *
 * @param state a {@link SourceState} forwarded unchanged to the superclass
 * @param type table type; converted with {@code adaptTableType} before being handed to the superclass
 * @param namespace namespace forwarded unchanged to the superclass
 * @param table table name forwarded unchanged to the superclass
 */
// NOTE(review): marked @Deprecated without naming a replacement; presumably a constructor that takes
// no SourceState is preferred -- confirm and add a Javadoc @deprecated tag.
@Deprecated public Extract(SourceState state, TableType type, String namespace, String table) { super(state, adaptTableType(type), namespace, table); }