/**
 * Returns the {@link org.apache.gobblin.source.workunit.Extract} associated with the {@link WorkUnit}.
 *
 * <p>
 *   The extract held by the wrapped {@link WorkUnit} is passed to the {@code Extract} constructor, so every
 *   call hands back a newly constructed instance.
 * </p>
 *
 * @return a new {@link org.apache.gobblin.source.workunit.Extract} built from the one held by the {@link WorkUnit}
 */
public Extract getExtract() {
  Extract workUnitExtract = this.workUnit.getExtract();
  return new Extract(workUnitExtract);
}
/**
 * Creates the work units for this source.
 *
 * <p>
 *   The number of work units is read from {@code NUM_WORK_UNITS_KEY} (falling back to
 *   {@code DEFAULT_NUM_WORK_UNITS}); all of them share a single append-only {@link Extract} whose namespace
 *   and table are the package name and simple name of {@code StressTestingSource}.
 * </p>
 *
 * @param state the {@link SourceState} the work unit count is read from
 * @return the list of created {@link WorkUnit}s
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  int count = state.getPropAsInt(NUM_WORK_UNITS_KEY, DEFAULT_NUM_WORK_UNITS);

  Class<StressTestingSource> sourceClass = StressTestingSource.class;
  Extract sharedExtract =
      new Extract(TableType.APPEND_ONLY, sourceClass.getPackage().getName(), sourceClass.getSimpleName());

  List<WorkUnit> workUnits = new ArrayList<>(count);
  for (int created = 0; created < count; created++) {
    workUnits.add(new WorkUnit(sharedExtract));
  }
  return workUnits;
}
/**
 * Constructor.
 *
 * @param state a {@link SourceState} whose properties are added to this {@link WorkUnit} instance;
 *              ignored when {@code null}
 * @param extract the {@link Extract} of this work unit; when {@code null}, an {@link Extract} built from
 *                all-{@code null} arguments is used instead
 *
 * @deprecated Properties in {@link SourceState} should not be added to a {@link WorkUnit}. Having each
 * {@link WorkUnit} contain a copy of {@link SourceState} is a waste of memory. Use {@link #create(Extract)}.
 */
@Deprecated
public WorkUnit(SourceState state, Extract extract) {
  // Null arguments are only expected when the instance is being created for deserialization
  if (state != null) {
    super.addAll(state);
  }
  this.extract = (extract == null) ? new Extract(null, null, null, null) : extract;
}
private Extract getExtractForFile(PartitionAwareFileRetriever.FileInfo file, String topicName, String namespace, Map<Long, Extract> extractMap) { Extract extract = extractMap.get(file.getWatermarkMsSinceEpoch()); if (extract == null) { // Create an extract object for the dayPath extract = new Extract(this.tableType, namespace, topicName); LOG.info("Created extract: " + extract.getExtractId() + " for path " + topicName); extractMap.put(file.getWatermarkMsSinceEpoch(), extract); } return extract; }
/**
 * Creates one work unit per "hello".
 *
 * <p>
 *   The count is read from {@code NUM_HELLOS_KEY} inside the {@code CONFIG_NAMESPACE} section of the state's
 *   properties, defaulting to {@code DEFAULT_NUM_HELLOS} when the section or the key is missing. Each work
 *   unit shares one append-only {@link Extract} and carries its 1-based index under {@code HELLO_ID_FULL_KEY}.
 * </p>
 *
 * @param state the {@link SourceState} providing the configuration properties
 * @return the list of created {@link WorkUnit}s
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  Config rootCfg = ConfigUtils.propertiesToConfig(state.getProperties());

  Config sourceCfg;
  if (rootCfg.hasPath(CONFIG_NAMESPACE)) {
    sourceCfg = rootCfg.getConfig(CONFIG_NAMESPACE);
  } else {
    sourceCfg = ConfigFactory.empty();
  }

  int helloCount;
  if (sourceCfg.hasPath(NUM_HELLOS_KEY)) {
    helloCount = sourceCfg.getInt(NUM_HELLOS_KEY);
  } else {
    helloCount = DEFAULT_NUM_HELLOS;
  }

  Class<HelloWorldSource> sourceClass = HelloWorldSource.class;
  Extract sharedExtract =
      new Extract(TableType.APPEND_ONLY, sourceClass.getPackage().getName(), sourceClass.getSimpleName());

  List<WorkUnit> workUnits = new ArrayList<>(helloCount);
  for (int helloId = 1; helloId <= helloCount; helloId++) {
    WorkUnit workUnit = new WorkUnit(sharedExtract);
    workUnit.setProp(HELLO_ID_FULL_KEY, helloId);
    workUnits.add(workUnit);
  }
  return workUnits;
}
@Override public List<WorkUnit> getWorkunits(SourceState state) { List<WorkUnit> workUnits = Lists.newArrayList(); if (!state.contains(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL)) { return workUnits; } // Create a single snapshot-type extract for all files Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "ExampleNamespace"), "ExampleTable"); String filesToPull = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL); for (String file : Splitter.on(',').omitEmptyStrings().split(filesToPull)) { // Create one work unit for each file to pull WorkUnit workUnit = WorkUnit.create(extract); workUnit.setProp(SOURCE_FILE_KEY, file); workUnits.add(workUnit); } return workUnits; }
/**
 * Returns a unique {@link Extract} instance.
 * Any two calls of this method on the same {@link ExtractFactory} instance are guaranteed to
 * return {@link Extract}s with different IDs.
 *
 * <p>
 *   While the candidate is already contained in the set of created instances, its extract ID is replaced:
 *   with the formatted current time when it has no ID yet, otherwise with its current ID moved forward by
 *   one second.
 * </p>
 *
 * @param type {@link TableType}
 * @param namespace dot separated namespace path
 * @param table table name
 * @return a unique {@link Extract} instance
 */
public synchronized Extract getUniqueExtract(TableType type, String namespace, String table) {
  Extract candidate = new Extract(type, namespace, table);

  while (this.createdInstances.contains(candidate)) {
    String replacementId;
    if (Strings.isNullOrEmpty(candidate.getExtractId())) {
      replacementId = this.dtf.print(new DateTime());
    } else {
      DateTime currentIdTime = this.dtf.parseDateTime(candidate.getExtractId());
      replacementId = this.dtf.print(currentIdTime.plusSeconds(1));
    }
    candidate.setExtractId(replacementId);
  }

  this.createdInstances.add(candidate);
  return candidate;
}
}
/**
 * Builds a {@link WorkUnit} whose {@code WEIGHT} property holds the given weight as a decimal string.
 *
 * <p>The work unit uses an append-only {@link Extract} with empty namespace and table names.</p>
 *
 * @param weight the weight to record on the work unit
 * @return the newly created {@link WorkUnit}
 */
public WorkUnit getWorkUnitWithWeight(long weight) {
  Extract unnamedExtract = new Extract(Extract.TableType.APPEND_ONLY, "", "");
  WorkUnit weighted = new WorkUnit(unnamedExtract);
  weighted.setProp(WEIGHT, String.valueOf(weight));
  return weighted;
}
/**
 * Create a new properly populated {@link Extract} instance.
 *
 * <p>
 *   This method should always return a new unique {@link Extract} instance. While the new extract is
 *   already contained in {@code EXTRACT_SET}, its extract ID is replaced: with the formatted current time
 *   when it has no ID yet, otherwise with its current ID moved forward by one second.
 * </p>
 *
 * @param type {@link org.apache.gobblin.source.workunit.Extract.TableType}
 * @param namespace namespace of the table this extract belongs to
 * @param table name of the table this extract belongs to
 * @return a new unique {@link Extract} instance
 *
 * @deprecated Use {@link org.apache.gobblin.source.extractor.extract.AbstractSource#createExtract(
 * org.apache.gobblin.source.workunit.Extract.TableType, String, String)}
 */
@Deprecated
public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) {
  Extract extract = new Extract(this, type, namespace, table);
  // NOTE(review): the method locks on this instance; if EXTRACT_SET is shared across instances (its
  // constant-style name suggests a static field), the contains/add sequence below is not atomic between
  // different instances - confirm against the field declaration.
  while (EXTRACT_SET.contains(extract)) {
    if (Strings.isNullOrEmpty(extract.getExtractId())) {
      // No ID yet: start from the current time
      extract.setExtractId(DTF.print(new DateTime()));
    } else {
      // ID already taken: move it forward by one second and check again
      DateTime extractDateTime = DTF.parseDateTime(extract.getExtractId());
      extract.setExtractId(DTF.print(extractDateTime.plusSeconds(1)));
    }
  }
  EXTRACT_SET.add(extract);
  return extract;
}
/**
 * Prepares the fixtures shared by the tests of this class.
 *
 * <p>
 *   Sets {@code state} to a {@link WorkUnitState} wrapping a snapshot-only work unit, and loads
 *   {@code testData} from the JSON resource {@code /converter/JsonElementConversionFactoryTest.json}.
 * </p>
 *
 * @throws NullPointerException if the JSON resource cannot be found on the classpath
 * @throws IllegalStateException if closing the resource stream fails
 */
@BeforeClass
public static void setUp() {
  WorkUnit workUnit = new WorkUnit(new SourceState(),
      new Extract(new SourceState(), Extract.TableType.SNAPSHOT_ONLY, "namespace", "dummy_table"));
  state = new WorkUnitState(workUnit);

  String resourcePath = "/converter/JsonElementConversionFactoryTest.json";
  Type jsonObjectType = new TypeToken<JsonObject>() {
  }.getType();
  Gson gson = new Gson();

  // Fail with a descriptive message rather than a message-less NullPointerException from the reader
  java.io.InputStream resource = java.util.Objects.requireNonNull(
      JsonElementConversionFactoryTest.class.getResourceAsStream(resourcePath),
      "Test resource not found on classpath: " + resourcePath);

  // Decode explicitly as UTF-8 instead of the platform default charset, and make sure the stream is closed
  try (InputStreamReader reader = new InputStreamReader(resource, java.nio.charset.StandardCharsets.UTF_8)) {
    testData = gson.fromJson(reader, jsonObjectType);
  } catch (java.io.IOException e) {
    throw new IllegalStateException("Failed to close test resource " + resourcePath, e);
  }
}
/**
 * Builds a {@link TaskState} configured for streaming execution.
 *
 * <p>
 *   The underlying work unit uses a snapshot-only {@link Extract} named after this class. Metrics are
 *   disabled, the task key and job ID are {@code "1234"}, and the task ID is {@code "testContinuousTaskId"}.
 * </p>
 *
 * @return the configured {@link TaskState}
 */
private TaskState getStreamingTaskState() {
  Class<?> ownClass = this.getClass();
  Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, ownClass.getName(), ownClass.getSimpleName());

  WorkUnitState streamingWorkUnitState = new WorkUnitState(WorkUnit.create(extract));
  streamingWorkUnitState.setProp(ConfigurationKeys.TASK_KEY_KEY, "1234");

  TaskState streamingTaskState = new TaskState(streamingWorkUnitState);
  streamingTaskState.setProp(ConfigurationKeys.METRICS_ENABLED_KEY, Boolean.toString(false));
  streamingTaskState.setProp(TaskConfigurationKeys.TASK_EXECUTION_MODE, ExecutionModel.STREAMING.name());
  streamingTaskState.setJobId("1234");
  streamingTaskState.setTaskId("testContinuousTaskId");
  return streamingTaskState;
}
/**
 * Loads the JSON test resource at the given classpath location and initializes the test fixtures from it.
 *
 * <p>
 *   The {@code "record"} member is stored in {@code jsonRecord} and the {@code "schema"} member in
 *   {@code jsonSchema}. {@code state} is reset to a {@link WorkUnitState} wrapping a snapshot-only work unit,
 *   with the Avro time format set to {@code HH:mm:ss} and the date time zone set to {@code PST}.
 * </p>
 *
 * @param resourceFilePath classpath location of the JSON resource, resolved relative to this class
 * @return the parsed content of the resource
 * @throws NullPointerException if the resource cannot be found on the classpath
 * @throws IllegalStateException if closing the resource stream fails
 */
private JsonObject initResources(String resourceFilePath) {
  Type jsonObjectType = new TypeToken<JsonObject>() {
  }.getType();
  Gson gson = new Gson();

  // Fail with a descriptive message rather than a message-less NullPointerException from the reader
  java.io.InputStream resource = java.util.Objects.requireNonNull(
      this.getClass().getResourceAsStream(resourceFilePath),
      "Test resource not found on classpath: " + resourceFilePath);

  JsonObject testData;
  // Decode explicitly as UTF-8 instead of the platform default charset, and make sure the stream is closed
  try (InputStreamReader reader = new InputStreamReader(resource, java.nio.charset.StandardCharsets.UTF_8)) {
    testData = gson.fromJson(reader, jsonObjectType);
  } catch (java.io.IOException e) {
    throw new IllegalStateException("Failed to close test resource " + resourceFilePath, e);
  }

  jsonRecord = testData.get("record").getAsJsonObject();
  jsonSchema = testData.get("schema").getAsJsonArray();

  WorkUnit workUnit = new WorkUnit(new SourceState(),
      new Extract(new SourceState(), Extract.TableType.SNAPSHOT_ONLY, "namespace", "dummy_table"));
  state = new WorkUnitState(workUnit);
  state.setProp(ConfigurationKeys.CONVERTER_AVRO_TIME_FORMAT, "HH:mm:ss");
  state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "PST");
  return testData;
}
/**
 * Checks the writer file path computed for a {@link WorkUnit} that has no writer path configured:
 * it must equal the extract's output file path, with the default fork branch name plus the branch index
 * appended when there is more than one branch.
 */
@Test
public void testGetDefaultWriterFilePath() {
  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
  WorkUnit workUnit = WorkUnit.create(extract);

  // Single branch: plain extract output path
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0),
      new Path(workUnit.getExtract().getOutputFilePath()));

  // Two branches, branch 0: output path with the branch-specific child appended
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 2, 0),
      new Path(workUnit.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}
TaskState getEmptyTestTaskState(String taskId) { // Create a TaskState WorkUnit workUnit = WorkUnit.create( new Extract(Extract.TableType.SNAPSHOT_ONLY, this.getClass().getName(), this.getClass().getSimpleName())); workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY, "taskKey"); TaskState taskState = new TaskState(new WorkUnitState(workUnit)); taskState.setProp(ConfigurationKeys.METRICS_ENABLED_KEY, Boolean.toString(false)); taskState.setTaskId(taskId); taskState.setJobId("1234"); return taskState; }
TaskState getEmptyTestTaskState(String taskId) { // Create a TaskState WorkUnit workUnit = WorkUnit.create( new Extract(Extract.TableType.SNAPSHOT_ONLY, this.getClass().getName(), this.getClass().getSimpleName())); workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY, "taskKey"); TaskState taskState = new TaskState(new WorkUnitState(workUnit)); taskState.setProp(ConfigurationKeys.METRICS_ENABLED_KEY, Boolean.toString(false)); taskState.setTaskId(taskId); taskState.setJobId("1234"); return taskState; }
/**
 * Checks the writer file path computed for a {@link WorkUnitState} that has no writer path configured:
 * it must equal the extract's output file path, with the default fork branch name plus the branch index
 * appended when there is more than one branch.
 */
@Test
public void testGetDefaultWriterFilePathWithWorkUnitState() {
  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
  WorkUnitState wrappedState = new WorkUnitState(WorkUnit.create(extract));

  // Single branch: plain extract output path
  Assert.assertEquals(WriterUtils.getWriterFilePath(wrappedState, 0, 0),
      new Path(wrappedState.getExtract().getOutputFilePath()));

  // Two branches, branch 0: output path with the branch-specific child appended
  Assert.assertEquals(WriterUtils.getWriterFilePath(wrappedState, 2, 0),
      new Path(wrappedState.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}
/**
 * Exercises {@code WriterUtils.getWriterFilePath} against explicitly configured writer paths and against
 * the {@code tablename} and {@code namespace_table} path types.
 *
 * <p>The steps mutate the same work unit in sequence, so their order matters.</p>
 */
@Test
public void testGetWriterFilePath() {
  WorkUnit workUnit =
      WorkUnit.create(new Extract(TableType.SNAPSHOT_ONLY, "org.apache.gobblin.dbNamespace", "tableName"));

  // Explicit writer file path, no branch suffix
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH, TEST_WRITER_FILE_PATH);
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0), TEST_WRITER_FILE_PATH);

  // Explicit writer file path under the ".0" suffixed key
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH + ".0", TEST_WRITER_FILE_PATH);
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 1, 1), TEST_WRITER_FILE_PATH);

  // Without an explicit path, the path type decides: table name only
  workUnit.removeProp(ConfigurationKeys.WRITER_FILE_PATH);
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "tablename");
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0), new Path("tableName"));

  // Namespace (dots turned into path separators) followed by the table name
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "namespace_table");
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0),
      new Path("org/apache/gobblin/dbNamespace/tableName"));
}
/**
 * Builds a sample {@link WorkUnitState}.
 *
 * <p>
 *   The work unit uses an append-only {@link Extract} ({@code "namespace"}, {@code "table"}) and a watermark
 *   interval from {@code 20160101235959} to {@code 20160102235959}; it is paired with an empty {@link State}.
 * </p>
 *
 * @return the sample {@link WorkUnitState}
 */
public static WorkUnitState getWorkUnitState1() {
  Extract extract = new Extract(Extract.TableType.APPEND_ONLY, "namespace", "table");
  WorkUnit workUnit = new WorkUnit(extract);

  LongWatermark lowWatermark = new LongWatermark(20160101235959L);
  LongWatermark expectedHighWatermark = new LongWatermark(20160102235959L);
  workUnit.setWatermarkInterval(new WatermarkInterval(lowWatermark, expectedHighWatermark));

  return new WorkUnitState(workUnit, new State());
}
}
String extractId = fileSet.getName().replace(':', '_'); Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY, CopyConfiguration.COPY_PREFIX, extractId); List<WorkUnit> workUnitsForPartition = Lists.newArrayList(); for (CopyEntity copyEntity : fileSet.getFiles()) {
SourceEntity sourceEntity = SourceEntity.fromSourceEntityName(sourceEntityName); sourceEntities[i] = sourceEntity; extracts[i] = new Extract(TableType.APPEND_ONLY, "", sourceEntity.getDestTableName()); for (int j = 0; j < 3; ++j) { WorkUnit wu = new WorkUnit(extracts[i]);