@Override public List<WorkUnit> getWorkunits(SourceState state) { initLogger(state); lineageInfo = LineageInfo.getLineageInfo(state.getBroker()); Set<SourceEntity> entities = getFilteredSourceEntities(state); Map<SourceEntity, State> tableSpecificPropsMap = shouldObtainTablePropsFromConfigStore(state) ? getTableSpecificPropsFromConfigStore(entities, state) : getTableSpecificPropsFromState(entities, state); Map<SourceEntity, Long> prevWatermarksByTable = getPreviousWatermarksForAllTables(state); sourceEntity, !entities.contains(sourceEntity)); SourceState combinedState = getCombinedState(state, tableSpecificPropsMap.get(sourceEntity)); long previousWatermark = prevWatermarksByTable.containsKey(sourceEntity) ? prevWatermarksByTable.get(sourceEntity) workUnits.addAll(generateWorkUnits(sourceEntity, combinedState, previousWatermark)); List<WorkUnit> previousWorkUnits = this.getPreviousWorkUnitsForRetry(state); log.info("Total number of incomplete tasks from the previous run: " + previousWorkUnits.size()); workUnits.addAll(previousWorkUnits); state.getPropAsInt(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY, ConfigurationKeys.DEFAULT_MR_JOB_MAX_MAPPERS); return pack(workUnits, numOfMultiWorkunits);
/**
 * Looks up the source entities for the job and passes them through
 * {@code getFilteredSourceEntitiesHelper}.
 *
 * @param state source state handed to both the entity lookup and the filtering helper
 * @return whatever {@code getFilteredSourceEntitiesHelper} returns for the looked-up entities
 */
protected Set<SourceEntity> getFilteredSourceEntities(SourceState state) {
  return getFilteredSourceEntitiesHelper(state, getSourceEntities(state));
}
/**
 * Obtains the source entities through the static {@code QueryBasedSource} helpers: first the
 * unfiltered set, then the filtered view of that set.
 *
 * @param state source state handed to both static helpers
 * @return the result of {@code QueryBasedSource.getFilteredSourceEntitiesHelper}
 */
private Set<SourceEntity> getFilteredEntities(SourceState state) {
  return QueryBasedSource.getFilteredSourceEntitiesHelper(
      state, QueryBasedSource.getSourceEntitiesHelper(state));
}
protected List<WorkUnit> generateWorkUnits(SourceEntity sourceEntity, SourceState state, long previousWatermark) { List<WorkUnit> workUnits = Lists.newArrayList(); String nameSpaceName = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<Partition> partitions = new Partitioner(state).getPartitionList(previousWatermark); Collections.sort(partitions, Partitioner.ascendingComparator); // {@link ConfigurationKeys.EXTRACT_TABLE_NAME_KEY} specify the output path for Extract String outputTableName = sourceEntity.getDestTableName(); log.info("Create extract output with table name is " + outputTableName); Extract extract = createExtract(tableType, nameSpaceName, outputTableName); // Setting current time for the full extract if (Boolean.valueOf(state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY))) { extract.setFullTrue(System.currentTimeMillis()); } for (Partition partition : partitions) { WorkUnit workunit = WorkUnit.create(extract); workunit.setProp(ConfigurationKeys.SOURCE_ENTITY, sourceEntity.getSourceEntityName()); workunit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, sourceEntity.getDestTableName()); workunit.setProp(WORK_UNIT_STATE_VERSION_KEY, CURRENT_WORK_UNIT_STATE_VERSION); addLineageSourceInfo(state, sourceEntity, workunit); partition.serialize(workunit); workUnits.add(workunit); } return workUnits; }
/**
 * Returns the source entities for the given state by delegating to
 * {@code getSourceEntitiesHelper}.
 *
 * @param state state forwarded unchanged to the helper
 * @return the set produced by {@code getSourceEntitiesHelper}
 */
protected Set<SourceEntity> getSourceEntities(State state) {
  Set<SourceEntity> entities = getSourceEntitiesHelper(state);
  return entities;
}
/**
 * Returns the source entities, optionally discovering them from the {@code /sobjects} endpoint.
 *
 * <p>When {@code USE_ALL_OBJECTS} is not enabled (default {@code DEFAULT_USE_ALL_OBJECTS}) the
 * superclass implementation is used. Otherwise a {@code SalesforceConnector} is connected, a GET
 * command for the {@code /sobjects} URI is issued, and the first value of the response results is
 * handed to {@code getSourceEntities(String)}.
 *
 * @param state state holding the {@code USE_ALL_OBJECTS} flag and used to obtain the connector
 * @return the entities from the superclass, or those derived from the first response value
 * @throws RuntimeException if the connection fails, if the response has no values, or (via
 *     {@code Throwables.propagate}) if a {@code RestApiProcessingException} is raised
 */
protected Set<SourceEntity> getSourceEntities(State state) {
  boolean useAllObjects = state.getPropAsBoolean(USE_ALL_OBJECTS, DEFAULT_USE_ALL_OBJECTS);
  if (!useAllObjects) {
    return super.getSourceEntities(state);
  }

  SalesforceConnector connector = getConnector(state);
  boolean connected;
  try {
    connected = connector.connect();
  } catch (RestApiConnectionException e) {
    throw new RuntimeException("Failed to connect.", e);
  }
  if (!connected) {
    throw new RuntimeException("Failed to connect.");
  }

  List<Command> commands = RestApiConnector.constructGetCommand(connector.getFullUri("/sobjects"));
  try {
    CommandOutput<?, ?> response = connector.getResponse(commands);
    // Unchecked cast: the result values are treated as strings here.
    Iterator<String> results = (Iterator<String>) response.getResults().values().iterator();
    if (!results.hasNext()) {
      throw new RuntimeException("Unable to retrieve source entities");
    }
    // Only the first result value is consumed.
    return getSourceEntities(results.next());
  } catch (RestApiProcessingException e) {
    throw Throwables.propagate(e);
  }
}
@Test public void testGetTableSpecificPropsFromState() { SourceState state = new SourceState(); state.setProp(DatasetUtils.DATASET_SPECIFIC_PROPS, "[{\"dataset\":\"Entity1\", \"value\": 1}, {\"dataset\":\"Table2\", \"value\":2}]"); // We should look in the dataset specific properties using the entity name, not table name SourceEntity se1 = new SourceEntity("Entity1", "Table2"); SourceEntity se3 = new SourceEntity("Entity3", "Table3"); Set<SourceEntity> entities = ImmutableSet.of(se1, se3); Map<SourceEntity, State> datasetProps = QueryBasedSource.getTableSpecificPropsFromState(entities, state); // Value 1 should be returned for se1, no prpos should be returned for se3 Assert.assertEquals(datasetProps.size(), 1); Assert.assertTrue(datasetProps.containsKey(se1)); State se1Props = datasetProps.get(se1); Assert.assertEquals(se1Props.getProp("value"), "1"); }
return super.generateWorkUnits(sourceEntity, state, previousWatermark); state.setProp(Partitioner.IS_EARLY_STOPPED, isEarlyStopped); return super.generateWorkUnits(sourceEntity, state, previousWatermark);
QueryBasedSource.getPreviousWatermarksForAllTables(prevState); Assert.assertEquals(previousWM.size(), 3);
protected List<WorkUnit> generateWorkUnits(SourceEntity sourceEntity, SourceState state, long previousWatermark) { List<WorkUnit> workUnits = Lists.newArrayList(); String nameSpaceName = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<Partition> partitions = new Partitioner(state).getPartitionList(previousWatermark); Collections.sort(partitions, Partitioner.ascendingComparator); // {@link ConfigurationKeys.EXTRACT_TABLE_NAME_KEY} specify the output path for Extract String outputTableName = sourceEntity.getDestTableName(); log.info("Create extract output with table name is " + outputTableName); Extract extract = createExtract(tableType, nameSpaceName, outputTableName); // Setting current time for the full extract if (Boolean.valueOf(state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY))) { extract.setFullTrue(System.currentTimeMillis()); } for (Partition partition : partitions) { WorkUnit workunit = WorkUnit.create(extract); workunit.setProp(ConfigurationKeys.SOURCE_ENTITY, sourceEntity.getSourceEntityName()); workunit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, sourceEntity.getDestTableName()); workunit.setProp(WORK_UNIT_STATE_VERSION_KEY, CURRENT_WORK_UNIT_STATE_VERSION); addLineageSourceInfo(state, sourceEntity, workunit); partition.serialize(workunit); workUnits.add(workunit); } return workUnits; }
/**
 * Returns the source entities for the given state by delegating to
 * {@code getSourceEntitiesHelper}.
 *
 * @param state state forwarded unchanged to the helper
 * @return the set produced by {@code getSourceEntitiesHelper}
 */
protected Set<SourceEntity> getSourceEntities(State state) {
  Set<SourceEntity> entities = getSourceEntitiesHelper(state);
  return entities;
}
@Override public List<WorkUnit> getWorkunits(SourceState state) { initLogger(state); lineageInfo = LineageInfo.getLineageInfo(state.getBroker()); Set<SourceEntity> entities = getFilteredSourceEntities(state); Map<SourceEntity, State> tableSpecificPropsMap = shouldObtainTablePropsFromConfigStore(state) ? getTableSpecificPropsFromConfigStore(entities, state) : getTableSpecificPropsFromState(entities, state); Map<SourceEntity, Long> prevWatermarksByTable = getPreviousWatermarksForAllTables(state); sourceEntity, !entities.contains(sourceEntity)); SourceState combinedState = getCombinedState(state, tableSpecificPropsMap.get(sourceEntity)); long previousWatermark = prevWatermarksByTable.containsKey(sourceEntity) ? prevWatermarksByTable.get(sourceEntity) workUnits.addAll(generateWorkUnits(sourceEntity, combinedState, previousWatermark)); List<WorkUnit> previousWorkUnits = this.getPreviousWorkUnitsForRetry(state); log.info("Total number of incomplete tasks from the previous run: " + previousWorkUnits.size()); workUnits.addAll(previousWorkUnits); state.getPropAsInt(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY, ConfigurationKeys.DEFAULT_MR_JOB_MAX_MAPPERS); return pack(workUnits, numOfMultiWorkunits);
/**
 * Looks up the source entities for the job and passes them through
 * {@code getFilteredSourceEntitiesHelper}.
 *
 * @param state source state handed to both the entity lookup and the filtering helper
 * @return whatever {@code getFilteredSourceEntitiesHelper} returns for the looked-up entities
 */
protected Set<SourceEntity> getFilteredSourceEntities(SourceState state) {
  return getFilteredSourceEntitiesHelper(state, getSourceEntities(state));
}