@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException, InstantiationException {
  // Fail fast on bad plugin configuration before touching any datasets.
  config.validate();

  // Instantiate the configured format plugin; its settings are folded into the dataset properties.
  InputFormatProvider formatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties dsProperties = createProperties(formatProvider);

  // Lazily create the backing TimePartitionedFileSet the first time this source runs.
  String datasetName = config.getName();
  if (!context.datasetExists(datasetName)) {
    context.createDataset(datasetName, TimePartitionedFileSet.class.getName(), dsProperties);
  }

  // Record field-level read lineage when the configured schema declares fields.
  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String description = String.format("Read from TimePartitionedFileSet in %s format.", getInputFormatName());
    FieldOperation readOp = new FieldReadOperation(
      "Read", description,
      EndPoint.of(context.getNamespace(), datasetName),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(readOp));
  }

  // Input window is [logicalStartTime - delay - duration, logicalStartTime - delay).
  // NOTE(review): assumes parseDuration and getLogicalStartTime share the same time unit
  // (presumably milliseconds) — confirm against TimeParser.
  long windowLength = TimeParser.parseDuration(config.getDuration());
  String delaySpec = config.getDelay();
  long delayAmount = Strings.isNullOrEmpty(delaySpec) ? 0 : TimeParser.parseDuration(delaySpec);
  long windowEnd = context.getLogicalStartTime() - delayAmount;
  long windowStart = windowEnd - windowLength;

  // Pass the dataset properties plus the computed partition window as runtime arguments.
  Map<String, String> runtimeArgs = Maps.newHashMap(dsProperties.getProperties());
  TimePartitionedFileSetArguments.setInputStartTime(runtimeArgs, windowStart);
  TimePartitionedFileSetArguments.setInputEndTime(runtimeArgs, windowEnd);
  context.setInput(Input.ofDataset(datasetName, runtimeArgs));
}