// Fragment (incomplete in this view — enclosing method not visible).
// Reads the last-processed marker from the time-tracking KeyValueTable; a null
// result presumably means nothing has been recorded yet — TODO confirm against
// the branch body, which is cut off at the end of this line.
KeyValueTable table = context.getDataset(config.getTimeTable()); String datesToRead = Bytes.toString(table.read(LAST_TIME_READ)); if (datesToRead == null) {
// Fragment (incomplete in this view — enclosing method not visible).
// When reprocessing is disabled, loads the tracking table so already-processed
// files can be skipped — presumably; the consuming code is cut off.
// NOTE(review): `processedFiles` is assigned `new ArrayList<>()` twice — the second
// assignment inside the if-block is redundant; confirm and remove in the full file.
// NOTE(review): `table` here may shadow or conflict with a same-named variable in
// sibling fragments (see the KeyValueTable lookup elsewhere in this file) — verify.
List<String> processedFiles = new ArrayList<>(); if (!excelInputreaderConfig.reprocess) { KeyValueTable table = batchSourceContext.getDataset(excelInputreaderConfig.memoryTableName); processedFiles = new ArrayList<>(); Calendar cal = Calendar.getInstance();
// Fragment (incomplete in this view — enclosing method not visible).
// Fetches the file-tracking dataset and, when reprocessing is not required,
// prepares to collect the already-processed file names — the loop/body that
// fills `processedFiles` is cut off after this line.
// NOTE(review): diamond operator (`new ArrayList<>()`) would be more idiomatic
// than the explicit type argument here, if the file targets Java 7+ — confirm.
processedFileTrackingTable = context.getDataset(config.tableName); if (processedFileTrackingTable != null && !config.isReprocessingRequired()) { List<String> processedFiles = new ArrayList<String>();
@Override public void prepareRun(BatchSourceContext context) throws Exception { InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID); DatasetProperties datasetProperties = createProperties(inputFormatProvider); // Dataset must still be created if macros provided at configure time if (!context.datasetExists(config.getName())) { context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties); } PartitionedFileSet partitionedFileSet = context.getDataset(config.getName()); SnapshotFileSet snapshotFileSet = new SnapshotFileSet(partitionedFileSet); Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties()); if (config.getFileProperties() != null) { arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE); } Schema schema = config.getSchema(); if (schema.getFields() != null) { String formatName = getInputFormatName(); FieldOperation operation = new FieldReadOperation("Read", String.format("Read from SnapshotFile source in %s format.", formatName), EndPoint.of(context.getNamespace(), config.getName()), schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList())); context.record(Collections.singletonList(operation)); } context.setInput(Input.ofDataset(config.getName(), snapshotFileSet.getInputArguments(arguments))); }