@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}

@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  super.prepareRun(context);
  Schema schema = tableConfig.getSchema();
  if (schema != null && schema.getFields() != null) {
    FieldOperation operation = new FieldReadOperation("Read", "Read from Table dataset",
                                                      EndPoint.of(context.getNamespace(), tableConfig.getName()),
                                                      schema.getFields().stream().map(Schema.Field::getName)
                                                        .collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }
}

@Override
public void prepareRun(BatchSourceContext context) {
  long duration = TimeParser.parseDuration(streamBatchConfig.duration);
  long delay = Strings.isNullOrEmpty(streamBatchConfig.delay) ? 0 : TimeParser.parseDuration(streamBatchConfig.delay);
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;
  LOG.debug("Setting input to Stream : {}", streamBatchConfig.name);
  FormatSpecification formatSpec = streamBatchConfig.getFormatSpec();
  Input input = formatSpec == null
    ? Input.ofStream(streamBatchConfig.name, startTime, endTime)
    : Input.ofStream(streamBatchConfig.name, startTime, endTime, formatSpec);
  context.setInput(input);
}

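In other words, this source reads the stream window [logicalStartTime - delay - duration, logicalStartTime - delay). As an illustration (values assumed, not taken from the original plugin): with duration = "1h" and delay = "10m", each run covers the hour of stream data that ended ten minutes before the run's logical start time.
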
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  super.prepareRun(context);
  // Need to create the dataset now if a macro was provided at configure time
  if (config.getTimeTable() != null && !context.datasetExists(config.getTimeTable())) {
    context.createDataset(config.getTimeTable(), KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // Dataset must still be created if macros were provided at configure time
  if (!context.datasetExists(config.getName())) {
    context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties);
  }

  PartitionedFileSet partitionedFileSet = context.getDataset(config.getName());
  SnapshotFileSet snapshotFileSet = new SnapshotFileSet(partitionedFileSet);

  Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties());
  if (config.getFileProperties() != null) {
    arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from SnapshotFile source in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  context.setInput(Input.ofDataset(config.getName(), snapshotFileSet.getInputArguments(arguments)));
}

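The snippet relies on GSON and MAP_TYPE, which are declared elsewhere in the class. A minimal sketch of what those declarations could look like, assuming Gson handles the JSON shown above (the original class's field names and setup may differ):

// Assumed declarations, shown for context only
// (com.google.gson.Gson, com.google.gson.reflect.TypeToken, java.lang.reflect.Type).
private static final Gson GSON = new Gson();
private static final Type MAP_TYPE = new TypeToken<Map<String, String>>() { }.getType();
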
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException, InstantiationException {
  config.validate();
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // If macros were provided, the dataset still needs to be created at runtime
  if (!context.datasetExists(config.getName())) {
    String tpfsName = config.getName();
    context.createDataset(tpfsName, TimePartitionedFileSet.class.getName(), datasetProperties);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from TimePartitionedFileSet in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  long duration = TimeParser.parseDuration(config.getDuration());
  long delay = Strings.isNullOrEmpty(config.getDelay()) ? 0 : TimeParser.parseDuration(config.getDelay());
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;

  Map<String, String> sourceArgs = Maps.newHashMap(datasetProperties.getProperties());
  TimePartitionedFileSetArguments.setInputStartTime(sourceArgs, startTime);
  TimePartitionedFileSetArguments.setInputEndTime(sourceArgs, endTime);
  context.setInput(Input.ofDataset(config.getName(), sourceArgs));
}

config.validate();
if (!Strings.isNullOrEmpty(config.tableName) && !context.datasetExists(config.tableName)) {
  context.createDataset(config.tableName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
}
long startTime = context.getLogicalStartTime();
// 'conf', the Hadoop Configuration handed to XMLInputFormat, is populated in code omitted from this excerpt
context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(XMLInputFormat.class, conf)));

MetadataEntity metadataEntity = MetadataEntity.ofDataset(context.getNamespace(), config.tableName);
Map<MetadataScope, Metadata> currentMetadata = context.getMetadata(metadataEntity);
Set<MetadataOperation> operations = GSON.fromJson(config.metadataOperations, SET_METADATA_OPERATION_TYPE);
// apply each requested metadata operation
for (MetadataOperation curOperation : operations) {
  switch (curOperation.getType()) {
    case PUT:
      context.addTags(curOperation.getEntity(), curOperation.getMetadata().getTags());
      context.addProperties(curOperation.getEntity(), curOperation.getMetadata().getProperties());
      break;
    case DELETE:
      context.removeTags(curOperation.getEntity(), curOperation.getMetadata().getTags().toArray(new String[0]));
      context.removeProperties(curOperation.getEntity(),
                               curOperation.getMetadata().getProperties().keySet().toArray(new String[0]));
      break;
    case DELETE_ALL:
      context.removeMetadata(curOperation.getEntity());
      break;
    case DELETE_ALL_TAGS:
      context.removeTags(curOperation.getEntity());
      break;
    case DELETE_ALL_PROPERTIES:
      context.removeProperties(curOperation.getEntity());
      break;
    default:
      // other operation types are not handled in this excerpt
      break;
  }
}

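As with MAP_TYPE above, GSON and SET_METADATA_OPERATION_TYPE are declared outside the excerpt; assuming Gson is used, the type token would look roughly like this (the real class may also register custom adapters for MetadataOperation):

// Assumed declaration, shown for context only.
private static final Type SET_METADATA_OPERATION_TYPE = new TypeToken<Set<MetadataOperation>>() { }.getType();
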
config.validate();
InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
context.setInput(Input.of(config.getReferenceName(), new SourceInputFormatProvider(inputFormatClass, conf)));

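The variables inputFormatClass and conf are not defined in this excerpt. A minimal sketch of how they might be derived from the instantiated format plugin, assuming a Hadoop Configuration is built from the plugin's settings (this wiring is an assumption, not the original plugin's code):

// Build a Hadoop Configuration from the format plugin's settings (assumed wiring).
Configuration conf = new Configuration();
for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
  conf.set(entry.getKey(), entry.getValue());
}
// Resolve the input format class named by the plugin.
@SuppressWarnings("unchecked")
Class<? extends InputFormat> inputFormatClass =
  (Class<? extends InputFormat>) conf.getClassByName(inputFormatProvider.getInputFormatClassName());
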
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.of(config.name, new InputFormatProvider() {
    @Override
    public String getInputFormatClassName() {
      return TextInputFormat.class.getCanonicalName();
    }

    @Override
    public Map<String, String> getInputFormatConfiguration() {
      return ImmutableMap.of(TextInputFormat.INPUT_DIR, config.dirName);
    }
  }));
}

Date prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
Calendar cal = Calendar.getInstance();
cal.setTime(prevHour);
KeyValueTable table = context.getDataset(config.getTimeTable());
String datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
if (datesToRead == null) {

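LAST_TIME_READ is a key constant defined outside the excerpt; a hypothetical declaration consistent with how it is used against the KeyValueTable (the actual name and value belong to the original plugin):

// Hypothetical key constant, for illustration only.
private static final byte[] LAST_TIME_READ = Bytes.toBytes("last.time.read");
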
List<String> processedFiles = new ArrayList<>();
if (!excelInputreaderConfig.reprocess) {
  KeyValueTable table = batchSourceContext.getDataset(excelInputreaderConfig.memoryTableName);
  processedFiles = new ArrayList<>();
  Calendar cal = Calendar.getInstance();

} else if (context != null && !context.datasetExists(excelInputreaderConfig.errorDatasetName)) {
  context.createDataset(excelInputreaderConfig.errorDatasetName, Table.class.getName(), datasetProperties);
}
if (pipelineConfigurer != null) {
  pipelineConfigurer.createDataset(excelInputreaderConfig.memoryTableName, KeyValueTable.class);
} else if (context != null && !context.datasetExists(excelInputreaderConfig.memoryTableName)) {
  context.createDataset(excelInputreaderConfig.memoryTableName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
}

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  if (config.metadataOperations != null) {
    // if there are metadata operations to be performed, apply them
    processsMetadata(context);
  }
}

processedFileTrackingTable = context.getDataset(config.tableName);
if (processedFileTrackingTable != null && !config.isReprocessingRequired()) {
  List<String> processedFiles = new ArrayList<String>();

@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  // if macros were provided at runtime, the dataset needs to be created now
  if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) {
    context.createDataset(properties.get(Properties.BatchReadableWritable.NAME),
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME)));
}

@Override
public void prepareRun(BatchSourceContext batchSourceContext) throws Exception {
  excelInputreaderConfig.validate();
  createDatasets(null, batchSourceContext);

  Job job = JobUtils.createInstance();

  String processFiles = "";
  if (!Strings.isNullOrEmpty(excelInputreaderConfig.memoryTableName)) {
    processFiles = GSON.toJson(getAllProcessedFiles(batchSourceContext), ARRAYLIST_PREPROCESSED_FILES);
  }

  ExcelInputFormat.setConfigurations(job, excelInputreaderConfig.filePattern, excelInputreaderConfig.sheet,
                                     excelInputreaderConfig.reprocess, excelInputreaderConfig.sheetValue,
                                     excelInputreaderConfig.columnList, excelInputreaderConfig.skipFirstRow,
                                     excelInputreaderConfig.terminateIfEmptyRow, excelInputreaderConfig.rowsLimit,
                                     excelInputreaderConfig.ifErrorRecord, processFiles);

  // Sets the input path(s).
  ExcelInputFormat.addInputPaths(job, excelInputreaderConfig.filePath);

  // Sets the input path filter using the extended filter class implementation.
  ExcelInputFormat.setInputPathFilter(job, ExcelReaderRegexFilter.class);

  SourceInputFormatProvider inputFormatProvider =
    new SourceInputFormatProvider(ExcelInputFormat.class, job.getConfiguration());

  batchSourceContext.setInput(Input.of(excelInputreaderConfig.referenceName, inputFormatProvider));
}