@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}

@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  super.prepareRun(context);
  Schema schema = tableConfig.getSchema();
  if (schema != null && schema.getFields() != null) {
    FieldOperation operation = new FieldReadOperation("Read", "Read from Table dataset",
                                                      EndPoint.of(context.getNamespace(), tableConfig.getName()),
                                                      schema.getFields().stream().map(Schema.Field::getName)
                                                        .collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }
}

@Override
public void prepareRun(BatchSourceContext context) {
  long duration = TimeParser.parseDuration(streamBatchConfig.duration);
  long delay = Strings.isNullOrEmpty(streamBatchConfig.delay) ? 0 : TimeParser.parseDuration(streamBatchConfig.delay);
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;
  LOG.debug("Setting input to Stream : {}", streamBatchConfig.name);
  FormatSpecification formatSpec = streamBatchConfig.getFormatSpec();
  Input input = formatSpec == null
    ? Input.ofStream(streamBatchConfig.name, startTime, endTime)
    : Input.ofStream(streamBatchConfig.name, startTime, endTime, formatSpec);
  context.setInput(input);
}

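In other words, this source reads the stream window [logicalStartTime - delay - duration, logicalStartTime - delay). As an illustration (values assumed, not taken from the original plugin): with duration = "1h" and delay = "10m", each run covers the hour of stream data that ended ten minutes before the run's logical start time.
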
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  super.prepareRun(context);
  // Need to create the dataset now if a macro was provided at configure time
  if (config.getTimeTable() != null && !context.datasetExists(config.getTimeTable())) {
    context.createDataset(config.getTimeTable(), KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // Dataset must still be created if macros were provided at configure time
  if (!context.datasetExists(config.getName())) {
    context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties);
  }

  PartitionedFileSet partitionedFileSet = context.getDataset(config.getName());
  SnapshotFileSet snapshotFileSet = new SnapshotFileSet(partitionedFileSet);

  Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties());
  if (config.getFileProperties() != null) {
    arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from SnapshotFile source in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  context.setInput(Input.ofDataset(config.getName(), snapshotFileSet.getInputArguments(arguments)));
}

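The snippet relies on GSON and MAP_TYPE, which are declared elsewhere in the class. A minimal sketch of what those declarations could look like, assuming Gson handles the JSON shown above (the original class's field names and setup may differ):

// Assumed declarations, shown for context only
// (com.google.gson.Gson, com.google.gson.reflect.TypeToken, java.lang.reflect.Type).
private static final Gson GSON = new Gson();
private static final Type MAP_TYPE = new TypeToken<Map<String, String>>() { }.getType();
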
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException, InstantiationException {
  config.validate();
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // If macros were provided, the dataset still needs to be created at runtime
  if (!context.datasetExists(config.getName())) {
    String tpfsName = config.getName();
    context.createDataset(tpfsName, TimePartitionedFileSet.class.getName(), datasetProperties);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from TimePartitionedFileSet in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  long duration = TimeParser.parseDuration(config.getDuration());
  long delay = Strings.isNullOrEmpty(config.getDelay()) ? 0 : TimeParser.parseDuration(config.getDelay());
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;

  Map<String, String> sourceArgs = Maps.newHashMap(datasetProperties.getProperties());
  TimePartitionedFileSetArguments.setInputStartTime(sourceArgs, startTime);
  TimePartitionedFileSetArguments.setInputEndTime(sourceArgs, endTime);
  context.setInput(Input.ofDataset(config.getName(), sourceArgs));
}

config.validate();
if (!Strings.isNullOrEmpty(config.tableName) && !context.datasetExists(config.tableName)) {
  context.createDataset(config.tableName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
}
long startTime = context.getLogicalStartTime();
// 'conf', the Hadoop Configuration handed to XMLInputFormat, is populated in code omitted from this excerpt
context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(XMLInputFormat.class, conf)));

MetadataEntity metadataEntity = MetadataEntity.ofDataset(context.getNamespace(), config.tableName);
Map<MetadataScope, Metadata> currentMetadata = context.getMetadata(metadataEntity);
Set<MetadataOperation> operations = GSON.fromJson(config.metadataOperations, SET_METADATA_OPERATION_TYPE);
// apply each requested metadata operation
for (MetadataOperation curOperation : operations) {
  switch (curOperation.getType()) {
    case PUT:
      context.addTags(curOperation.getEntity(), curOperation.getMetadata().getTags());
      context.addProperties(curOperation.getEntity(), curOperation.getMetadata().getProperties());
      break;
    case DELETE:
      context.removeTags(curOperation.getEntity(), curOperation.getMetadata().getTags().toArray(new String[0]));
      context.removeProperties(curOperation.getEntity(),
                               curOperation.getMetadata().getProperties().keySet().toArray(new String[0]));
      break;
    case DELETE_ALL:
      context.removeMetadata(curOperation.getEntity());
      break;
    case DELETE_ALL_TAGS:
      context.removeTags(curOperation.getEntity());
      break;
    case DELETE_ALL_PROPERTIES:
      context.removeProperties(curOperation.getEntity());
      break;
    default:
      // other operation types are not handled in this excerpt
      break;
  }
}

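As with MAP_TYPE above, GSON and SET_METADATA_OPERATION_TYPE are declared outside the excerpt; assuming Gson is used, the type token would look roughly like this (the real class may also register custom adapters for MetadataOperation):

// Assumed declaration, shown for context only.
private static final Type SET_METADATA_OPERATION_TYPE = new TypeToken<Set<MetadataOperation>>() { }.getType();
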
config.validate();
InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
context.setInput(Input.of(config.getReferenceName(), new SourceInputFormatProvider(inputFormatClass, conf)));

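The variables inputFormatClass and conf are not defined in this excerpt. A minimal sketch of how they might be derived from the instantiated format plugin, assuming a Hadoop Configuration is built from the plugin's settings (this wiring is an assumption, not the original plugin's code):

// Build a Hadoop Configuration from the format plugin's settings (assumed wiring).
Configuration conf = new Configuration();
for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
  conf.set(entry.getKey(), entry.getValue());
}
// Resolve the input format class named by the plugin.
@SuppressWarnings("unchecked")
Class<? extends InputFormat> inputFormatClass =
  (Class<? extends InputFormat>) conf.getClassByName(inputFormatProvider.getInputFormatClassName());
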
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.of(config.name, new InputFormatProvider() {
    @Override
    public String getInputFormatClassName() {
      return TextInputFormat.class.getCanonicalName();
    }

    @Override
    public Map<String, String> getInputFormatConfiguration() {
      return ImmutableMap.of(TextInputFormat.INPUT_DIR, config.dirName);
    }
  }));
}

Date prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
Calendar cal = Calendar.getInstance();
cal.setTime(prevHour);
KeyValueTable table = context.getDataset(config.getTimeTable());
String datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
if (datesToRead == null) {

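LAST_TIME_READ is a key constant defined outside the excerpt; a hypothetical declaration consistent with how it is used against the KeyValueTable (the actual name and value belong to the original plugin):

// Hypothetical key constant, for illustration only.
private static final byte[] LAST_TIME_READ = Bytes.toBytes("last.time.read");
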
List<String> processedFiles = new ArrayList<>();
if (!excelInputreaderConfig.reprocess) {
  KeyValueTable table = batchSourceContext.getDataset(excelInputreaderConfig.memoryTableName);
  processedFiles = new ArrayList<>();
  Calendar cal = Calendar.getInstance();

} else if (context != null && !context.datasetExists(excelInputreaderConfig.errorDatasetName)) {
  context.createDataset(excelInputreaderConfig.errorDatasetName, Table.class.getName(), datasetProperties);
}
if (pipelineConfigurer != null) {
  pipelineConfigurer.createDataset(excelInputreaderConfig.memoryTableName, KeyValueTable.class);
} else if (context != null && !context.datasetExists(excelInputreaderConfig.memoryTableName)) {
  context.createDataset(excelInputreaderConfig.memoryTableName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
}

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  if (config.metadataOperations != null) {
    // if there are metadata operations to be performed, apply them
    processsMetadata(context);
  }
}

processedFileTrackingTable = context.getDataset(config.tableName);
if (processedFileTrackingTable != null && !config.isReprocessingRequired()) {
  List<String> processedFiles = new ArrayList<String>();

@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  // if macros were provided at runtime, the dataset needs to be created now
  if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) {
    context.createDataset(properties.get(Properties.BatchReadableWritable.NAME),
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME)));
}

@Override
public void prepareRun(BatchSourceContext batchSourceContext) throws Exception {
  excelInputreaderConfig.validate();
  createDatasets(null, batchSourceContext);

  Job job = JobUtils.createInstance();

  String processFiles = "";
  if (!Strings.isNullOrEmpty(excelInputreaderConfig.memoryTableName)) {
    processFiles = GSON.toJson(getAllProcessedFiles(batchSourceContext), ARRAYLIST_PREPROCESSED_FILES);
  }

  ExcelInputFormat.setConfigurations(job, excelInputreaderConfig.filePattern, excelInputreaderConfig.sheet,
                                     excelInputreaderConfig.reprocess, excelInputreaderConfig.sheetValue,
                                     excelInputreaderConfig.columnList, excelInputreaderConfig.skipFirstRow,
                                     excelInputreaderConfig.terminateIfEmptyRow, excelInputreaderConfig.rowsLimit,
                                     excelInputreaderConfig.ifErrorRecord, processFiles);

  // Sets the input path(s).
  ExcelInputFormat.addInputPaths(job, excelInputreaderConfig.filePath);

  // Sets the input path filter using the extended filter class implementation.
  ExcelInputFormat.setInputPathFilter(job, ExcelReaderRegexFilter.class);

  SourceInputFormatProvider inputFormatProvider =
    new SourceInputFormatProvider(ExcelInputFormat.class, job.getConfiguration());

  batchSourceContext.setInput(Input.of(excelInputreaderConfig.referenceName, inputFormatProvider));
}