@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  if (!context.datasetExists(config.tableName)) {
    context.createDataset(config.tableName, "table", DatasetProperties.EMPTY);
  }
  context.addOutput(Output.ofDataset(config.tableName));
}
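The config object referenced above is not shown in the snippet; a minimal sketch of what it could look like, using the standard CDAP PluginConfig idiom (the class name and annotations below are illustrative, not taken from the original):

// hypothetical config class for the snippet above; only tableName is used by it
public static class Conf extends PluginConfig {
  @Macro
  @Description("Name of the table dataset to write to.")
  public String tableName;
}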
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException, InstantiationException {
  tpfsSinkConfig.validate();
  OutputFormatProvider outputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(outputFormatProvider);
  if (!context.datasetExists(tpfsSinkConfig.name)) {
    context.createDataset(tpfsSinkConfig.name, TimePartitionedFileSet.class.getName(), datasetProperties);
  }

  // compute the output partition time, shifted back by the configured offset if one is set
  long outputPartitionTime = context.getLogicalStartTime();
  if (tpfsSinkConfig.partitionOffset != null) {
    outputPartitionTime -= TimeParser.parseDuration(tpfsSinkConfig.partitionOffset);
  }

  // the original snippet was truncated here; this reconstruction assumes the sink arguments
  // carry the partition time plus the configured path format and time zone
  Map<String, String> sinkArgs = new HashMap<>();
  TimePartitionedFileSetArguments.setOutputPartitionTime(sinkArgs, outputPartitionTime);
  if (tpfsSinkConfig.filePathFormat != null) {
    TimePartitionedFileSetArguments.setOutputPathFormat(sinkArgs, tpfsSinkConfig.filePathFormat,
                                                        tpfsSinkConfig.timeZone);
  }
  context.addOutput(Output.ofDataset(tpfsSinkConfig.name, sinkArgs));

  // record field-level lineage; the truncated snippet never declared schema, so it is
  // assumed here to come from the input
  Schema schema = context.getInputSchema();
  if (schema != null && schema.getFields() != null) {
    FieldOperation operation = new FieldWriteOperation("Write", "Wrote to TPFS dataset",
                                                       EndPoint.of(context.getNamespace(), tpfsSinkConfig.name),
                                                       schema.getFields().stream().map(Schema.Field::getName)
                                                         .collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }
}
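createProperties(...) is a helper defined elsewhere in the class; a plausible sketch, assuming it copies the format class name and its configuration onto FileSet dataset properties:

// hypothetical sketch of createProperties; the real implementation may set more properties
private DatasetProperties createProperties(OutputFormatProvider outputFormatProvider) {
  FileSetProperties.Builder properties = FileSetProperties.builder();
  properties.setOutputFormat(outputFormatProvider.getOutputFormatClassName());
  for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration().entrySet()) {
    properties.setOutputProperty(entry.getKey(), entry.getValue());
  }
  return properties.build();
}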
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException, InstantiationException {
  // if macros were provided, the dataset still needs to be created
  config.validate();
  OutputFormatProvider outputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(outputFormatProvider);
  if (!context.datasetExists(config.getName())) {
    context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties);
  }

  PartitionedFileSet files = context.getDataset(config.getName());
  snapshotFileSet = new SnapshotFileSet(files);

  // need to use all the dataset properties as arguments in case the dataset already exists,
  // created by a previous version of this plugin before output format plugins were used.
  // in that scenario, the output format attached to the dataset properties will be incorrect,
  // and must be overridden here.
  Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties());
  if (config.getFileProperties() != null) {
    arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
  }
  context.addOutput(Output.ofDataset(config.getName(),
                                     snapshotFileSet.getOutputArguments(context.getLogicalStartTime(), arguments)));
}
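The GSON and MAP_TYPE constants used above are not defined in the snippet; a minimal sketch, assuming the usual Gson idiom for deserializing a JSON object into a Map<String, String>:

// assumed companion constants (imports: com.google.gson.Gson,
// com.google.gson.reflect.TypeToken, java.lang.reflect.Type, java.util.Map)
private static final Gson GSON = new Gson();
private static final Type MAP_TYPE = new TypeToken<Map<String, String>>() { }.getType();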
@Override
public final void prepareRun(BatchSinkContext context) throws InstantiationException {
  config.validate();
  // set format specific properties.
  OutputFormatProvider outputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);

  // record field level lineage information
  // needs to happen before context.addOutput(), otherwise an external dataset without schema will be created.
  Schema schema = config.getSchema();
  if (schema == null) {
    schema = context.getInputSchema();
  }
  LineageRecorder lineageRecorder = new LineageRecorder(context, config.getReferenceName());
  lineageRecorder.createExternalDataset(schema);
  if (schema != null && schema.getFields() != null && !schema.getFields().isEmpty()) {
    recordLineage(lineageRecorder,
                  schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
  }

  Map<String, String> outputProperties = new HashMap<>(outputFormatProvider.getOutputFormatConfiguration());
  outputProperties.putAll(getFileSystemProperties(context));
  outputProperties.put(FileOutputFormat.OUTDIR, getOutputDir(context.getLogicalStartTime()));
  context.addOutput(Output.of(config.getReferenceName(),
                              new SinkOutputFormatProvider(outputFormatProvider.getOutputFormatClassName(),
                                                           outputProperties)));
}
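recordLineage(...) is a helper implemented elsewhere in the class; one plausible sketch, assuming the LineageRecorder.recordWrite API (the operation name and description strings are illustrative):

// hypothetical helper; the real implementation may use a different operation name/description
private void recordLineage(LineageRecorder lineageRecorder, List<String> outputFields) {
  lineageRecorder.recordWrite("Write", "Wrote to the sink.", outputFields);
}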
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  OutputFormatProvider outputFormatProvider =
    new BasicOutputFormatProvider(TextOutputFormat.class.getCanonicalName(),
                                  ImmutableMap.of(TextOutputFormat.OUTDIR, config.dirName));
  if (config.name != null) {
    Output output = Output.of(config.name, outputFormatProvider);
    output.alias(config.alias);
    context.addOutput(output);
  } else {
    context.addOutput(Output.of(config.alias, outputFormatProvider));
  }
}
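BasicOutputFormatProvider is referenced but not defined here; a minimal sketch, assuming it is a trivial OutputFormatProvider that returns fixed values:

// assumed helper: pairs an output format class name with its configuration
private static class BasicOutputFormatProvider implements OutputFormatProvider {
  private final String className;
  private final Map<String, String> configuration;

  BasicOutputFormatProvider(String className, Map<String, String> configuration) {
    this.className = className;
    this.configuration = configuration;
  }

  @Override
  public String getOutputFormatClassName() {
    return className;
  }

  @Override
  public Map<String, String> getOutputFormatConfiguration() {
    return configuration;
  }
}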
@Override
public void onRunFinish(boolean succeeded, BatchSinkContext context) {
  if (succeeded && tpfsSinkConfig.cleanPartitionsOlderThan != null) {
    long cutoffTime =
      context.getLogicalStartTime() - TimeParser.parseDuration(tpfsSinkConfig.cleanPartitionsOlderThan);
    TimePartitionedFileSet tpfs = context.getDataset(tpfsSinkConfig.name);
    LOG.info("Cleaning up partitions older than {}", tpfsSinkConfig.cleanPartitionsOlderThan);
    for (TimePartitionDetail timePartitionDetail : tpfs.getPartitionsByTime(0, cutoffTime)) {
      tpfs.dropPartition(timePartitionDetail.getTime());
    }
  }
}
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException {
  super.prepareRun(context);
  String schemaString = tableSinkConfig.getSchemaStr();
  if (schemaString != null) {
    try {
      Schema schema = Schema.parseJson(schemaString);
      if (schema.getFields() != null) {
        FieldOperation operation = new FieldWriteOperation("Write", "Wrote to CDAP Table",
                                                           EndPoint.of(context.getNamespace(), tableSinkConfig.getName()),
                                                           schema.getFields().stream().map(Schema.Field::getName)
                                                             .collect(Collectors.toList()));
        context.record(Collections.singletonList(operation));
      }
    } catch (IOException e) {
      throw new IllegalStateException("Failed to parse schema.", e);
    }
  }
}
@Override
public void onRunFinish(boolean succeeded, BatchSinkContext context) {
  super.onRunFinish(succeeded, context);
  if (succeeded) {
    try {
      snapshotFileSet.onSuccess(context.getLogicalStartTime());
    } catch (Exception e) {
      LOG.error("Exception updating state file with value of latest snapshot", e);
    }
    try {
      if (config.getCleanPartitionsOlderThan() != null) {
        long cutoffTime =
          context.getLogicalStartTime() - TimeParser.parseDuration(config.getCleanPartitionsOlderThan());
        snapshotFileSet.deleteMatchingPartitionsByTime(cutoffTime);
        LOG.debug("Cleaned up snapshots older than {}", config.getCleanPartitionsOlderThan());
      }
    } catch (IOException e) {
      LOG.error("Exception occurred while cleaning up older snapshots", e);
    }
  }
}
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  context.addOutput(Output.ofDataset(config.tableName));
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  String datasetName = properties.get(Properties.BatchReadableWritable.NAME);
  if (!context.datasetExists(datasetName)) {
    context.createDataset(datasetName,
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.addOutput(Output.ofDataset(datasetName));
}
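Properties.BatchReadableWritable is an external constants class; a sketch of the keys this snippet depends on (the literal values below are assumptions):

// assumed constants holder; the actual key strings depend on the plugin's Properties class
public static class BatchReadableWritable {
  public static final String NAME = "name";
  public static final String TYPE = "type";
}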