@Override
public KeyValue<byte[], byte[]> makeRecord(byte[] key, byte[] value) {
  return new KeyValue<>(key, value);
}
}
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, String>> emitter) throws Exception {
  emitter.emit(new KeyValue<>(NullWritable.get(), GSON.toJson(input)));
}
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, StructuredRecord>> emitter) {
  emitter.emit(new KeyValue<>(NullWritable.get(), input));
}
public void transform(KEY key, VALUE value) throws Exception {
  KeyValue<KEY, VALUE> input = new KeyValue<>(key, value);
  transformExecutor.runOneIteration(input);
}
/**
 * Transforms the input received from the previous stage into a {@link KeyValue} pair that can be consumed by the
 * output, as set in {@link BatchSink#prepareRun}. By default, the input object is used as both key and value.
 * This method is invoked inside the batch run.
 *
 * @param input the input to transform
 * @param emitter {@link Emitter} to emit data to the next stage
 * @throws Exception if there's an error during this method invocation
 */
@Override
public void transform(IN input, Emitter<KeyValue<KEY_OUT, VAL_OUT>> emitter) throws Exception {
  emitter.emit(new KeyValue<>((KEY_OUT) input, (VAL_OUT) input));
}
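A minimal sketch (not from the original listing) of how a concrete sink typically overrides this default, assuming the BatchSink contract documented above; the "id" field and the JSON conversion are illustrative choices:

// Hypothetical override: key each record on its "id" field and emit its JSON form as the value.
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<String, String>> emitter) throws Exception {
  Object key = input.get("id");
  emitter.emit(new KeyValue<>(String.valueOf(key), StructuredRecordStringConverter.toJsonString(input)));
}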
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], CubeFact>> emitter) throws Exception {
  // No key is needed here; only the transformed CubeFact is emitted as the value.
  emitter.emit(new KeyValue<byte[], CubeFact>(null, transform.transform(input)));
}
}
@Override
protected Function<StructuredRecord, KeyValue<AvroKey<GenericRecord>, NullWritable>> getConversion(
    TaskAttemptContext context) throws IOException {
  Configuration hConf = context.getConfiguration();
  Schema schema = Schema.parseJson(hConf.get(AvroOutputFormatProvider.SCHEMA_KEY));
  StructuredToAvroTransformer transformer = new StructuredToAvroTransformer(schema);
  return record -> {
    try {
      return new KeyValue<>(new AvroKey<>(transformer.transform(record)), NullWritable.get());
    } catch (IOException e) {
      throw new RuntimeException("Unable to transform structured record into a generic record", e);
    }
  };
}
}
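A minimal sketch (not from the original listing) of how the returned conversion function might be applied; the names conversion, record, and recordWriter are illustrative:

// Hypothetical usage: convert a StructuredRecord and hand the resulting pair to a Hadoop record writer.
Function<StructuredRecord, KeyValue<AvroKey<GenericRecord>, NullWritable>> conversion = getConversion(context);
KeyValue<AvroKey<GenericRecord>, NullWritable> pair = conversion.apply(record);
recordWriter.write(pair.getKey(), pair.getValue());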
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], Put>> emitter) throws Exception {
  Put put = recordPutTransformer.toPut(input);
  emitter.emit(new KeyValue<>(put.getRow(), put));
}
}
@Override
protected KeyValue<byte[], T> computeNext() {
  Preconditions.checkState(!closed);
  if (keyValueIterator.hasNext()) {
    KeyValue<byte[], byte[]> row = keyValueIterator.next();
    return new KeyValue<>(row.getKey(), decode(row.getValue()));
  }
  close();
  return null;
}
/**
 * Deserializes a history row to get the timestamp and the metadata it stores.
 *
 * @param row history row.
 * @return {@link KeyValue} whose key is the timestamp from the history row key and whose value is the
 *         deserialized {@link MetadataV1}, or null if the row holds no metadata.
 */
@Nullable
private KeyValue<Long, Object> convertFromV1History(Row row) {
  byte[] rowKey = row.getRow();
  // History rows do not store the entity type in the key, so to get the entity we read the value and
  // deserialize it. In v2 tables, the type will be added to the history row key.
  MetadataV1 metadata = V1_GSON.fromJson(row.getString(HISTORY_COLUMN), MetadataV1.class);
  if (metadata == null) {
    return null;
  }
  long historyTime = MdsHistoryKey.extractTime(rowKey, EntityIdKeyHelper.getV1TargetType(metadata.getEntityId()));
  // For history we do not care about the MetadataEntry key and value, since we are not going to extract
  // that information during the upgrade.
  return new KeyValue<>(historyTime, metadata);
}
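A minimal sketch (not from the original listing) of how this helper might be driven by a table scan; the v1HistoryTable handle and the surrounding loop are illustrative:

// Hypothetical usage: drain v1 history rows into (timestamp, MetadataV1) pairs.
List<KeyValue<Long, Object>> history = new ArrayList<>();
Scanner scanner = v1HistoryTable.scan(null, null);
try {
  Row row;
  while ((row = scanner.next()) != null) {
    KeyValue<Long, Object> entry = convertFromV1History(row);
    if (entry != null) {
      history.add(entry);  // key: history timestamp, value: deserialized MetadataV1
    }
  }
} finally {
  scanner.close();
}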
MetadataMigrator(CConfiguration cConf, DatasetFramework dsFramework, TransactionSystemClient txClient) {
  // Retry with exponential backoff if a migration attempt fails.
  super(RetryStrategies.exponentialDelay(1, 60, TimeUnit.SECONDS));
  this.dsFramework = dsFramework;
  this.batchSize = cConf.getInt(Constants.Metadata.MIGRATOR_BATCH_SIZE);
  this.transactional = Transactions.createTransactionalWithRetry(
    Transactions.createTransactional(new MultiThreadDatasetCache(new SystemDatasetInstantiator(dsFramework),
                                                                 txClient, NamespaceId.SYSTEM,
                                                                 Collections.emptyMap(), null, null)),
    org.apache.tephra.RetryStrategies.retryOnConflict(20, 100)
  );
  // Each KeyValue pairs a v1 metadata dataset with the v2 dataset it is migrated to.
  datasetIds.add(new KeyValue<>(NamespaceId.SYSTEM.dataset("system.metadata"),
                                NamespaceId.SYSTEM.dataset("v2.system")));
  datasetIds.add(new KeyValue<>(NamespaceId.SYSTEM.dataset("business.metadata"),
                                NamespaceId.SYSTEM.dataset("v2.business")));
}
@Override
protected KeyValue<byte[], byte[]> computeNext() {
  if (closed) {
    return endOfData();
  }
  Row next = scanner.next();
  if (next != null) {
    return new KeyValue<>(next.getRow(), next.get(KEY_COLUMN));
  }
  close();
  return null;
}
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], Put>> emitter) throws Exception {
  // Row key: current timestamp followed by a random UUID, so rows sort by write time while staying unique.
  byte[] ts = Bytes.toBytes(System.currentTimeMillis());
  byte[] rowkey = Bytes.concat(ts, Bytes.toBytes(UUID.randomUUID()));
  Put put = new Put(rowkey);
  put.add(SCHEMA_COL, input.getSchema().toString());
  put.add(RECORD_COL, StructuredRecordStringConverter.toJsonString(input));
  emitter.emit(new KeyValue<>(rowkey, put));
}
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<byte[], Put>> emitter) throws Exception {
  // Row key: a random UUID, so each record gets its own row.
  byte[] rowkey = Bytes.toBytes(UUID.randomUUID());
  Put put = new Put(rowkey);
  put.add(SCHEMA_COL, input.getSchema().toString());
  put.add(RECORD_COL, StructuredRecordStringConverter.toJsonString(input));
  emitter.emit(new KeyValue<>(rowkey, put));
}
@Override
public void transform(GROUP_VAL input, Emitter<KeyValue<OUT_KEY, OUT_VAL>> emitter) throws Exception {
  groupKeyEmitter.reset();
  aggregator.groupBy(input, groupKeyEmitter);
  // Emit one (group key, value) pair for every group key the aggregator assigned to this record.
  for (GROUP_KEY groupKey : groupKeyEmitter.getEntries()) {
    emitter.emit(new KeyValue<>(keyConversion.toWritable(groupKey), valConversion.toWritable(input)));
  }
}
}
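A minimal sketch (not from the original listing) of an Aggregator groupBy implementation that this mapper would call, assuming records are grouped on a single "category" field; the field name is illustrative:

// Hypothetical aggregator: each record belongs to exactly one group, keyed by its "category" field.
@Override
public void groupBy(StructuredRecord record, Emitter<String> emitter) throws Exception {
  emitter.emit(record.get("category"));
}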
/**
 * Deserializes a value row to get its metadata entry.
 *
 * @param row value row.
 * @return {@link KeyValue} whose key is null and whose value is the {@link MetadataEntry}, or null if the row
 *         does not contain a metadata key and value.
 */
@Nullable
private KeyValue<Long, Object> convertFromV1Value(Row row) {
  byte[] rowKey = row.getRow();
  String targetType = MdsKey.getTargetType(rowKey);
  NamespacedEntityId namespacedEntityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
  String key = MdsKey.getMetadataKey(targetType, rowKey);
  byte[] value = row.get(VALUE_COLUMN);
  if (key == null || value == null) {
    return null;
  }
  return new KeyValue<>(null, new MetadataEntry(namespacedEntityId, key, Bytes.toString(value)));
}
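A minimal sketch (not from the original listing) of how a caller could route the results of the two converters above; isHistoryRow and the writeV2* methods are hypothetical:

// Hypothetical routing: value rows come back with a null key, history rows with a timestamp key.
KeyValue<Long, Object> converted = isHistoryRow ? convertFromV1History(row) : convertFromV1Value(row);
if (converted != null) {
  if (converted.getKey() == null) {
    writeV2Value((MetadataEntry) converted.getValue());
  } else {
    writeV2History(converted.getKey(), (MetadataV1) converted.getValue());
  }
}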
@Override
public void transform(RecordInfo<INPUT_RECORD> input,
                      Emitter<KeyValue<OUT_KEY, TaggedWritable<OUT_VALUE>>> emitter) throws Exception {
  String stageName = input.getFromStage();
  JOIN_KEY key = joiner.joinOn(stageName, input.getValue());
  // Tag the value with the stage it came from so the reduce side knows which join input it belongs to.
  TaggedWritable<OUT_VALUE> output = new TaggedWritable<>(stageName, inputConversion.toWritable(input.getValue()));
  emitter.emit(new KeyValue<>(keyConversion.toWritable(key), output));
}
}
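A minimal sketch (not from the original listing) of a Joiner joinOn implementation that this mapper would call, assuming both input stages carry a "customer_id" field; the field name is illustrative:

// Hypothetical joiner: records from every stage are joined on their "customer_id" field.
@Override
public String joinOn(String stageName, StructuredRecord record) throws Exception {
  return record.get("customer_id");
}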