/**
 * <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
 *
 * <p>Applies the given {@code PTransform} to this input {@code InputT} and returns its {@code
 * OutputT}. This uses {@code name} to identify this specific application of the transform. This
 * name is used in various places, including the monitoring UI, logging, and to stably identify
 * this application node in the {@link Pipeline} graph during update.
 *
 * <p>Each {@link PInput} subclass that provides an {@code apply} method should delegate to this
 * method to ensure proper registration with the {@link PipelineRunner}.
 */
@Internal
public static <InputT extends PInput, OutputT extends POutput> OutputT applyTransform(
    String name, InputT input, PTransform<? super InputT, OutputT> transform) {
  return input.getPipeline().applyInternal(name, input, transform);
}
@Override
public PDone expand(PInput input) {
  return PDone.in(input.getPipeline());
}
});
private AppliedPTransform<?, ?, ?> appliedPTransform(
    String fullName, PTransform<PInput, POutput> transform, Pipeline p) {
  PInput input = mock(PInput.class);
  when(input.getPipeline()).thenReturn(p);
  return AppliedPTransform.of(
      fullName, Collections.emptyMap(), Collections.emptyMap(), transform, p);
}
/**
 * <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
 *
 * <p>Like {@link #applyTransform(String, PInput, PTransform)} but defaulting to the name provided
 * by the {@link PTransform}.
 */
@Internal
public static <InputT extends PInput, OutputT extends POutput> OutputT applyTransform(
    InputT input, PTransform<? super InputT, OutputT> transform) {
  return input.getPipeline().applyInternal(transform.getName(), input, transform);
}
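// The Javadoc above asks every PInput subclass with an apply method to delegate to
// Pipeline.applyTransform. A minimal sketch of that delegation, assuming a
// hypothetical MyInput value type (not from the source; Beam's own PBegin and
// PCollection follow the same pattern):
public class MyInput implements PInput {
  private final Pipeline pipeline;

  public MyInput(Pipeline pipeline) {
    this.pipeline = pipeline;
  }

  @Override
  public Pipeline getPipeline() {
    return pipeline;
  }

  @Override
  public Map<TupleTag<?>, PValue> expand() {
    // A leaf input expands to no component values.
    return Collections.emptyMap();
  }

  // Delegates so the application is registered with the runner under the default name.
  public <OutputT extends POutput> OutputT apply(
      PTransform<? super MyInput, OutputT> transform) {
    return Pipeline.applyTransform(this, transform);
  }

  // Delegates with an explicit, stable name for the application node.
  public <OutputT extends POutput> OutputT apply(
      String name, PTransform<? super MyInput, OutputT> transform) {
    return Pipeline.applyTransform(name, this, transform);
  }
}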
@Override
public POutput expand(PInput input) {
  return PDone.in(input.getPipeline());
}
};
@Override
public PCollection<FeatureRow> expand(PInput input) {
  return input.getPipeline().apply("Read " + importSpec.getType(), getTransform());
}
@Override
public POutput expand(PInput input) {
  return PDone.in(input.getPipeline());
}
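// The PDone-returning fragments above come from sink-style transforms. A
// self-contained sketch of the full pattern, assuming an illustrative LogSink
// (class name and body are not from the source):
static class LogSink extends PTransform<PCollection<String>, PDone> {
  @Override
  public PDone expand(PCollection<String> input) {
    input.apply(
        "Log",
        ParDo.of(
            new DoFn<String, Void>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                System.out.println(c.element());
              }
            }));
    // PDone signals that this branch of the pipeline produces no further output.
    return PDone.in(input.getPipeline());
  }
}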
@Override
public PCollection<FeatureRow> expand(PInput input) {
  BigQuerySourceOptions options =
      OptionsParser.parse(importSpec.getOptionsMap(), BigQuerySourceOptions.class);
  List<String> entities = importSpec.getEntitiesList();
  Preconditions.checkArgument(
      entities.size() == 1, "exactly 1 entity must be set for BigQuery import");
  String url = String.format("%s:%s.%s", options.project, options.dataset, options.table);
  return input
      .getPipeline()
      .apply(BigQueryIO.read(new BigQueryToFeatureRowFn(importSpec)).from(url));
}
@Override
public final PCollection<ValueWithRecordId<T>> expand(PInput input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      IsBounded.UNBOUNDED,
      ValueWithRecordId.ValueWithRecordIdCoder.of(source.getOutputCoder()));
}
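// Hedged usage sketch: downstream code typically strips the dedup record ids with
// the companion StripIdsDoFn from ValueWithRecordId. `readWithIds` (a transform
// whose expand() is shown above) and the String element type are assumptions.
PCollection<String> values =
    pipeline
        .apply("ReadWithIds", readWithIds)
        .apply("StripIds", ParDo.of(new ValueWithRecordId.StripIdsDoFn<String>()));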
@Override
public PCollection<FeatureRow> expand(PInput input) {
  checkArgument(importSpec.getType().equals(PUBSUB_FEATURE_SOURCE_TYPE));
  PubSubReadOptions options =
      OptionsParser.parse(importSpec.getOptionsMap(), PubSubReadOptions.class);
  PubsubIO.Read<FeatureRow> read = readProtos();
  if (!Strings.isNullOrEmpty(options.subscription)) {
    read = read.fromSubscription(options.subscription);
  } else if (!Strings.isNullOrEmpty(options.topic)) {
    read = read.fromTopic(options.topic);
  }
  PCollection<FeatureRow> featureRow = input.getPipeline().apply(read);
  if (options.discardUnknownFeatures) {
    List<String> featureIds = new ArrayList<>();
    for (Field field : importSpec.getSchema().getFieldsList()) {
      String featureId = field.getFeatureId();
      if (!Strings.isNullOrEmpty(featureId)) {
        featureIds.add(featureId);
      }
    }
    return featureRow.apply(ParDo.of(new FilterFeatureRowDoFn(featureIds)));
  }
  return featureRow;
}
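// A plausible definition of the readProtos() helper the snippet above assumes:
// PubsubIO can decode protobuf payloads directly given the message class. This is
// a hedged sketch; the actual helper in the source may differ.
private PubsubIO.Read<FeatureRow> readProtos() {
  return PubsubIO.readProtos(FeatureRow.class);
}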
"CSV import needs schema with a least one field specified"); PCollection<StringMap> stringMaps = input.getPipeline().apply(CsvIO.read(path, fieldNames));
input.getPipeline().apply(kafkaIOReader);
@Override
public PCollection<StringMap> expand(PInput input) {
  PCollection<String> text = input.getPipeline().apply(TextIO.read().from(inputPath));
  return text
      .apply(
          ParDo.of(
              new DoFn<String, StringMap>() {
                @ProcessElement
                public void processElement(ProcessContext context) {
                  String line = context.element();
                  if (line != null && !line.isEmpty()) {
                    for (StringMap map : csvLineParser.records(line)) {
                      context.output(map);
                    }
                  }
                }
              }))
      .setCoder(SerializableCoder.of(StringMap.class));
}
}
@Override
public PCollection<Row> expand(PInput input) {
  BeamSqlEnv sqlEnv = BeamSqlEnv.readOnly(PCOLLECTION_NAME, toTableMap(input));
  // TODO: validate duplicate functions.
  sqlEnv.loadBeamBuiltinFunctions();
  registerFunctions(sqlEnv);
  if (autoUdfUdafLoad()) {
    sqlEnv.loadUdfUdafFromProvider();
  }
  return BeamSqlRelUtils.toPCollection(input.getPipeline(), sqlEnv.parseQuery(queryString()));
}
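// Hedged usage sketch of the expand() above through Beam's public SqlTransform API.
// `rows` is an assumed schema-aware PCollection<Row> and f_count an assumed field;
// a single input is registered under the implicit table name PCOLLECTION.
PCollection<Row> filtered =
    rows.apply(SqlTransform.query("SELECT * FROM PCOLLECTION WHERE f_count > 10"));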
@Override
public PCollection<FeatureRow> expand(PInput input) {
  JsonFileFeatureSourceOptions options =
      OptionsParser.parse(importSpec.getOptionsMap(), JsonFileFeatureSourceOptions.class);
  PCollection<String> jsonLines = input.getPipeline().apply(TextIO.read().from(options.path));
  return jsonLines.apply(
      ParDo.of(
          new DoFn<String, FeatureRow>() {
            @ProcessElement
            public void processElement(ProcessContext context) {
              String line = context.element();
              FeatureRow.Builder builder = FeatureRow.newBuilder();
              try {
                JsonFormat.parser().merge(line, builder);
                context.output(builder.build());
              } catch (InvalidProtocolBufferException e) {
                throw new RuntimeException(e);
              }
            }
          }));
}