@Override
public PCollection<Long> expand(PCollection<Struct> input) {
  return input.apply(ParDo.of(new EstimateStructSizeFn()));
}
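// A minimal sketch of what EstimateStructSizeFn might look like: a DoFn that emits a
// rough byte-size estimate per com.google.cloud.spanner.Struct. The per-type size rules
// here are illustrative assumptions, not the sample's actual implementation.
static class EstimateStructSizeFn extends DoFn<Struct, Long> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    Struct row = c.element();
    long sum = 0;
    for (int i = 0; i < row.getColumnCount(); i++) {
      if (row.isNull(i)) {
        continue;
      }
      switch (row.getColumnType(i).getCode()) {
        case INT64:
        case FLOAT64:
          sum += 8; // assumed fixed width for 64-bit numeric columns
          break;
        case BOOL:
          sum += 1;
          break;
        case STRING:
          sum += row.getString(i).length();
          break;
        case BYTES:
          sum += row.getBytes(i).length();
          break;
        default:
          break; // other types (arrays, timestamps, ...) ignored in this sketch
      }
    }
    c.output(sum);
  }
}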
@Override
public PCollection<T> expand(PBegin input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED, coder);
}
/**
 * Returns a singleton {@link PCollectionList} containing the given {@link PCollection}.
 *
 * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling {@link #and} on
 * the result.
 */
public static <T> PCollectionList<T> of(PCollection<T> pc) {
  return new PCollectionList<T>(pc.getPipeline()).and(pc);
}
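// Usage sketch: building a three-element PCollectionList by chaining and(...) on the
// singleton returned by of(...). The PCollections first, second, and third are assumed
// to exist in the enclosing pipeline and share an element type.
PCollectionList<String> list = PCollectionList.of(first).and(second).and(third);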
public static void runCsvToAvro(SampleOptions options)
    throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert CSV to Avro
  pipeline.apply("Read CSV files", TextIO.read().from(options.getInputFile()))
      .apply("Convert CSV to Avro formatted data",
          ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter())))
      .setCoder(AvroCoder.of(GenericRecord.class, schema))
      .apply("Write Avro formatted data",
          AvroIO.writeGenericRecords(schemaJson)
              .to(options.getOutput())
              .withCodec(CodecFactory.snappyCodec())
              .withSuffix(".avro"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
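// Illustrative entry point for the conversion above, assuming SampleOptions is a
// PipelineOptions subinterface declaring getInputFile(), getOutput(), getAvroSchema(),
// and getCsvDelimiter().
public static void main(String[] args) throws IOException {
  SampleOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(SampleOptions.class);
  runCsvToAvro(options);
}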
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  // [START spanner_dataflow_readall]
  PCollection<Struct> allRecords =
      p.apply(
              SpannerIO.read()
                  .withSpannerConfig(spannerConfig)
                  .withQuery(
                      "SELECT t.table_name FROM information_schema.tables AS t"
                          + " WHERE t.table_catalog = '' AND t.table_schema = ''"))
          .apply(
              MapElements.into(TypeDescriptor.of(ReadOperation.class))
                  .via(
                      (SerializableFunction<Struct, ReadOperation>)
                          input -> {
                            String tableName = input.getString(0);
                            return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
                          }))
          .apply(SpannerIO.readAll().withSpannerConfig(spannerConfig));
  // [END spanner_dataflow_readall]

  PCollection<Long> dbEstimatedSize =
      allRecords.apply(EstimateSize.create()).apply(Sum.longsGlobally());

  dbEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());

  p.run().waitUntilFinish();
}
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  return PDone.in(input.getPipeline());
}
@Override
public final PCollection<ValueWithRecordId<T>> expand(PInput input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      IsBounded.UNBOUNDED,
      ValueWithRecordId.ValueWithRecordIdCoder.of(source.getOutputCoder()));
}
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  if (typeDescriptor.equals(TypeDescriptor.of(TestSchemaClass.class))) {
    return v -> Row.withSchema(EMPTY_SCHEMA).build();
  }
  return null;
}
@Test
public void testTypeDescriptorImmediate() throws Exception {
  assertEquals(Boolean.class, new TypeDescriptor<Boolean>() {}.getRawType());
  assertEquals(Double.class, new TypeDescriptor<Double>() {}.getRawType());
  assertEquals(Float.class, new TypeDescriptor<Float>() {}.getRawType());
  assertEquals(Integer.class, new TypeDescriptor<Integer>() {}.getRawType());
  assertEquals(Long.class, new TypeDescriptor<Long>() {}.getRawType());
  assertEquals(Short.class, new TypeDescriptor<Short>() {}.getRawType());
  assertEquals(String.class, new TypeDescriptor<String>() {}.getRawType());
}
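// Companion sketch (not from the original suite): the same anonymous-subclass trick also
// captures parameterized types, whose raw type resolves to the erased class. Assumes
// java.util.List is imported alongside the JUnit imports used above.
@Test
public void testTypeDescriptorParameterized() throws Exception {
  TypeDescriptor<List<String>> descriptor = new TypeDescriptor<List<String>>() {};
  assertEquals(List.class, descriptor.getRawType());
}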
/** The {@link TypeDescriptor} for {@link Map}. */
public static <K, V> TypeDescriptor<Map<K, V>> maps(
    TypeDescriptor<K> keyType, TypeDescriptor<V> valueType) {
  TypeDescriptor<Map<K, V>> typeDescriptor =
      new TypeDescriptor<Map<K, V>>() {}
          .where(new TypeParameter<K>() {}, keyType)
          .where(new TypeParameter<V>() {}, valueType);
  return typeDescriptor;
}
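// Usage sketch: composing a descriptor for Map<String, Integer> from the built-in
// string and integer descriptors in org.apache.beam.sdk.values.TypeDescriptors.
TypeDescriptor<Map<String, Integer>> mapType =
    TypeDescriptors.maps(TypeDescriptors.strings(), TypeDescriptors.integers());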
@Override
public PCollection<T> expand(PBegin input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.BOUNDED, coder);
}
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  if (typeDescriptor.equals(TypeDescriptor.of(TestDefaultSchemaClass.class))) {
    return v -> Row.withSchema(EMPTY_SCHEMA).build();
  }
  return null;
}
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  String instanceId = options.getInstanceId();
  String databaseId = options.getDatabaseId();

  // [START spanner_dataflow_read]
  // Query for all the columns and rows in the specified Spanner table
  PCollection<Struct> records =
      p.apply(
          SpannerIO.read()
              .withInstanceId(instanceId)
              .withDatabaseId(databaseId)
              .withQuery("SELECT * FROM " + options.getTable()));
  // [END spanner_dataflow_read]

  PCollection<Long> tableEstimatedSize =
      records
          // Estimate the size of every row
          .apply(EstimateSize.create())
          // Sum all the row sizes to get the total estimated size of the table
          .apply(Sum.longsGlobally());

  // Write the total size to a file
  tableEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());

  p.run().waitUntilFinish();
}
@Override
public PCollection<T> expand(PBegin input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      PCollection.IsBounded.UNBOUNDED,
      coder);
}
public static void runAvroToCsv(SampleOptions options)
    throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert Avro to CSV
  pipeline.apply("Read Avro files",
          AvroIO.readGenericRecords(schemaJson).from(options.getInputFile()))
      .apply("Convert Avro to CSV formatted data",
          ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter())))
      .apply("Write CSV formatted data",
          TextIO.write().to(options.getOutput()).withSuffix(".csv"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
@Override
public final PCollection<T> expand(PBegin input) {
  source.validate();
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      IsBounded.UNBOUNDED,
      source.getOutputCoder());
}
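// Usage sketch: this expand() runs when an unbounded Read is applied at the pipeline
// root via Read.from(...). MyEvent and MyUnboundedEventSource are hypothetical names,
// standing in for any UnboundedSource implementation.
PCollection<MyEvent> events = p.apply(Read.from(new MyUnboundedEventSource()));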
.apply("ParseSingers", ParDo.of(new ParseSinger())) .apply("CreateSingerMutation", ParDo.of(new DoFn<Singer, Mutation>() { @ProcessElement public void processElement(ProcessContext c) { .apply("WriteSingers", SpannerIO.write() .withInstanceId(instanceId) .withDatabaseId(databaseId)); .apply("ReadAlbums", TextIO.read().from(options.getAlbumsFilename())) .apply("ParseAlbums", ParDo.of(new ParseAlbum())); .apply("CreateAlbumMutation", ParDo.of(new DoFn<Album, Mutation>() { @ProcessElement public void processElement(ProcessContext c) { .apply("WriteAlbums", SpannerIO.write() .withInstanceId(instanceId) .withDatabaseId(databaseId));
singers.apply(MapElements.via(new SimpleFunction<Struct, String>() {
      @Override
      public String apply(Struct input) {
        return Joiner.on(DELIMITER)
            .join(input.getLong(0), input.getString(1), input.getString(2));
      }
    }))
    .apply(TextIO.write().to(options.getSingersFilename()).withoutSharding());

albums.apply(MapElements.via(new SimpleFunction<Struct, String>() {
      @Override
      public String apply(Struct input) {
        return Joiner.on(DELIMITER)
            .join(input.getLong(0), input.getLong(1), input.getString(2));
      }
    }))
    .apply(TextIO.write().to(options.getAlbumsFilename()).withoutSharding());