public static void runAvroToCsv(SampleOptions options) throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert Avro to CSV
  pipeline
      .apply("Read Avro files", AvroIO.readGenericRecords(schemaJson).from(options.getInputFile()))
      .apply(
          "Convert Avro to CSV formatted data",
          ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter())))
      .apply(
          "Write CSV formatted data", TextIO.write().to(options.getOutput()).withSuffix(".csv"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
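The ConvertAvroToCsv DoFn referenced above is not shown in this snippet. A minimal sketch of what it might look like, assuming it simply joins each GenericRecord's field values with the configured delimiter (a real implementation would likely also quote or escape values):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.transforms.DoFn;

static class ConvertAvroToCsv extends DoFn<GenericRecord, String> {
  private final String schemaJson;
  private final String delimiter;

  ConvertAvroToCsv(String schemaJson, String delimiter) {
    this.schemaJson = schemaJson;
    this.delimiter = delimiter;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    // The schema is carried as a JSON string because Avro's Schema class is not
    // Serializable; field order determines the CSV column order.
    Schema schema = new Schema.Parser().parse(schemaJson);
    StringBuilder row = new StringBuilder();
    for (Schema.Field field : schema.getFields()) {
      if (row.length() > 0) {
        row.append(delimiter);
      }
      row.append(c.element().get(field.name()));
    }
    c.output(row.toString());
  }
}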
// Fragment: the two writes below are the tails of MapElements transforms. The
// upstream PCollection<Struct>s (`singers` and `albums`, read from Spanner) are
// assumed here; they are not part of the original fragment.
singers
    .apply(
        MapElements.into(TypeDescriptors.strings())
            .via(
                (Struct input) ->
                    Joiner.on(DELIMITER)
                        .join(input.getLong(0), input.getString(1), input.getString(2))))
    .apply(TextIO.write().to(options.getSingersFilename()).withoutSharding());

albums
    .apply(
        MapElements.into(TypeDescriptors.strings())
            .via(
                (Struct input) ->
                    Joiner.on(DELIMITER)
                        .join(input.getLong(0), input.getLong(1), input.getString(2))))
    .apply(TextIO.write().to(options.getAlbumsFilename()).withoutSharding());
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  String instanceId = options.getInstanceId();
  String databaseId = options.getDatabaseId();

  // [START spanner_dataflow_read]
  // Query for all the columns and rows in the specified Spanner table
  PCollection<Struct> records =
      p.apply(
          SpannerIO.read()
              .withInstanceId(instanceId)
              .withDatabaseId(databaseId)
              .withQuery("SELECT * FROM " + options.getTable()));
  // [END spanner_dataflow_read]

  PCollection<Long> tableEstimatedSize =
      records
          // Estimate the size of every row
          .apply(EstimateSize.create())
          // Sum all the row sizes to get the total estimated size of the table
          .apply(Sum.longsGlobally());

  // Write the total size to a file
  tableEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());

  p.run().waitUntilFinish();
}
}
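EstimateSize is a helper transform from the same sample and is not shown here. A rough sketch, assuming a crude per-row estimate (the length of the row's string rendering) is acceptable; the real sample walks the columns by type to compute encoded sizes:

import com.google.cloud.spanner.Struct;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

static class EstimateSize extends PTransform<PCollection<Struct>, PCollection<Long>> {
  public static EstimateSize create() {
    return new EstimateSize();
  }

  @Override
  public PCollection<Long> expand(PCollection<Struct> input) {
    // Hypothetical estimate: the string form's length, not the true encoded size.
    return input.apply(
        MapElements.into(TypeDescriptors.longs())
            .via((Struct row) -> (long) row.toString().length()));
  }
}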
// Fragment: tail of a pipeline that writes the upstream string elements to a
// single (unsharded) output file.
.apply(TextIO.write().to(options.getOutput()).withoutSharding());
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  // [START spanner_dataflow_readall]
  PCollection<Struct> allRecords =
      p.apply(
              SpannerIO.read()
                  .withSpannerConfig(spannerConfig)
                  .withQuery(
                      "SELECT t.table_name FROM information_schema.tables AS t WHERE"
                          + " t.table_catalog = '' AND t.table_schema = ''"))
          .apply(
              MapElements.into(TypeDescriptor.of(ReadOperation.class))
                  .via(
                      (SerializableFunction<Struct, ReadOperation>)
                          input -> {
                            String tableName = input.getString(0);
                            return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
                          }))
          .apply(SpannerIO.readAll().withSpannerConfig(spannerConfig));
  // [END spanner_dataflow_readall]

  PCollection<Long> dbEstimatedSize =
      allRecords.apply(EstimateSize.create()).apply(Sum.longsGlobally());

  dbEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());

  p.run().waitUntilFinish();
}
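// Note on the pattern above: the pipeline first queries information_schema.tables
// for the database's table names, maps each name to a ReadOperation, and then lets
// SpannerIO.readAll() execute all the per-table reads inside a single transform,
// so the set of tables does not need to be known when the pipeline is constructed.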
@Override
public PDone buildIOWriter(PCollection<Row> input) {
  return input
      .apply("RowToString", writeConverter)
      .apply("WriteTextFiles", TextIO.write().withDelimiter(new char[] {}).to(filePattern));
}
}
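// Passing an empty char[] to withDelimiter() suppresses the newline that
// TextIO.write() would otherwise append after each element; presumably the
// writeConverter already terminates every row with its own record separator.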
@Override
public PDone expand(PCollectionTuple pCollectionTuple) {
  return pCollectionTuple
      .get(errorTag())
      .apply(TextIO.write().to(errorWritePath()).withNumShards(1));
}
@Override
public PDone expand(PBegin begin) {
  return begin.apply(Create.of(LINES)).apply(TextIO.write().to(filename));
}
}
@Test
public void testGetName() {
  assertEquals("TextIO.Write", TextIO.write().to("somefile").getName());
}
@Test
public void testWriteDisplayDataValidateThenHeader() {
  TextIO.Write write = TextIO.write().to("foo").withHeader("myHeader");

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("fileHeader", "myHeader"));
}
@Test
public void testWriteDisplayDataValidateThenFooter() {
  TextIO.Write write = TextIO.write().to("foo").withFooter("myFooter");

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("fileFooter", "myFooter"));
}
public static void run(Options options) {
  Pipeline p = Pipeline.create(options);
  double samplingThreshold = 0.1;

  p.apply(TextIO.read().from(options.getWikiInput()))
      .apply(MapElements.via(new ParseTableRowJson()))
      .apply(new ComputeTopSessions(samplingThreshold))
      .apply("Write", TextIO.write().to(options.getOutput()));

  p.run().waitUntilFinish();
}
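ParseTableRowJson is defined elsewhere in this example. A minimal sketch, assuming each input line is a JSON-encoded BigQuery TableRow (the Transport import path may vary by Beam version):

import java.io.IOException;
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.extensions.gcp.util.Transport;
import org.apache.beam.sdk.transforms.SimpleFunction;

static class ParseTableRowJson extends SimpleFunction<String, TableRow> {
  @Override
  public TableRow apply(String input) {
    try {
      // Decode one line of JSON into a TableRow.
      return Transport.getJsonFactory().fromString(input, TableRow.class);
    } catch (IOException e) {
      throw new RuntimeException("Failed parsing table row json", e);
    }
  }
}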
static void runWordCount(WordCountOptions options) {
  Pipeline p = Pipeline.create(options);

  // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
  // static FormatAsTextFn() to the ParDo transform.
  p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.write().to(options.getOutput()));

  p.run().waitUntilFinish();
}
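CountWords and FormatAsTextFn come from the WordCount example and are not shown here. A minimal sketch of FormatAsTextFn, assuming it renders each word/count pair as one text line for TextIO.write():

import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;

public static class FormatAsTextFn extends SimpleFunction<KV<String, Long>, String> {
  @Override
  public String apply(KV<String, Long> input) {
    // One output line per word, e.g. "beam: 42".
    return input.getKey() + ": " + input.getValue();
  }
}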
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadLines", TextIO.read().from(options.getInput()))
      .apply(Distinct.create())
      .apply("DedupedShakespeare", TextIO.write().to(options.getOutput()));

  p.run().waitUntilFinish();
}
}
@Test
public void testRuntimeOptionsNotCalledInApply() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  RuntimeTestOptions options = PipelineOptionsFactory.as(RuntimeTestOptions.class);

  p.apply(Create.of("")).apply(TextIO.write().to(options.getOutput()));
}
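This test only passes if TextIO.write().to(...) defers reading the option until runtime, which requires RuntimeTestOptions to expose its values as ValueProviders. A sketch of what such an options interface might look like (the getter names mirror the ones used above; the real definition is not shown here):

import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;

public interface RuntimeTestOptions extends PipelineOptions {
  // ValueProvider values may remain unset at graph-construction time.
  ValueProvider<String> getInput();
  void setInput(ValueProvider<String> value);

  ValueProvider<String> getOutput();
  void setOutput(ValueProvider<String> value);
}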
public static void main(String[] args) {
  WordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.write().to(options.getOutput()));

  p.run();
}
}
private Pipeline buildPipeline(DataflowPipelineOptions options) {
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(p);

  return p;
}
private Pipeline buildDataflowPipeline(DataflowPipelineOptions options) {
  options.setStableUniqueNames(CheckEnabled.ERROR);
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);

  return p;
}
@Test
public void testTextIOWithRuntimeParameters() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
  Pipeline p = buildDataflowPipeline(dataflowOptions);

  p.apply(TextIO.read().from(options.getInput())).apply(TextIO.write().to(options.getOutput()));
}
static void runWordCount(WordCountOptions options) {
  Pipeline p = Pipeline.create(options);

  p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.perElement())
      .apply(ParDo.of(new FormatAsStringFn()))
      .apply("WriteCounts", TextIO.write().to(options.getOutput()));

  p.run().waitUntilFinish();
}
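FormatAsStringFn is defined elsewhere in this example. A minimal sketch, assuming it formats each per-word count as a "word: count" line so TextIO.write() can emit it:

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

static class FormatAsStringFn extends DoFn<KV<String, Long>, String> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    // Render one "word: count" line per element.
    c.output(c.element().getKey() + ": " + c.element().getValue());
  }
}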