@Override
public PCollection<Long> expand(PCollectionList<Long> input) {
  return input.get(0);
}
});
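// For context, the fragment above is the expand() method of an anonymous composite
// transform that forwards the first collection in its input list unchanged. A minimal,
// self-contained sketch of how such a transform might be declared and applied (the
// pipeline contents and names here are illustrative, not from the original source):
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

public class TakeFirstExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();
    PCollectionList<Long> list =
        PCollectionList.of(pipeline.apply("A", Create.of(1L, 2L)))
            .and(pipeline.apply("B", Create.of(3L)));

    // Composite transform over a PCollectionList that returns the first collection.
    PCollection<Long> first =
        list.apply(
            "TakeFirst",
            new PTransform<PCollectionList<Long>, PCollection<Long>>() {
              @Override
              public PCollection<Long> expand(PCollectionList<Long> input) {
                return input.get(0);
              }
            });

    pipeline.run().waitUntilFinish();
  }
}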
// Excerpt (reconstructed): derive the output windowing strategy, boundedness, and
// coder of the flattened output from the inputs.
WindowingStrategy<?, ?> windowingStrategy;
IsBounded isBounded = IsBounded.BOUNDED;
if (!inputs.getAll().isEmpty()) {
  windowingStrategy = inputs.get(0).getWindowingStrategy();
  for (PCollection<?> input : inputs.getAll()) {
    WindowingStrategy<?, ?> other = input.getWindowingStrategy();
    // ... (compatibility checks between windowingStrategy and other elided)
    isBounded = isBounded.and(input.isBounded());
  }
} else {
  windowingStrategy = WindowingStrategy.globalDefault();
}
return PCollection.createPrimitiveOutputInternal(
    inputs.getPipeline(),
    windowingStrategy,
    isBounded,
    inputs.getAll().isEmpty() ? null : inputs.get(0).getCoder());
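// The logic above appears to be the core of Flatten.pCollections()'s expansion. In user
// code the primitive is typically reached like this (a minimal usage sketch, assuming an
// existing Pipeline named `pipeline`):
PCollection<String> a = pipeline.apply("A", Create.of("one"));
PCollection<String> b = pipeline.apply("B", Create.of("two", "three"));
PCollection<String> merged =
    PCollectionList.of(a).and(b).apply(Flatten.pCollections());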
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamIOSinkRel.class.getSimpleName(),
      pinput);
  PCollection<Row> input = pinput.get(0);
  sqlTable.buildIOWriter(input);
  return input;
}
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // The set of rows where we run the correlated unnest for each row
  PCollection<Row> outer = pinput.get(0);

  Schema joinedSchema = CalciteUtils.toSchema(rowType);

  return outer
      .apply(ParDo.of(new UnnestFn(joinedSchema, unnestIndex)))
      .setRowSchema(joinedSchema);
}
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamUncollectRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  // Each row of the input contains a single array of things to be emitted; Calcite knows
  // what the row looks like
  Schema outputSchema = CalciteUtils.toSchema(getRowType());

  PCollection<Row> uncollected =
      upstream.apply(ParDo.of(new UncollectDoFn(outputSchema))).setRowSchema(outputSchema);

  return uncollected;
}
}
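// UncollectDoFn is not shown in this excerpt. Based on the comment above, it presumably
// iterates the single array column of each input row and emits one output row per array
// element. A hedged sketch (the field index and row-building details are assumptions,
// not taken from the original source):
private static class UncollectDoFn extends DoFn<Row, Row> {
  private final Schema schema;

  private UncollectDoFn(Schema schema) {
    this.schema = schema;
  }

  @ProcessElement
  public void process(@Element Row inputRow, OutputReceiver<Row> out) {
    // Assumes the input row's only field (index 0) holds the array to flatten.
    for (Object element : inputRow.getArray(0)) {
      out.output(Row.withSchema(schema).addValue(element).build());
    }
  }
}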
BeamAggregationRel.class.getSimpleName(), pinput);
PCollection<Row> upstream = pinput.get(0);
PCollection<Row> windowedStream = upstream;
if (windowFn != null) {
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamCalcRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(BeamCalcRel.this.getProgram());

  // Compute the output schema once and reuse it; the original computed it three times
  // and called setRowSchema redundantly on the already-annotated output.
  Schema schema = CalciteUtils.toSchema(rowType);
  PCollection<Row> projectStream =
      upstream.apply(ParDo.of(new CalcFn(executor, schema))).setRowSchema(schema);

  return projectStream;
}
}
@Test
@Category(NeedsRunner.class)
public void testModPartitionWithLambda() {
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(1, 2, 4, 5))
          .apply(Partition.of(3, (element, numPartitions) -> element % numPartitions));
  assertEquals(3, outputs.size());
  PAssert.that(outputs.get(0)).empty();
  PAssert.that(outputs.get(1)).containsInAnyOrder(1, 4);
  PAssert.that(outputs.get(2)).containsInAnyOrder(2, 5);
  pipeline.run();
}
@Test
@Category(NeedsRunner.class)
public void testModPartition() {
  PCollectionList<Integer> outputs =
      pipeline.apply(Create.of(1, 2, 4, 5)).apply(Partition.of(3, new ModFn()));
  assertEquals(3, outputs.size());
  PAssert.that(outputs.get(0)).empty();
  PAssert.that(outputs.get(1)).containsInAnyOrder(1, 4);
  PAssert.that(outputs.get(2)).containsInAnyOrder(2, 5);
  pipeline.run();
}
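// ModFn is referenced by these tests but not defined in this excerpt. Given the expected
// partition contents, it presumably routes each element to partition
// element % numPartitions. A hedged sketch:
static class ModFn implements Partition.PartitionFn<Integer> {
  @Override
  public int partitionFor(Integer element, int numPartitions) {
    return element % numPartitions;
  }
}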
// Side input join path: a single input PCollection joined against a lookup table.
checkArgument(pinput.size() == 1, "More than one input received for side input join");
Schema schema = CalciteUtils.toSchema(getRowType());
return joinAsLookup(leftRelNode, rightRelNode, pinput.get(0), schema).setRowSchema(schema);

// Standard join path (separate branch): two input PCollections, one per side.
PCollection<Row> leftRows = pinput.get(0);
PCollection<Row> rightRows = pinput.get(1);
@Test
@Category(NeedsRunner.class)
public void testEvenOddPartition() {
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(591, 11789, 1257, 24578, 24799, 307))
          .apply(Partition.of(2, new ModFn()));
  assertEquals(2, outputs.size());
  PAssert.that(outputs.get(0)).containsInAnyOrder(24578);
  PAssert.that(outputs.get(1)).containsInAnyOrder(591, 11789, 1257, 24799, 307);
  pipeline.run();
}
BeamIOSinkRel.class.getSimpleName(), pinput);
PCollection<Row> upstream = pinput.get(0);
/**
 * Confirms that in Java 8 style, where a lambda results in a rawtype, the output type token is
 * not useful. If this test ever fails there may be simplifications available to us.
 */
@Test
@Category(NeedsRunner.class)
public void testPartitionFnOutputTypeDescriptorRaw() throws Exception {
  PCollectionList<String> output =
      pipeline.apply(Create.of("hello")).apply(Partition.of(1, (element, numPartitions) -> 0));

  thrown.expect(CannotProvideCoderException.class);
  pipeline.getCoderRegistry().getCoder(output.get(0).getTypeDescriptor());
}
}
PCollectionList.of(larger.get(1).apply(ParDo.of(new FlattenTops())))
results[i] =
    studentsByPercentile
        .get(i)
        .apply(
            MapElements.via(
                new SimpleFunction<Student, KV<String, Integer>>() {
                  @Override
public static void main(String[] args) throws Exception {
  TrafficFlowOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficFlowOptions.class);
  options.setStreaming(true);
  options.setBigQuerySchema(getSchema());

  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef =
      getTableReference(
          options.getProject(), options.getBigQueryDataset(), options.getBigQueryTable());

  PCollectionList<TableRow> resultList =
      pipeline
          .apply("ReadMyFile", TextIO.read().from(options.getInput()))
          .apply("InsertRandomDelays", ParDo.of(new InsertDelays()))
          .apply(ParDo.of(new ExtractFlowInfo()))
          .apply(new CalculateTotalFlow(options.getWindowDuration()));

  for (int i = 0; i < resultList.size(); i++) {
    resultList.get(i).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(getSchema()));
  }

  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
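// InsertDelays is not shown here. Given its step name "InsertRandomDelays", it presumably
// backdates the timestamps of a small fraction of elements to simulate late-arriving data
// in the streaming pipeline. A hedged sketch (the probability and delay values are
// assumptions, not from the original source; Instant and Duration are org.joda.time types):
static class InsertDelays extends DoFn<String, String> {
  private static final double DELAY_PROBABILITY = 0.05; // assumed
  private static final Duration DELAY = Duration.standardMinutes(2); // assumed

  @ProcessElement
  public void processElement(ProcessContext c) {
    Instant timestamp = Instant.now();
    if (Math.random() < DELAY_PROBABILITY) {
      // Pretend this element arrived late by shifting its timestamp into the past.
      timestamp = timestamp.minus(DELAY);
    }
    c.outputWithTimestamp(c.element(), timestamp);
  }
}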