@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // A sink consumes exactly one upstream collection.
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamIOSinkRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  // Hand the rows to the table's writer; the sink is pass-through, so the
  // same collection is returned unchanged for any downstream consumers.
  sqlTable.buildIOWriter(upstream);
  return upstream;
}
}
@Override
public PCollection<Row> expand(PCollectionList<Row> input) {
  // A source relation is a leaf of the plan: it must not receive any upstream input.
  String relName = BeamIOSourceRel.class.getSimpleName();
  checkArgument(
      input.size() == 0, "Should not have received input for %s: %s", relName, input);

  // Read rows directly from the table, rooted at the pipeline's begin node.
  return sqlTable.buildIOReader(input.getPipeline().begin());
}
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // Values relations are leaves of the plan and take no upstream input.
  checkArgument(
      pinput.size() == 0,
      "Should not have received input for %s: %s",
      BeamValuesRel.class.getSimpleName(),
      pinput);
  if (tuples.isEmpty()) {
    throw new IllegalStateException("Values with empty tuples!");
  }

  // Materialize each literal tuple as a Row under the schema derived from Calcite's row type.
  Schema outputSchema = CalciteUtils.toSchema(getRowType());
  List<Row> literalRows =
      tuples.stream().map(t -> tupleToRow(outputSchema, t)).collect(toList());

  return pinput
      .getPipeline()
      .begin()
      .apply(Create.of(literalRows))
      .setRowSchema(outputSchema);
}
}
@Override public PCollection<Row> expand(PCollectionList<Row> pinput) { checkArgument( pinput.size() == 1, "Wrong number of inputs for %s: %s", BeamUncollectRel.class.getSimpleName(), pinput); PCollection<Row> upstream = pinput.get(0); // Each row of the input contains a single array of things to be emitted; Calcite knows // what the row looks like Schema outputSchema = CalciteUtils.toSchema(getRowType()); PCollection<Row> uncollected = upstream.apply(ParDo.of(new UncollectDoFn(outputSchema))).setRowSchema(outputSchema); return uncollected; } }
@Override public PCollection<Row> expand(PCollectionList<Row> pinput) { checkArgument( pinput.size() == 1, "Wrong number of inputs for %s: %s", BeamAggregationRel.class.getSimpleName(),
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // Calc (projection/filter) takes exactly one upstream collection.
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamCalcRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  // Compile the rel's RexProgram into an executable expression evaluator.
  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(BeamCalcRel.this.getProgram());

  // The original converted the row type to a Beam schema three separate times and
  // called setRowSchema twice (once in the chain, once again on the result).
  // `rowType` and `getRowType()` describe the same output shape here — presumably
  // identical (TODO confirm) — so compute the schema once and set it exactly once.
  Schema schema = CalciteUtils.toSchema(rowType);
  return upstream.apply(ParDo.of(new CalcFn(executor, schema))).setRowSchema(schema);
}
}
@Override public PCollection<Row> expand(PCollectionList<Row> inputs) { checkArgument( inputs.size() == 2, "Wrong number of arguments to %s: %s", beamRelNode.getClass().getSimpleName(),
checkArgument(pinput.size() == 1, "More than one input received for side input join"); Schema schema = CalciteUtils.toSchema(getRowType()); return joinAsLookup(leftRelNode, rightRelNode, pinput.get(0), schema).setRowSchema(schema); Schema rightSchema = CalciteUtils.toSchema(right.getRowType()); assert pinput.size() == 2; PCollection<Row> leftRows = pinput.get(0); PCollection<Row> rightRows = pinput.get(1);
@Override public PCollection<Row> expand(PCollectionList<Row> pinput) { checkArgument( pinput.size() == 1, "Wrong number of inputs for %s: %s", BeamIOSinkRel.class.getSimpleName(),
@Test @Category(NeedsRunner.class) public void testDroppedPartition() { // Compute the set of integers either 1 or 2 mod 3, the hard way. PCollectionList<Integer> outputs = pipeline .apply(Create.of(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) .apply(Partition.of(3, new ModFn())); List<PCollection<Integer>> outputsList = new ArrayList<>(outputs.getAll()); outputsList.remove(0); outputs = PCollectionList.of(outputsList); assertTrue(outputs.size() == 2); PCollection<Integer> output = outputs.apply(Flatten.pCollections()); PAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11); pipeline.run(); }
@Test
@Category(NeedsRunner.class)
public void testEvenOddPartition() {
  // Partition into even (index 0) and odd (index 1) residues mod 2.
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(591, 11789, 1257, 24578, 24799, 307))
          .apply(Partition.of(2, new ModFn()));
  // assertEquals gives an expected-vs-actual failure message, unlike assertTrue(x == 2),
  // and matches the style used by the sibling tests.
  assertEquals(2, outputs.size());
  PAssert.that(outputs.get(0)).containsInAnyOrder(24578);
  PAssert.that(outputs.get(1)).containsInAnyOrder(591, 11789, 1257, 24799, 307);
  pipeline.run();
}
@Test
@Category(NeedsRunner.class)
public void testModPartitionWithLambda() {
  // Same residue-class split as testModPartition, but with a lambda PartitionFn.
  PCollectionList<Integer> partitions =
      pipeline
          .apply(Create.of(1, 2, 4, 5))
          .apply(Partition.of(3, (value, numShards) -> value % numShards));
  assertEquals(3, partitions.size());
  // None of the inputs is divisible by 3, so partition 0 stays empty.
  PAssert.that(partitions.get(0)).empty();
  PAssert.that(partitions.get(1)).containsInAnyOrder(1, 4);
  PAssert.that(partitions.get(2)).containsInAnyOrder(2, 5);
  pipeline.run();
}
@Test
@Category(NeedsRunner.class)
public void testModPartition() {
  // Partition 1, 2, 4, 5 into residue classes mod 3.
  PCollectionList<Integer> outputs =
      pipeline.apply(Create.of(1, 2, 4, 5)).apply(Partition.of(3, new ModFn()));
  // assertEquals gives an expected-vs-actual failure message, unlike assertTrue(x == 3),
  // and matches the style used by the sibling tests.
  assertEquals(3, outputs.size());
  // No input is divisible by 3, so partition 0 is empty.
  PAssert.that(outputs.get(0)).empty();
  PAssert.that(outputs.get(1)).containsInAnyOrder(1, 4);
  PAssert.that(outputs.get(2)).containsInAnyOrder(2, 5);
  pipeline.run();
}
public static void main(String[] args) throws Exception { TrafficFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficFlowOptions.class); options.setStreaming(true); options.setBigQuerySchema(getSchema()); ExampleUtils exampleUtils = new ExampleUtils(options); exampleUtils.setup(); Pipeline pipeline = Pipeline.create(options); TableReference tableRef = getTableReference( options.getProject(), options.getBigQueryDataset(), options.getBigQueryTable()); PCollectionList<TableRow> resultList = pipeline .apply("ReadMyFile", TextIO.read().from(options.getInput())) .apply("InsertRandomDelays", ParDo.of(new InsertDelays())) .apply(ParDo.of(new ExtractFlowInfo())) .apply(new CalculateTotalFlow(options.getWindowDuration())); for (int i = 0; i < resultList.size(); i++) { resultList.get(i).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(getSchema())); } PipelineResult result = pipeline.run(); // ExampleUtils will try to cancel the pipeline and the injector before the program exits. exampleUtils.waitToFinish(result); }