@Override public void warmUp(Configuration configuration) { super.warmUp(configuration); // Run a most simple Spark job. this.logger.info("Running warm-up Spark job..."); long startTime = System.currentTimeMillis(); final RheemContext rheemCtx = new RheemContext(configuration); SparkCollectionSource<Integer> source = new SparkCollectionSource<>( Collections.singleton(0), DataSetType.createDefault(Integer.class) ); SparkLocalCallbackSink<Integer> sink = new SparkLocalCallbackSink<>( dq -> { }, DataSetType.createDefault(Integer.class) ); source.connectTo(0, sink, 0); final Job job = rheemCtx.createJob("Warm up", new RheemPlan(sink)); // Make sure not to have the warm-up jobs bloat the execution logs. job.getConfiguration().setProperty("rheem.core.log.enabled", "false"); job.execute(); long stopTime = System.currentTimeMillis(); this.logger.info("Spark warm-up finished in {}.", Formats.formatDuration(stopTime - startTime, true)); }