// Apply the test harness parallelism and ExecutionConfig to the plan before compilation.
// NOTE(review): fragment of an enclosing method not visible in this chunk — left unchanged.
plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig());
@Test public void testLocalExecutorWithWordCount() { try { // set up the files File inFile = File.createTempFile("wctext", ".in"); File outFile = File.createTempFile("wctext", ".out"); inFile.deleteOnExit(); outFile.deleteOnExit(); try (FileWriter fw = new FileWriter(inFile)) { fw.write(WordCountData.TEXT); } LocalExecutor executor = new LocalExecutor(); executor.setDefaultOverwriteFiles(true); executor.setTaskManagerNumSlots(parallelism); executor.setPrintStatusDuringExecution(false); executor.start(); Plan wcPlan = getWordCountPlan(inFile, outFile, parallelism); wcPlan.setExecutionConfig(new ExecutionConfig()); executor.executePlan(wcPlan); executor.stop(); } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
/**
 * Compiles the single-step K-Means plan without supplying any source
 * statistics and validates the resulting optimized plan.
 */
@Test
public void testCompileKMeansSingleStepWithOutStats() {
	Plan kmeans = getKMeansPlan();
	kmeans.setExecutionConfig(new ExecutionConfig());
	checkPlan(compileNoStats(kmeans));
}
/**
 * Compiles the given plan, dumps the optimized plan as JSON, and verifies that the
 * dump is well-formed by parsing it to the end. Fails the test on malformed JSON
 * or any other error.
 *
 * @param p the plan to compile and dump
 */
private void dump(Plan p) {
	p.setExecutionConfig(new ExecutionConfig());
	try {
		OptimizedPlan op = compileNoStats(p);
		PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
		String json = dumper.getOptimizerPlanAsJSON(op);
		// parse the full token stream purely to validate well-formedness;
		// close the parser afterwards so it does not leak
		JsonParser parser = new JsonFactory().createJsonParser(json);
		try {
			while (parser.nextToken() != null) {}
		}
		finally {
			parser.close();
		}
	}
	catch (JsonParseException e) {
		e.printStackTrace();
		Assert.fail("JSON Generator produced malformatted output: " + e.getMessage());
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An error occurred in the test: " + e.getMessage());
	}
}
}
/** * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly, * re-exploiting the sorted order is cheaper. */ @Test public void testQueryWithStatsForRepartitionMerge() { Plan p = getTPCH3Plan(); p.setExecutionConfig(defaultExecutionConfig); // set compiler hints OperatorResolver cr = getContractResolver(p); DualInputOperator<?, ?, ?, ?> match = cr.getNode(JOIN_NAME); match.getCompilerHints().setFilterFactor(100f); testQueryGeneric(100L * 1024 * 1024 * 1024 * 1024, 100L * 1024 * 1024 * 1024 * 1024, 0.01f, 100f, false, true, false, false, true); }
@Test public void testCompileKMeansSingleStepWithStats() { Plan p = getKMeansPlan(); p.setExecutionConfig(new ExecutionConfig()); // set the statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS); GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS); setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f); setSourceStatistics(centersSource, 1024 * 1024, 32f); OptimizedPlan plan = compileWithStats(p); checkPlan(plan); }
/** * Verifies that a robust repartitioning plan with a hash join is created in the absence of statistics. */ @Test public void testQueryNoStatistics() { try { Plan p = getTPCH3Plan(); p.setExecutionConfig(defaultExecutionConfig); // compile final OptimizedPlan plan = compileNoStats(p); final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan); // get the nodes from the final plan final SinkPlanNode sink = or.getNode(SINK); final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME); final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ? (SingleInputPlanNode) reducer.getPredecessor() : null; final DualInputPlanNode join = or.getNode(JOIN_NAME); final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME); // verify the optimizer choices checkStandardStrategies(filteringMapper, join, combiner, reducer, sink); Assert.assertTrue(checkRepartitionShipStrategies(join, reducer, combiner)); Assert.assertTrue(checkHashJoinStrategies(join, reducer, true) || checkHashJoinStrategies(join, reducer, false)); } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
// Generic driver for the TPCH-Q3 compilation tests: attaches the given source sizes and
// selectivity hints to the plan, compiles it with statistics, and checks the chosen
// shipping/join strategies against the boolean expectations.
// NOTE(review): this method is truncated in the visible chunk (the try block and the
// strategy checks continue past this line) — left byte-identical.
private void testQueryGeneric(Plan p, long orderSize, long lineitemSize, float orderSelectivity, float joinSelectivity, boolean broadcastOkay, boolean partitionedOkay, boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay) { try { // set statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> ordersSource = cr.getNode(ORDERS); GenericDataSourceBase<?, ?> lineItemSource = cr.getNode(LINEITEM); SingleInputOperator<?, ?, ?> mapper = cr.getNode(MAPPER_NAME); DualInputOperator<?, ?, ?, ?> joiner = cr.getNode(JOIN_NAME); setSourceStatistics(ordersSource, orderSize, 100f); setSourceStatistics(lineItemSource, lineitemSize, 140f); mapper.getCompilerHints().setAvgOutputRecordSize(16f); mapper.getCompilerHints().setFilterFactor(orderSelectivity); joiner.getCompilerHints().setFilterFactor(joinSelectivity); // compile final OptimizedPlan plan = compileWithStats(p); final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan); // get the nodes from the final plan final SinkPlanNode sink = or.getNode(SINK); final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME); final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ? (SingleInputPlanNode) reducer.getPredecessor() : null; final DualInputPlanNode join = or.getNode(JOIN_NAME); final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME); checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);
// Attach a fresh ExecutionConfig to the plan before compilation.
// NOTE(review): fragment of an enclosing method not visible in this chunk — left unchanged.
p.setExecutionConfig(new ExecutionConfig());
// Compiles the CoGroup-based connected-components plan (no statistics) and resolves the
// optimized plan's nodes for subsequent strategy checks.
// NOTE(review): this method is truncated in the visible chunk (assertions continue past
// this line) — left byte-identical.
@Test public void testWorksetConnectedComponents() { Plan plan = getConnectedComponentsCoGroupPlan(); plan.setExecutionConfig(new ExecutionConfig()); OptimizedPlan optPlan = compileNoStats(plan); OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
// Apply the test harness parallelism and ExecutionConfig to the plan before compilation.
// NOTE(review): three identical fragments of enclosing methods not visible in this
// chunk — left unchanged.
plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig());
plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig());
plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig());