public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url, Configuration config) throws SQLException { SQLContext sqlContext = getSparkSession().sqlContext(); boolean forceRowKeyOrder = conn.unwrap(PhoenixConnection.class).getQueryServices().getProps() .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false); // if we are forcing row key order we have to add an ORDER BY // here we assume that the required columns are in the primary key column order String prevOrderBy = queryBuilder.getOrderByClause(); if (forceRowKeyOrder && (queryBuilder.getOrderByClause()==null || queryBuilder.getOrderByClause().isEmpty())) { queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns())); } // create PhoenixRDD using the table name and columns that are required by the query // since we don't set the predicate filtering is done after rows are returned from spark Dataset phoenixDataSet = getSparkSession().read().format("phoenix") .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName()) .option(PhoenixDataSource.ZOOKEEPER_URL, url).load(); phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName()); Dataset<Row> dataset = sqlContext.sql(queryBuilder.build()); SparkPlan plan = dataset.queryExecution().executedPlan(); List<Row> rows = dataset.collectAsList(); queryBuilder.setOrderByClause(prevOrderBy); ResultSet rs = new SparkResultSet(rows, dataset.columns()); return rs; } }
/**
 * Analyzes a SQL statement against an empty, schema-only temporary view and returns the
 * resulting analyzed {@link LogicalPlan}, so the plan can be inspected without real data.
 *
 * <p>The placeholder {@code __THIS__} in the statement is replaced with a uniquely named
 * temporary view (registered from an empty DataFrame carrying {@code schema}); the view is
 * always dropped before returning, even if analysis fails.
 *
 * @param sparkSession session used to register the temp view and analyze the statement
 * @param statement    SQL text, possibly containing the {@code __THIS__} placeholder
 * @param schema       schema of the (empty) table the statement is analyzed against
 * @return the analyzed logical plan of the statement
 */
public static LogicalPlan createAnalyzedLogicalPlan(SparkSession sparkSession, StructType schema,
        String statement) {
    // Unique view name so repeated/concurrent calls do not collide.
    String tableName = "sql2pmml_" + DatasetUtil.ID.getAndIncrement();
    statement = statement.replace("__THIS__", tableName);
    // An empty DataFrame suffices: only the schema matters for analysis.
    Dataset<Row> dataset = sparkSession.createDataFrame(Collections.emptyList(), schema);
    dataset.createOrReplaceTempView(tableName);
    try {
        QueryExecution queryExecution = sparkSession.sql(statement).queryExecution();
        return queryExecution.analyzed();
    } finally {
        // Always clean up the temp view, even when analysis throws.
        Catalog catalog = sparkSession.catalog();
        catalog.dropTempView(tableName);
    }
}