[tarantool-patches] [PATCH v2 1/1] sql: support HAVING without GROUP BY clause

  • From: Kirill Shcherbatov <kshcherbatov@xxxxxxxxxxxxx>
  • To: tarantool-patches@xxxxxxxxxxxxx, v.shpilevoy@xxxxxxxxxxxxx
  • Date: Thu, 29 Nov 2018 17:33:11 +0300

Branch: 
http://github.com/tarantool/tarantool/tree/kshsh/gh-2364-having-without-groupby
Issue: https://github.com/tarantool/tarantool/issues/2364

Allowed to make SELECT requests that have HAVING clause without
GROUP BY. It is possible when both - left and right parts of
request have aggregate function or constant value.

Closes #2364.

@TarantoolBot document
Title: HAVING without GROUP BY clause
A query with a having clause should also have a group by clause.
If you omit group by, all the rows not excluded by the where
clause return as a single group.
Because no grouping is performed between the where and having
clauses, they cannot act independently of each other. Having
acts like where because it affects the rows in a single group
rather than groups, except the having clause can still use
aggregates.
Having without group by is not supported for select from
multiple tables.
---
 src/box/sql/resolve.c         |  97 +++++++++++++++++++++++---
 src/box/sql/sqliteInt.h       |   3 +-
 test/sql-tap/count.test.lua   |   8 +--
 test/sql-tap/select3.test.lua |   2 +-
 test/sql-tap/select5.test.lua | 128 +++++++++++++++++++++++++++++++++-
 5 files changed, 223 insertions(+), 15 deletions(-)

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 9a2d6ff4e..34f0c836a 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -602,6 +602,8 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
                /* A lone identifier is the name of a column.
                 */
        case TK_ID:{
+                       if ((pNC->ncFlags & NC_AllowAgg) != 0)
+                               pNC->ncFlags |= NC_HasUnaggregatedId;
                        return lookupName(pParse, 0, pExpr->u.zToken, pNC,
                                          pExpr);
                }
@@ -1180,6 +1182,50 @@ resolveOrderGroupBy(NameContext * pNC,   /* The name 
context of the SELECT stateme
        return sqlite3ResolveOrderGroupBy(pParse, pSelect, pOrderBy, zType);
 }
 
+/**
+ * Test if specified expression is a regular literal.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is literal.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_literal(struct Expr *expr)
+{
+       return expr->op == TK_INTEGER || expr->op == TK_FLOAT ||
+              expr->op == TK_BLOB || expr->op == TK_STRING ||
+              expr->op == TK_NULL || expr->op == TK_UMINUS;
+}
+
+/**
+ * Test if specified expression is a constant function.
+ * @param parser Parsing context.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is a existent constant function.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_constant_func(struct Parse *parser, struct Expr *expr)
+{
+       if (expr->op != TK_FUNCTION)
+               return false;
+       char *func_name = expr->u.zToken;
+       struct ExprList *args_list = expr->x.pList;
+       int args_count = args_list != NULL ? args_list->nExpr : 0;
+       struct FuncDef *func =
+               sqlite3FindFunction(parser->db, func_name, args_count, 0);
+       if (func == NULL) {
+               /*
+                * If we fail to find function with exact number
+                * of arguments, lets try to search similar
+                * function but with different number of args.
+                */
+               func = sqlite3FindFunction(parser->db, func_name, -2, 0);
+               if (func == NULL)
+                       return false;
+       }
+       return (func->funcFlags & SQLITE_FUNC_CONSTANT) != 0;
+}
+
 /*
  * Resolve names in the SELECT statement p and all of its descendants.
  */
@@ -1285,13 +1331,23 @@ resolveSelectStep(Walker * pWalker, Select * p)
                /* Set up the local name-context to pass to 
sqlite3ResolveExprNames() to
                 * resolve the result-set expression list.
                 */
+               bool is_all_select_agg = true;
                sNC.ncFlags = NC_AllowAgg;
                sNC.pSrcList = p->pSrc;
                sNC.pNext = pOuterNC;
-
                /* Resolve names in the result set. */
-               if (sqlite3ResolveExprListNames(&sNC, p->pEList))
-                       return WRC_Abort;
+               for (i = 0; i < p->pEList->nExpr; i++) {
+                       struct Expr *expr = p->pEList->a[i].pExpr;
+                       u16 has_agg_flag = sNC.ncFlags & NC_HasAgg;
+                       sNC.ncFlags &= ~NC_HasAgg;
+                       if (sqlite3ResolveExprNames(&sNC, expr))
+                               return WRC_Abort;
+                       is_all_select_agg &= (sNC.ncFlags & NC_HasAgg) != 0 ||
+                                            sql_expr_is_literal(expr) ||
+                                            sql_expr_is_constant_func(pParse,
+                                                                      expr);
+                       sNC.ncFlags |= has_agg_flag;
+               }
 
                /* If there are no aggregate functions in the result-set, and 
no GROUP BY
                 * expression, do not allow aggregates in any of the other 
expressions.
@@ -1306,12 +1362,37 @@ resolveSelectStep(Walker * pWalker, Select * p)
                        sNC.ncFlags &= ~NC_AllowAgg;
                }
 
-               /* If a HAVING clause is present, then there must be a GROUP BY 
clause.
+               /*
+                * If a HAVING clause is present, then there must
+                * be a GROUP BY clause or aggregate function
+                * should be specified.
                 */
-               if (p->pHaving && !pGroupBy) {
-                       sqlite3ErrorMsg(pParse,
-                                       "a GROUP BY clause is required before 
HAVING");
-                       return WRC_Abort;
+               if (p->pHaving != NULL && pGroupBy == NULL) {
+                       struct NameContext having_nc;
+                       memset(&having_nc, 0, sizeof(having_nc));
+                       having_nc.pParse = pParse;
+                       having_nc.ncFlags = NC_AllowAgg;
+                       having_nc.pSrcList = p->pSrc;
+                       if (is_all_select_agg &&
+                           sqlite3ResolveExprNames(&having_nc,
+                                                   p->pHaving) != 0)
+                               return WRC_Abort;
+                       if ((having_nc.ncFlags & NC_HasAgg) == 0 ||
+                           (having_nc.ncFlags & NC_HasUnaggregatedId) != 0) {
+                               const char *err_msg =
+                                       tt_sprintf("HAVING argument must "
+                                                  "appear in the GROUP BY "
+                                                  "clause or be used in an "
+                                                  "aggregate function");
+                               diag_set(ClientError, ER_SQL, err_msg);
+                               pParse->nErr++;
+                               pParse->rc = SQL_TARANTOOL_ERROR;
+                               return WRC_Abort;
+                       }
+                       sql_expr_delete(db, p->pLimit, false);
+                       p->pLimit =
+                           sqlite3ExprAlloc(db, TK_INTEGER,
+                                            &sqlite3IntTokens[1], 0);
                }
 
                /* Add the output column list to the name-context before 
parsing the
diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
index dbf58d967..8f0e0a320 100644
--- a/src/box/sql/sqliteInt.h
+++ b/src/box/sql/sqliteInt.h
@@ -2454,7 +2454,8 @@ struct NameContext {
 #define NC_IdxExpr   0x0020    /* True if resolving columns of CREATE INDEX */
 #define NC_VarSelect 0x0040    /* A correlated subquery has been seen */
 #define NC_MinMaxAgg 0x1000    /* min/max aggregates seen.  See note above */
-
+/** One or more identifiers seen without aggregate function. */
+#define NC_HasUnaggregatedId     0x2000
 /*
  * An instance of the following structure contains all information
  * needed to generate code for a single SELECT statement.
diff --git a/test/sql-tap/count.test.lua b/test/sql-tap/count.test.lua
index b05e3a28e..6f58210f4 100755
--- a/test/sql-tap/count.test.lua
+++ b/test/sql-tap/count.test.lua
@@ -172,15 +172,15 @@ test:do_test(
         return uses_op_count("SELECT count(*) FROM t2 WHERE a IS NOT NULL")
     end, 0)
 
-test:do_catchsql_test(
+test:do_execsql_test(
     "count-2.9",
     [[
         SELECT count(*) FROM t2 HAVING count(*)>1
-    ]], {
+    ]],
         -- <count-2.9>
-        1, "a GROUP BY clause is required before HAVING"
+        {}
         -- </count-2.9>
-    })
+    )
 
 test:do_test(
     "count-2.10",
diff --git a/test/sql-tap/select3.test.lua b/test/sql-tap/select3.test.lua
index dbc95f0d8..2704c267e 100755
--- a/test/sql-tap/select3.test.lua
+++ b/test/sql-tap/select3.test.lua
@@ -200,7 +200,7 @@ test:do_catchsql_test("select3-3.1", [[
   SELECT log, count(*) FROM t1 HAVING log>=4
 ]], {
   -- <select3-3.1>
-  1, "a GROUP BY clause is required before HAVING"
+  1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used 
in an aggregate function"
   -- </select3-3.1>
 })
 
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 9c3cd2759..b889132aa 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(32)
+test:plan(44)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -412,5 +412,131 @@ test:do_execsql_test(
         -- </select5-8.8>
     })
 
+--
+-- gh-2364: Support HAVING without GROUP BY clause.
+--
+test:do_catchsql_test(
+    "select5-9.1",
+    [[
+        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
+        INSERT INTO te40 VALUES (1,1);
+        INSERT INTO te40 VALUES (2,2);
+        SELECT s1 FROM te40 HAVING s1 = 1;
+    ]], {
+    -- <select5-9.1>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be 
used in an aggregate function"
+    -- </select5-9.1>
+})
+
+test:do_catchsql_test(
+    "select5-9.2",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING s1 = 2;
+    ]], {
+    -- <select5-9.2>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be 
used in an aggregate function"
+    -- </select5-9.2>
+})
+
+test:do_catchsql_test(
+    "select5-9.3",
+    [[
+        SELECT s1 FROM te40 HAVING SUM(s1) = 2;
+    ]], {
+    -- <select5-9.3>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be 
used in an aggregate function"
+    -- </select5-9.3>
+})
+
+test:do_execsql_test(
+    "select5-9.4",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.4>
+    3
+    -- </select5-9.4>
+})
+
+test:do_execsql_test(
+    "select5-9.5",
+    [[
+        SELECT MIN(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.5>
+    1
+    -- </select5-9.5>
+})
+
+test:do_execsql_test(
+    "select5-9.6",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) < 0;
+    ]],
+    -- <select5-9.6>
+    {}
+    -- </select5-9.6>
+)
+
+test:do_catchsql_test(
+    "select5-9.7",
+    [[
+        SELECT SUM(s1),s2 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.7>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be 
used in an aggregate function"
+    -- </select5-9.7>
+})
+
+test:do_catchsql_test(
+    "select5-9.8",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and s2 > 0;
+    ]], {
+    -- <select5-9.8>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be 
used in an aggregate function"
+    -- </select5-9.8>
+})
+
+test:do_execsql_test(
+    "select5-9.9",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and SUM(s2) > 0;
+    ]], {
+    -- <select5-9.9>
+    3
+    -- </select5-9.9>
+})
+
+test:do_execsql_test(
+    "select5-9.10",
+    [[
+        SELECT 1 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.10>
+    1
+    -- </select5-9.10>
+})
+
+test:do_execsql_test(
+    "select5-9.11",
+    [[
+        SELECT -1 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.11>
+    -1
+    -- </select5-9.11>
+})
+
+test:do_execsql_test(
+    "select5-9.12",
+    [[
+        SELECT NULL FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.12>
+    ""
+    -- </select5-9.12>
+})
+
 test:finish_test()
 
-- 
2.19.2


Other related posts: