[tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE

  • From: Vladislav Shpilevoy <v.shpilevoy@xxxxxxxxxxxxx>
  • To: imeevma@xxxxxxxxxxxxx, tarantool-patches@xxxxxxxxxxxxx, Kirill Yukhin <kyukhin@xxxxxxxxxxxxx>
  • Date: Mon, 30 Jul 2018 23:27:12 +0300

Thanks for the fixes! LGTM.

On 30/07/2018 18:30, imeevma@xxxxxxxxxxxxx wrote:

SQL functions UPPER and LOWER now works
with COLLATE as they should according to
ANSI Standart.

Closes #3052.
---
Branch: 
https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
Issue: https://github.com/tarantool/tarantool/issues/3052

  src/box/sql/func.c          | 22 +++++++++++-----
  test/sql/collation.result   | 62 +++++++++++++++++++++++++++++++++++++++++++++
  test/sql/collation.test.lua | 23 +++++++++++++++++
  3 files changed, 100 insertions(+), 7 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e211de1..45056a7 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -37,12 +37,13 @@
  #include "sqliteInt.h"
  #include "vdbeInt.h"
  #include "version.h"
+#include "coll.h"
  #include <unicode/ustring.h>
  #include <unicode/ucasemap.h>
  #include <unicode/ucnv.h>
  #include <unicode/uchar.h>
+#include <unicode/ucol.h>
-static UCaseMap *pUCaseMap;
  static UConverter* pUtf8conv;
/*
@@ -503,7 +504,15 @@ case_type##ICUFunc(sqlite3_context *context, int argc, 
sqlite3_value **argv)   \
                return;                                                        \
        }                                                                      \
        UErrorCode status = U_ZERO_ERROR;                                      \
-       int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\
+       struct coll *coll = sqlite3GetFuncCollSeq(context);                    \
+       const char *locale = NULL;                                             \
+       if (coll != NULL) {                                                    \
+               locale = ucol_getLocaleByType(coll->collator,                  \
+                                             ULOC_VALID_LOCALE, &status);     \
+       }                                                                      \
+       UCaseMap *case_map = ucasemap_open(locale, 0, &status);                \
+       assert(case_map != NULL);                                              \
+       int len = ucasemap_utf8To##case_type(case_map, z1, n, z2, n, &status); \
        if (len > n) {                                                         \
                status = U_ZERO_ERROR;                                         \
                sqlite3_free(z1);                                              \
@@ -512,8 +521,9 @@ case_type##ICUFunc(sqlite3_context *context, int argc, 
sqlite3_value **argv)   \
                        sqlite3_result_error_nomem(context);                   \
                        return;                                                \
                }                                                              \
-               ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\
+               ucasemap_utf8To##case_type(case_map, z1, len, z2, n, &status); \
        }                                                                      \
+       ucasemap_close(case_map);                                              \
        sqlite3_result_text(context, z1, len, sqlite3_free);                   \
  }                                                                             
 \
@@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void)
         */
        UErrorCode status = U_ZERO_ERROR;
- pUCaseMap = ucasemap_open(NULL, 0, &status);
-       assert(pUCaseMap);
        pUtf8conv = ucnv_open("utf8", &status);
        assert(pUtf8conv);
        /*
@@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void)
                FUNCTION(round, 1, 0, 0, roundFunc),
                FUNCTION(round, 2, 0, 0, roundFunc),
  #endif
-               FUNCTION(upper, 1, 0, 0, UpperICUFunc),
-               FUNCTION(lower, 1, 0, 0, LowerICUFunc),
+               FUNCTION(upper, 1, 0, 1, UpperICUFunc),
+               FUNCTION(lower, 1, 0, 1, LowerICUFunc),
                FUNCTION(hex, 1, 0, 0, hexFunc),
                FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE),
                VFUNCTION(random, 0, 0, 0, randomFunc),
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 7fec96d..79ba9ab 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -32,6 +32,68 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
  ---
  - error: 'near "COLLATE": syntax error'
  ...
+-- gh-3052: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter 
CHAR)]]);
+---
+...
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I 
U+0049','I');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot 
Above U+0130','İ');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I 
U+0131','ı');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS 
lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'i']
+  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇']
+  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+  - ['Latin Small Letter I U+0069', 'I', 'i']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS 
upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'ı']
+  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i']
+  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+  - ['Latin Small Letter I U+0069', 'İ', 'i']
+...
+box.internal.collation.drop('TURKISH')
+---
+...
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S 
U+00DF','ß');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where 
UPPER(letter) = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu 
where UPPER(letter COLLATE "GERMAN") = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
+box.internal.collation.drop('GERMAN')
+---
+...
+box.sql.execute(([[DROP TABLE tu]]))
+---
+...
  box.schema.user.grant('guest','read,write,execute', 'universe')
  ---
  ...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index ff2c5b2..935dea8 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -12,6 +12,29 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;")
  box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;")
  box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
+-- gh-3052: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter 
CHAR)]]);
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I 
U+0049','I');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot 
Above U+0130','İ');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I 
U+0131','ı');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS 
lower FROM tu;]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS 
upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+box.internal.collation.drop('TURKISH')
+
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S 
U+00DF','ß');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where 
UPPER(letter) = 'SS';]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu 
where UPPER(letter COLLATE "GERMAN") = 'SS';]])
+box.internal.collation.drop('GERMAN')
+box.sql.execute(([[DROP TABLE tu]]))
box.schema.user.grant('guest','read,write,execute', 'universe')
  cn = remote.connect(box.cfg.listen)


Other related posts: