Skip to content

Commit c7e6f84

Browse files
committed
Merge branch 'wl13094-new-collations' into master
2 parents 609760e + 6b788c4 commit c7e6f84

File tree

6 files changed

+254
-105
lines changed

6 files changed

+254
-105
lines changed

cdk/include/mysql/cdk/protocol/mysqlx/collations.h

Lines changed: 62 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,18 @@
3939
#define MYSQL_CDK_PROTOCOL_MYSQLX_COLLATIONS_H
4040

4141
/*
42-
Each line X(CS, ID, COLL, CASE) in the expansion of
42+
Each line X(CS, ID, COLL, SENSITIVITY) in the expansion of
4343
a COLLATIONS_XXX() macro declares a collation with name COLL for character set
44-
CS. ID is the MySQL id number for the collation. CASE is one of ci, cs or bin
45-
and indicates whether it is case insensitive, sensitive or binary collation,
46-
respectively.
44+
CS. ID is the MySQL id number for the collation. SENSITIVITY is either 'bin'
45+
for binary collations or a combination of sensitivity flags such as 'ai_ci',
46+
using the same conventions as the ones used in MySQL collation names.
47+
48+
Note: CS, COLL and SENSITIVITY are used to reconstruct the full MySQL name of
49+
the collation and should follow the same naming conventions (with a few
50+
exceptions that we handle separately)
4751
*/
4852

53+
4954
#define COLLATIONS_big5(X) \
5055
X(big5,1,chinese,ci) \
5156
X(big5,84,bin,bin) \
@@ -228,75 +233,84 @@
228233
X(latin7,79,bin,bin) \
229234

230235
#define COLLATIONS_utf8mb4(X) \
231-
X(utf8mb4,255,uca0900_ai,ci) \
232-
X(utf8mb4,278,uca0900_as,cs) \
233-
X(utf8mb4,46,utf8mb4,bin) \
236+
X(utf8mb4,255,uca0900,ai_ci) \
237+
X(utf8mb4,278,uca0900,as_cs) \
238+
X(utf8mb4,46,bin,bin) \
234239
X(utf8mb4,245,croatian,ci) \
235-
X(utf8mb4,266,cs_0900_ai,ci) \
236-
X(utf8mb4,289,cs_0900_as,cs) \
240+
X(utf8mb4,266,cs_0900,ai_ci) \
241+
X(utf8mb4,289,cs_0900,as_cs) \
237242
X(utf8mb4,234,czech,ci) \
238243
X(utf8mb4,235,danish,ci) \
239-
X(utf8mb4,267,da_0900_ai,ci) \
240-
X(utf8mb4,290,da_0900_as,cs) \
241-
X(utf8mb4,256,de_pb_0900_ai,ci) \
242-
X(utf8mb4,279,de_pb_0900_as,cs) \
243-
X(utf8mb4,273,eo_0900_ai,ci) \
244-
X(utf8mb4,296,eo_0900_as,cs) \
244+
X(utf8mb4,267,da_0900,ai_ci) \
245+
X(utf8mb4,290,da_0900,as_cs) \
246+
X(utf8mb4,256,de_pb_0900,ai_ci) \
247+
X(utf8mb4,279,de_pb_0900,as_cs) \
248+
X(utf8mb4,273,eo_0900,ai_ci) \
249+
X(utf8mb4,296,eo_0900,as_cs) \
245250
X(utf8mb4,241,esperanto,ci) \
246251
X(utf8mb4,230,estonian,ci) \
247-
X(utf8mb4,263,es_0900_ai,ci) \
248-
X(utf8mb4,286,es_0900_as,cs) \
249-
X(utf8mb4,270,es_trad_0900_ai,ci) \
250-
X(utf8mb4,293,es_trad_0900_as,cs) \
251-
X(utf8mb4,262,et_0900_ai,ci) \
252-
X(utf8mb4,285,et_0900_as,cs) \
252+
X(utf8mb4,263,es_0900,ai_ci) \
253+
X(utf8mb4,286,es_0900,as_cs) \
254+
X(utf8mb4,270,es_trad_0900,ai_ci) \
255+
X(utf8mb4,293,es_trad_0900,as_cs) \
256+
X(utf8mb4,262,et_0900,ai_ci) \
257+
X(utf8mb4,285,et_0900,as_cs) \
253258
X(utf8mb4,45,general,ci) \
254259
X(utf8mb4,244,german2,ci) \
255-
X(utf8mb4,275,hr_0900_ai,ci) \
256-
X(utf8mb4,298,hr_0900_as,cs) \
260+
X(utf8mb4,275,hr_0900,ai_ci) \
261+
X(utf8mb4,298,hr_0900,as_cs) \
257262
X(utf8mb4,242,hungarian,ci) \
258-
X(utf8mb4,274,hu_0900_ai,ci) \
259-
X(utf8mb4,297,hu_0900_as,cs) \
263+
X(utf8mb4,274,hu_0900,ai_ci) \
264+
X(utf8mb4,297,hu_0900,as_cs) \
260265
X(utf8mb4,225,icelandic,ci) \
261-
X(utf8mb4,257,is_0900_ai,ci) \
262-
X(utf8mb4,280,is_0900_as,cs) \
263-
X(utf8mb4,303,ja_0900_as,cs) \
266+
X(utf8mb4,257,is_0900,ai_ci) \
267+
X(utf8mb4,280,is_0900,as_cs) \
268+
X(utf8mb4,303,ja_0900,as_cs) \
264269
X(utf8mb4,226,latvian,ci) \
265-
X(utf8mb4,271,la_0900_ai,ci) \
266-
X(utf8mb4,294,la_0900_as,cs) \
270+
X(utf8mb4,271,la_0900,ai_ci) \
271+
X(utf8mb4,294,la_0900,as_cs) \
267272
X(utf8mb4,236,lithuanian,ci) \
268-
X(utf8mb4,268,lt_0900_ai,ci) \
269-
X(utf8mb4,291,lt_0900_as,cs) \
270-
X(utf8mb4,258,lv_0900_ai,ci) \
271-
X(utf8mb4,281,lv_0900_as,cs) \
273+
X(utf8mb4,268,lt_0900,ai_ci) \
274+
X(utf8mb4,291,lt_0900,as_cs) \
275+
X(utf8mb4,258,lv_0900,ai_ci) \
276+
X(utf8mb4,281,lv_0900,as_cs) \
272277
X(utf8mb4,240,persian,ci) \
273-
X(utf8mb4,261,pl_0900_ai,ci) \
274-
X(utf8mb4,284,pl_0900_as,cs) \
278+
X(utf8mb4,261,pl_0900,ai_ci) \
279+
X(utf8mb4,284,pl_0900,as_cs) \
275280
X(utf8mb4,229,polish,ci) \
276281
X(utf8mb4,227,romanian,ci) \
277282
X(utf8mb4,239,roman,ci) \
278-
X(utf8mb4,259,ro_0900_ai,ci) \
279-
X(utf8mb4,282,ro_0900_as,cs) \
283+
X(utf8mb4,259,ro_0900,ai_ci) \
284+
X(utf8mb4,282,ro_0900,as_cs) \
280285
X(utf8mb4,243,sinhala,ci) \
281-
X(utf8mb4,269,sk_0900_ai,ci) \
282-
X(utf8mb4,292,sk_0900_as,cs) \
286+
X(utf8mb4,269,sk_0900,ai_ci) \
287+
X(utf8mb4,292,sk_0900,as_cs) \
283288
X(utf8mb4,237,slovak,ci) \
284289
X(utf8mb4,228,slovenian,ci) \
285-
X(utf8mb4,260,sl_0900_ai,ci) \
286-
X(utf8mb4,283,sl_0900_as,cs) \
290+
X(utf8mb4,260,sl_0900,ai_ci) \
291+
X(utf8mb4,283,sl_0900,as_cs) \
287292
X(utf8mb4,238,spanish2,ci) \
288293
X(utf8mb4,231,spanish,ci) \
289-
X(utf8mb4,264,sv_0900_ai,ci) \
290-
X(utf8mb4,287,sv_0900_as,cs) \
294+
X(utf8mb4,264,sv_0900,ai_ci) \
295+
X(utf8mb4,287,sv_0900,as_cs) \
291296
X(utf8mb4,232,swedish,ci) \
292-
X(utf8mb4,265,tr_0900_ai,ci) \
293-
X(utf8mb4,288,tr_0900_as,cs) \
297+
X(utf8mb4,265,tr_0900,ai_ci) \
298+
X(utf8mb4,288,tr_0900,as_cs) \
294299
X(utf8mb4,233,turkish,ci) \
295300
X(utf8mb4,246,unicode_520,ci) \
296301
X(utf8mb4,224,unicode,ci) \
297302
X(utf8mb4,247,vietnamese,ci) \
298-
X(utf8mb4,277,vi_0900_ai,ci) \
299-
X(utf8mb4,300,vi_0900_as,cs) \
303+
X(utf8mb4,277,vi_0900,ai_ci) \
304+
X(utf8mb4,300,vi_0900,as_cs) \
305+
X(utf8mb4,304,ja_0900,as_cs_ks) \
306+
X(utf8mb4,305,uca0900,as_ci) \
307+
X(utf8mb4,306,ru_0900,ai_ci) \
308+
X(utf8mb4,307,ru_0900,as_cs) \
309+
X(utf8mb4,308,zh_0900,as_cs) \
310+
X(utf8mb4,309,uca0900,bin) \
311+
COLLATIONS_utf8mb4_EXTRA
312+
313+
#define COLLATIONS_utf8mb4_EXTRA
300314

301315

302316
#define COLLATIONS_cp1251(X) \

devapi/result.cc

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,17 @@ struct CollationInfo::Access
280280
{
281281
enum coll_case {
282282
case_ci = CollationInfo::case_ci,
283+
case_ai_ci = case_ci,
284+
case_as_ci = case_ci,
283285
case_cs = CollationInfo::case_cs,
286+
case_as_cs = case_cs,
287+
case_as_cs_ks = case_cs,
284288
case_bin = CollationInfo::case_bin
285289
};
286290

287-
static CollationInfo mk(CharacterSet _cs, unsigned _id, coll_case _case, const char *_name)
291+
static CollationInfo mk(
292+
CharacterSet _cs, unsigned _id, coll_case _case, const char *_name
293+
)
288294
{
289295
CollationInfo ci;
290296
ci.m_cs = _cs;
@@ -295,21 +301,71 @@ struct CollationInfo::Access
295301
}
296302
};
297303

304+
305+
/*
306+
A helper function that reconstructs the MySQL collation name from the data
307+
given by COLLATIONS_XXX() lists. In most cases the collation name is just
308+
a concatenation of charset name, collation and sensitivity flags - this
309+
default name is passed as the pre-allocated string 'name'. But there are a few
310+
exceptions to the general rule: 'name_bin' is the name to be used for binary
311+
collations; also, individual components of the name are given to allow
312+
further customization.
313+
*/
314+
315+
const char*
316+
coll_name(
317+
std::string cs, std::string coll, std::string sensitivity,
318+
const char *name, const char *name_bin)
319+
{
320+
static std::list<std::string> special;
321+
322+
/*
323+
For generic UCA collations, such as uca0900, the "uca" prefix is
324+
not present in the MySQL collation name. For example, for the uca0900
325+
collation with "ai_ci" sensitivity, the collation name
326+
is "utf8mb4_0900_ai_ci" but the value of 'name' is "utf8mb4_uca0900_ai_ci",
327+
so we need to correct this.
328+
*/
329+
330+
if (coll.substr(0,3) == "uca")
331+
{
332+
special.push_back(cs + "_" + coll.substr(3) + "_" + sensitivity);
333+
return special.back().c_str();
334+
}
335+
336+
if (sensitivity == "bin")
337+
{
338+
// Note: special exception for "binary" collation (no _bin suffix)
339+
return cs == "binary" ? "binary" : name_bin;
340+
}
341+
else
342+
return name;
343+
}
344+
345+
298346
#define COLL_DEFS(CS) COLLATIONS_##CS(COLL_CONST_DEF)
299347

300348
#define COLL_CONST_DEF(CS,ID,COLL,CASE) \
301349
const CollationInfo \
302350
Collation<CharacterSet::CS>::COLL_CONST_NAME(COLL,CASE) = \
303351
CollationInfo::Access::mk(CharacterSet::CS, ID, \
304352
CollationInfo::Access::case_##CASE, \
305-
COLL_NAME_##CASE(CS,COLL));
353+
COLL_NAME(CS,COLL,CASE));
354+
355+
#define COLL_NAME(CS,COLL,CASE) \
356+
coll_name(#CS, #COLL, #CASE, #CS "_" #COLL "_" #CASE, #CS "_bin")
357+
358+
// Add utf8mb4 alias for bin collation for compatibility
306359

307-
#define COLL_NAME_bin(CS,COLL) #CS "_bin"
308-
#define COLL_NAME_ci(CS,COLL) #CS "_" #COLL "_ci"
309-
#define COLL_NAME_cs(CS,COLL) #CS "_" #COLL "_cs"
360+
#undef COLLATIONS_utf8mb4_EXTRA
361+
#define COLLATIONS_utf8mb4_EXTRA \
362+
const CollationInfo Collation<CharacterSet::utf8mb4>::utf8mb4 = \
363+
Collation<CharacterSet::utf8mb4>::bin;
310364

311365
CDK_CS_LIST(COLL_DEFS)
312366

367+
#undef COLLATIONS_utf8mb4_EXTRA
368+
#define COLLATIONS_utf8mb4_EXTRA
313369

314370
/*
315371
Handling result data

devapi/tests/types-t.cc

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,56 @@ TEST_F(Types, string)
497497
}
498498

499499

500+
inline
501+
const CollationInfo* get_collation(unsigned id)
502+
{
503+
#define COLL_FIND(COL) COLLATIONS_##COL(COLL_FIND1)
504+
#define COLL_FIND1(CS,ID,COLL,CASE) \
505+
case ID: return &Collation<CharacterSet::CS>::COLL_CONST_NAME(COLL,CASE);
506+
507+
switch (id)
508+
{
509+
CDK_CS_LIST(COLL_FIND)
510+
default:
511+
return nullptr;
512+
}
513+
}
514+
515+
516+
TEST_F(Types, collations)
517+
{
518+
using col_data = std::pair<unsigned, string>;
519+
std::vector<col_data> unknown;
520+
521+
Table t = getSchema("information_schema").getTable("collations");
522+
523+
for (Row r : t.select("id", "collation_name").execute())
524+
{
525+
col_data col = { r[0], r[1] };
526+
527+
const CollationInfo *info = get_collation(col.first);
528+
529+
if (!info)
530+
unknown.push_back(col);
531+
else
532+
{
533+
EXPECT_EQ(std::string{ col.second }, std::string{ info->getName() })
534+
<< "bad collation name";
535+
}
536+
}
537+
538+
if (!unknown.empty())
539+
{
540+
cout << "Unknown collations:" << endl;
541+
for (col_data col : unknown)
542+
{
543+
cout << " -" << col.first << ": " << col.second << endl;
544+
}
545+
FAIL() << "There are unknown collations";
546+
}
547+
}
548+
549+
500550
TEST_F(Types, blob)
501551
{
502552
SKIP_IF_NO_XPLUGIN;

include/mysqlx/devapi/collations.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,26 @@ static PUBLIC_API const CollationInfo COLL_CONST_NAME(COLL,CASE);
182182

183183
#define COLL_CONST_NAME(COLL,CASE) COLL_CONST_NAME_##CASE(COLL)
184184

185-
#define COLL_CONST_NAME_bin(COLL) COLL
186-
#define COLL_CONST_NAME_ci(COLL) COLL##_ci
187-
#define COLL_CONST_NAME_cs(COLL) COLL##_cs
185+
#define COLL_CONST_NAME_bin(COLL) COLL
186+
#define COLL_CONST_NAME_ci(COLL) COLL##_ci
187+
#define COLL_CONST_NAME_ai_ci(COLL) COLL##_ai_ci
188+
#define COLL_CONST_NAME_cs(COLL) COLL##_cs
189+
#define COLL_CONST_NAME_as_cs(COLL) COLL##_as_cs
190+
#define COLL_CONST_NAME_as_ci(COLL) COLL##_as_ci
191+
#define COLL_CONST_NAME_as_cs_ks(COLL) COLL##_as_cs_ks
192+
193+
// Add utf8mb4 alias for bin collation for compatibility
194+
195+
#undef COLLATIONS_utf8mb4_EXTRA
196+
#define COLLATIONS_utf8mb4_EXTRA \
197+
static PUBLIC_API const CollationInfo utf8mb4;
188198

189199
CDK_CS_LIST(COLL_DECL)
190200

201+
#undef COLLATIONS_utf8mb4_EXTRA
202+
#define COLLATIONS_utf8mb4_EXTRA
203+
204+
191205
MYSQLX_ABI_END(2,0)
192206
} // mysqlx
193207

0 commit comments

Comments
 (0)