Skip to content

Commit 397cbc8

Browse files
committed
Fixes in code handling MySQL collation information:
- add bin constant for utf8mb4 binary collation (to be consistent with other charsets) - fix collation names to agree with ones used in the server - change the way collation sensitivity info is handled
1 parent 7e8b2bd commit 397cbc8

File tree

4 files changed

+185
-104
lines changed

4 files changed

+185
-104
lines changed

cdk/include/mysql/cdk/protocol/mysqlx/collations.h

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,18 @@
3939
#define MYSQL_CDK_PROTOCOL_MYSQLX_COLLATIONS_H
4040

4141
/*
42-
Each line X(CS, ID, COLL, CASE) in the expansion of
42+
Each line X(CS, ID, COLL, SENSITIVITY) in the expansion of
4343
a COLLATION_XXX() macro declares collation with name COLL for character set
44-
CS. ID is the MySQL id number for the collation. CASE is one of ci, cs or bin
45-
and indicates whether it is case insensitive, sensitive or binary collation,
46-
respectively.
44+
CS. ID is the MySQL id number for the collation. SENSITIVITY is either 'bin'
45+
for binary collations or a combination of sensitivity flags such as 'ai_ci',
46+
using the same conventions as the ones used in MySQL collation names.
47+
48+
Note: CS, COLL and SENSITIVITY are used to reconstruct the full MySQL name of
49+
the collation and should follow the same naming conventions (with a few
50+
exceptions that we handle separately).
4751
*/
4852

53+
4954
#define COLLATIONS_big5(X) \
5055
X(big5,1,chinese,ci) \
5156
X(big5,84,bin,bin) \
@@ -228,75 +233,78 @@
228233
X(latin7,79,bin,bin) \
229234

230235
#define COLLATIONS_utf8mb4(X) \
231-
X(utf8mb4,255,uca0900_ai,ci) \
232-
X(utf8mb4,278,uca0900_as,cs) \
233-
X(utf8mb4,46,utf8mb4,bin) \
236+
X(utf8mb4,255,uca0900,ai_ci) \
237+
X(utf8mb4,278,uca0900,as_cs) \
238+
X(utf8mb4,46,bin,bin) \
234239
X(utf8mb4,245,croatian,ci) \
235-
X(utf8mb4,266,cs_0900_ai,ci) \
236-
X(utf8mb4,289,cs_0900_as,cs) \
240+
X(utf8mb4,266,cs_0900,ai_ci) \
241+
X(utf8mb4,289,cs_0900,as_cs) \
237242
X(utf8mb4,234,czech,ci) \
238243
X(utf8mb4,235,danish,ci) \
239-
X(utf8mb4,267,da_0900_ai,ci) \
240-
X(utf8mb4,290,da_0900_as,cs) \
241-
X(utf8mb4,256,de_pb_0900_ai,ci) \
242-
X(utf8mb4,279,de_pb_0900_as,cs) \
243-
X(utf8mb4,273,eo_0900_ai,ci) \
244-
X(utf8mb4,296,eo_0900_as,cs) \
244+
X(utf8mb4,267,da_0900,ai_ci) \
245+
X(utf8mb4,290,da_0900,as_cs) \
246+
X(utf8mb4,256,de_pb_0900,ai_ci) \
247+
X(utf8mb4,279,de_pb_0900,as_cs) \
248+
X(utf8mb4,273,eo_0900,ai_ci) \
249+
X(utf8mb4,296,eo_0900,as_cs) \
245250
X(utf8mb4,241,esperanto,ci) \
246251
X(utf8mb4,230,estonian,ci) \
247-
X(utf8mb4,263,es_0900_ai,ci) \
248-
X(utf8mb4,286,es_0900_as,cs) \
249-
X(utf8mb4,270,es_trad_0900_ai,ci) \
250-
X(utf8mb4,293,es_trad_0900_as,cs) \
251-
X(utf8mb4,262,et_0900_ai,ci) \
252-
X(utf8mb4,285,et_0900_as,cs) \
252+
X(utf8mb4,263,es_0900,ai_ci) \
253+
X(utf8mb4,286,es_0900,as_cs) \
254+
X(utf8mb4,270,es_trad_0900,ai_ci) \
255+
X(utf8mb4,293,es_trad_0900,as_cs) \
256+
X(utf8mb4,262,et_0900,ai_ci) \
257+
X(utf8mb4,285,et_0900,as_cs) \
253258
X(utf8mb4,45,general,ci) \
254259
X(utf8mb4,244,german2,ci) \
255-
X(utf8mb4,275,hr_0900_ai,ci) \
256-
X(utf8mb4,298,hr_0900_as,cs) \
260+
X(utf8mb4,275,hr_0900,ai_ci) \
261+
X(utf8mb4,298,hr_0900,as_cs) \
257262
X(utf8mb4,242,hungarian,ci) \
258-
X(utf8mb4,274,hu_0900_ai,ci) \
259-
X(utf8mb4,297,hu_0900_as,cs) \
263+
X(utf8mb4,274,hu_0900,ai_ci) \
264+
X(utf8mb4,297,hu_0900,as_cs) \
260265
X(utf8mb4,225,icelandic,ci) \
261-
X(utf8mb4,257,is_0900_ai,ci) \
262-
X(utf8mb4,280,is_0900_as,cs) \
263-
X(utf8mb4,303,ja_0900_as,cs) \
266+
X(utf8mb4,257,is_0900,ai_ci) \
267+
X(utf8mb4,280,is_0900,as_cs) \
268+
X(utf8mb4,303,ja_0900,as_cs) \
264269
X(utf8mb4,226,latvian,ci) \
265-
X(utf8mb4,271,la_0900_ai,ci) \
266-
X(utf8mb4,294,la_0900_as,cs) \
270+
X(utf8mb4,271,la_0900,ai_ci) \
271+
X(utf8mb4,294,la_0900,as_cs) \
267272
X(utf8mb4,236,lithuanian,ci) \
268-
X(utf8mb4,268,lt_0900_ai,ci) \
269-
X(utf8mb4,291,lt_0900_as,cs) \
270-
X(utf8mb4,258,lv_0900_ai,ci) \
271-
X(utf8mb4,281,lv_0900_as,cs) \
273+
X(utf8mb4,268,lt_0900,ai_ci) \
274+
X(utf8mb4,291,lt_0900,as_cs) \
275+
X(utf8mb4,258,lv_0900,ai_ci) \
276+
X(utf8mb4,281,lv_0900,as_cs) \
272277
X(utf8mb4,240,persian,ci) \
273-
X(utf8mb4,261,pl_0900_ai,ci) \
274-
X(utf8mb4,284,pl_0900_as,cs) \
278+
X(utf8mb4,261,pl_0900,ai_ci) \
279+
X(utf8mb4,284,pl_0900,as_cs) \
275280
X(utf8mb4,229,polish,ci) \
276281
X(utf8mb4,227,romanian,ci) \
277282
X(utf8mb4,239,roman,ci) \
278-
X(utf8mb4,259,ro_0900_ai,ci) \
279-
X(utf8mb4,282,ro_0900_as,cs) \
283+
X(utf8mb4,259,ro_0900,ai_ci) \
284+
X(utf8mb4,282,ro_0900,as_cs) \
280285
X(utf8mb4,243,sinhala,ci) \
281-
X(utf8mb4,269,sk_0900_ai,ci) \
282-
X(utf8mb4,292,sk_0900_as,cs) \
286+
X(utf8mb4,269,sk_0900,ai_ci) \
287+
X(utf8mb4,292,sk_0900,as_cs) \
283288
X(utf8mb4,237,slovak,ci) \
284289
X(utf8mb4,228,slovenian,ci) \
285-
X(utf8mb4,260,sl_0900_ai,ci) \
286-
X(utf8mb4,283,sl_0900_as,cs) \
290+
X(utf8mb4,260,sl_0900,ai_ci) \
291+
X(utf8mb4,283,sl_0900,as_cs) \
287292
X(utf8mb4,238,spanish2,ci) \
288293
X(utf8mb4,231,spanish,ci) \
289-
X(utf8mb4,264,sv_0900_ai,ci) \
290-
X(utf8mb4,287,sv_0900_as,cs) \
294+
X(utf8mb4,264,sv_0900,ai_ci) \
295+
X(utf8mb4,287,sv_0900,as_cs) \
291296
X(utf8mb4,232,swedish,ci) \
292-
X(utf8mb4,265,tr_0900_ai,ci) \
293-
X(utf8mb4,288,tr_0900_as,cs) \
297+
X(utf8mb4,265,tr_0900,ai_ci) \
298+
X(utf8mb4,288,tr_0900,as_cs) \
294299
X(utf8mb4,233,turkish,ci) \
295300
X(utf8mb4,246,unicode_520,ci) \
296301
X(utf8mb4,224,unicode,ci) \
297302
X(utf8mb4,247,vietnamese,ci) \
298-
X(utf8mb4,277,vi_0900_ai,ci) \
299-
X(utf8mb4,300,vi_0900_as,cs) \
303+
X(utf8mb4,277,vi_0900,ai_ci) \
304+
X(utf8mb4,300,vi_0900,as_cs) \
305+
COLLATIONS_utf8mb4_EXTRA
306+
307+
#define COLLATIONS_utf8mb4_EXTRA
300308

301309

302310
#define COLLATIONS_cp1251(X) \

devapi/result.cc

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,15 @@ struct CollationInfo::Access
280280
{
281281
enum coll_case {
282282
case_ci = CollationInfo::case_ci,
283+
case_ai_ci = case_ci,
283284
case_cs = CollationInfo::case_cs,
285+
case_as_cs = case_cs,
284286
case_bin = CollationInfo::case_bin
285287
};
286288

287-
static CollationInfo mk(CharacterSet _cs, unsigned _id, coll_case _case, const char *_name)
289+
static CollationInfo mk(
290+
CharacterSet _cs, unsigned _id, coll_case _case, const char *_name
291+
)
288292
{
289293
CollationInfo ci;
290294
ci.m_cs = _cs;
@@ -295,21 +299,69 @@ struct CollationInfo::Access
295299
}
296300
};
297301

302+
303+
/*
  A helper function that reconstructs the MySQL collation name from the data
  given by the COLLATIONS_XXX() lists.

  In most cases the collation name is just a concatenation of the charset
  name, the collation and the sensitivity flags; the caller passes this
  default as the pre-built string 'name'. There are a few exceptions to this
  general rule:

  - for binary collations (sensitivity "bin") 'name_bin' is used instead,
    except for the "binary" charset, whose name is just "binary" (no _bin
    suffix);

  - for generic UCA collations, such as uca0900, the "uca" prefix is not
    present in the MySQL collation name. For example, for the uca0900
    collation with "ai_ci" sensitivity the collation name is
    "utf8mb4_0900_ai_ci" while the value of 'name' is
    "utf8mb4_uca0900_ai_ci", so the name is rebuilt here.

  The individual components of the name (cs, coll, sensitivity) are passed
  so that these exceptions can be detected and the name customized.

  Returns 'name', 'name_bin', the literal "binary", or a pointer to a string
  built by this function. A built string is never freed, so the returned
  pointer stays valid until the program exits.
*/

const char*
coll_name(
  std::string cs, std::string coll, std::string sensitivity,
  const char *name, const char *name_bin)
{
  /*
    Storage for the names built below. The result is passed on as a raw
    const char* (see COLL_CONST_DEF, which hands it to
    CollationInfo::Access::mk()), so the character data must never move.
    Keeping std::string objects directly in the vector is not safe: when
    the vector grows the strings are moved and, for short strings stored
    inline in the string object, previously returned c_str() pointers
    become dangling. Each name is therefore allocated separately and only
    the pointers are stored here. The strings are intentionally never
    deleted.
  */

  static std::vector<std::string*> special;

  // Note: special exception for "binary" collation (no _bin suffix)

  if (sensitivity == "bin")
    return cs == "binary" ? "binary" : name_bin;

  // Generic UCA collation: drop the "uca" prefix from the collation part.

  if (coll.compare(0, 3, "uca") == 0)
  {
    special.push_back(
      new std::string(cs + "_" + coll.substr(3) + "_" + sensitivity)
    );
    return special.back()->c_str();
  }

  return name;
}
340+
341+
298342
#define COLL_DEFS(CS) COLLATIONS_##CS(COLL_CONST_DEF)
299343

300344
#define COLL_CONST_DEF(CS,ID,COLL,CASE) \
301345
const CollationInfo \
302346
Collation<CharacterSet::CS>::COLL_CONST_NAME(COLL,CASE) = \
303347
CollationInfo::Access::mk(CharacterSet::CS, ID, \
304348
CollationInfo::Access::case_##CASE, \
305-
COLL_NAME_##CASE(CS,COLL));
349+
COLL_NAME(CS,COLL,CASE));
350+
351+
#define COLL_NAME(CS,COLL,CASE) \
352+
coll_name(#CS, #COLL, #CASE, #CS "_" #COLL "_" #CASE, #CS "_bin")
353+
354+
// Add utf8mb4 alias for bin collation for compatibility
306355

307-
#define COLL_NAME_bin(CS,COLL) #CS "_bin"
308-
#define COLL_NAME_ci(CS,COLL) #CS "_" #COLL "_ci"
309-
#define COLL_NAME_cs(CS,COLL) #CS "_" #COLL "_cs"
356+
#undef COLLATIONS_utf8mb4_EXTRA
357+
#define COLLATIONS_utf8mb4_EXTRA \
358+
const CollationInfo Collation<CharacterSet::utf8mb4>::utf8mb4 = \
359+
Collation<CharacterSet::utf8mb4>::bin;
310360

311361
CDK_CS_LIST(COLL_DEFS)
312362

363+
#undef COLLATIONS_utf8mb4_EXTRA
364+
#define COLLATIONS_utf8mb4_EXTRA
313365

314366
/*
315367
Handling result data

include/mysqlx/devapi/collations.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,24 @@ static PUBLIC_API const CollationInfo COLL_CONST_NAME(COLL,CASE);
182182

183183
#define COLL_CONST_NAME(COLL,CASE) COLL_CONST_NAME_##CASE(COLL)
184184

185-
#define COLL_CONST_NAME_bin(COLL) COLL
186-
#define COLL_CONST_NAME_ci(COLL) COLL##_ci
187-
#define COLL_CONST_NAME_cs(COLL) COLL##_cs
185+
#define COLL_CONST_NAME_bin(COLL) COLL
186+
#define COLL_CONST_NAME_ci(COLL) COLL##_ci
187+
#define COLL_CONST_NAME_ai_ci(COLL) COLL##_ai_ci
188+
#define COLL_CONST_NAME_cs(COLL) COLL##_cs
189+
#define COLL_CONST_NAME_as_cs(COLL) COLL##_as_cs
190+
191+
// Add utf8mb4 alias for bin collation for compatibility
192+
193+
#undef COLLATIONS_utf8mb4_EXTRA
194+
#define COLLATIONS_utf8mb4_EXTRA \
195+
static PUBLIC_API const CollationInfo utf8mb4;
188196

189197
CDK_CS_LIST(COLL_DECL)
190198

199+
#undef COLLATIONS_utf8mb4_EXTRA
200+
#define COLLATIONS_utf8mb4_EXTRA
201+
202+
191203
MYSQLX_ABI_END(2,0)
192204
} // mysqlx
193205

0 commit comments

Comments
 (0)