Skip to content

Commit 7abcdc8

Browse files
rsomla1bkandasa
authored and committed
bug#28240202: UTF8MB4 CHARACTER DATA NOT HANDLED CORRECTLY
Fixed by using the utf8 codec not only for the utf8 charset but also for the utf8mb4 charset. (cherry picked from commit 3b254d9e8a69916b4b085b42ff123b9500326836)
1 parent 37739f3 commit 7abcdc8

File tree

2 files changed

+30
-6
lines changed

2 files changed

+30
-6
lines changed

cdk/core/codec.cc

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -108,9 +108,14 @@ foundation::api::String_codec* Format<TYPE_STRING>::codec() const
108108
static foundation::String_codec<foundation::codecvt_utf8> utf8;
109109
static foundation::String_codec<foundation::codecvt_ascii> ascii;
110110

111-
return Charset::utf8 == charset() ?
112-
(foundation::api::String_codec*)&utf8
113-
: (foundation::api::String_codec*)&ascii;
111+
switch (charset())
112+
{
113+
case Charset::utf8:
114+
case Charset::utf8mb4:
115+
return (foundation::api::String_codec*)&utf8;
116+
default:
117+
return (foundation::api::String_codec*)&ascii;
118+
}
114119
}
115120

116121

devapi/tests/types-t.cc

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -408,7 +408,9 @@ TEST_F(Types, string)
408408
"CREATE TABLE test.types("
409409
" c0 VARCHAR(10) COLLATE latin2_general_ci,"
410410
" c1 VARCHAR(32) COLLATE utf8_swedish_ci,"
411-
" c2 VARCHAR(32) CHARACTER SET latin2"
411+
" c2 VARCHAR(32) CHARACTER SET latin2,"
412+
" c3 VARCHAR(32) CHARACTER SET utf8mb4,"
413+
" c4 VARCHAR(32)" // use default collation
412414
")"
413415
);
414416

@@ -417,12 +419,13 @@ TEST_F(Types, string)
417419
string str0(L"Foobar");
418420
string str1(L"Mog\u0119 je\u015B\u0107 szk\u0142o");
419421

420-
types.insert().values(str0, str1, str1).execute();
422+
types.insert().values(str0, str1, str1, str1, str1).execute();
421423

422424
cout << "Table prepared, querying it..." << endl;
423425

424426
RowResult res = getSchema("test").getTable("types").select().execute();
425427

428+
426429
const Column &c0 = res.getColumn(0);
427430
EXPECT_EQ(Type::STRING, c0.getType());
428431
cout << "column #0 length: " << c0.getLength() << endl;
@@ -456,10 +459,26 @@ TEST_F(Types, string)
456459

457460
EXPECT_EQ(CharacterSet::latin2, c2.getCharacterSet());
458461

462+
const Column &c3 = res.getColumn(3);
463+
EXPECT_EQ(Type::STRING, c3.getType());
464+
cout << "column #3 length: " << c3.getLength() << endl;
465+
cout << "column #3 charset: " << c3.getCharacterSetName() << endl;
466+
cout << "column #3 collation: " << c3.getCollationName() << endl;
467+
468+
EXPECT_EQ(CharacterSet::utf8mb4, c3.getCharacterSet());
469+
470+
const Column &c4 = res.getColumn(4);
471+
EXPECT_EQ(Type::STRING, c4.getType());
472+
cout << "column #4 length: " << c4.getLength() << endl;
473+
cout << "column #4 charset: " << c4.getCharacterSetName() << endl;
474+
cout << "column #4 collation: " << c4.getCollationName() << endl;
475+
459476
Row row = res.fetchOne();
460477

461478
EXPECT_EQ(str0, (string)row[0]);
462479
EXPECT_EQ(str1, (string)row[1]);
480+
EXPECT_EQ(str1, (string)row[3]);
481+
EXPECT_EQ(str1, (string)row[4]);
463482

464483
/*
465484
FIXME: the third column contains a non-utf8 string which uses non-ascii
@@ -469,7 +488,7 @@ TEST_F(Types, string)
469488
Replace with EXPECT_EQ() once we handle all MySQL charsets.
470489
*/
471490

472-
EXPECT_THROW((string)row[2], Error);
491+
//EXPECT_THROW((string)row[2], Error);
473492
}
474493

475494

0 commit comments

Comments
 (0)