Skip to content

Commit 82764da

Browse files
committed
BUG30416704: Binary columns returned as strings
This patch fixes binary columns being returned as strings, in both the pure Python implementation and the C extension. Existing tests were updated accordingly, and new regression tests were added.
1 parent 55f67e8 commit 82764da

13 files changed

+273
-108
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ v8.0.24
1818
- BUG#32435181: Add support for Django 3.2
1919
- BUG#32029891: Add context manager support for pooled connections
2020
- BUG#31490101: Fix wrong cast of Python unicode to std::string
21+
- BUG#30416704: Binary columns returned as strings
2122

2223
v8.0.23
2324
=======

lib/mysql/connector/conversion.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2009, 2020, Oracle and/or its affiliates.
1+
# Copyright (c) 2009, 2021, Oracle and/or its affiliates.
22
#
33
# This program is free software; you can redistribute it and/or modify
44
# it under the terms of the GNU General Public License, version 2.0, as
@@ -582,21 +582,15 @@ def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103
582582
583583
Returns string typed columns as string type.
584584
"""
585+
if self.charset == "binary":
586+
return value
585587
if dsc is not None:
586-
# Check if we deal with a SET
588+
if dsc[1] == FieldType.JSON and self.use_unicode:
589+
return value.decode(self.charset)
587590
if dsc[7] & FieldFlag.SET:
588591
return self._SET_to_python(value, dsc)
589-
if dsc[7] & FieldFlag.BINARY:
590-
if self.charset != 'binary' and not isinstance(value, str):
591-
try:
592-
return value.decode(self.charset)
593-
except (LookupError, UnicodeDecodeError):
594-
return value
595-
else:
596-
return value
597-
598-
if self.charset == 'binary':
599-
return value
592+
if dsc[8] == 63: # 'binary' charset
593+
return value
600594
if isinstance(value, (bytes, bytearray)) and self.use_unicode:
601595
return value.decode(self.charset)
602596

lib/mysql/connector/protocol.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2009, 2020, Oracle and/or its affiliates.
1+
# Copyright (c) 2009, 2021, Oracle and/or its affiliates.
22
#
33
# This program is free software; you can redistribute it and/or modify
44
# it under the terms of the GNU General Public License, version 2.0, as
@@ -266,7 +266,7 @@ def parse_column_count(self, packet):
266266
except (struct.error, ValueError):
267267
raise errors.InterfaceError("Failed parsing column count")
268268

269-
def parse_column(self, packet, charset='utf-8'):
269+
def parse_column(self, packet, encoding='utf-8'):
270270
"""Parse a MySQL column-packet"""
271271
(packet, _) = utils.read_lc_string(packet[4:]) # catalog
272272
(packet, _) = utils.read_lc_string(packet) # db
@@ -276,20 +276,26 @@ def parse_column(self, packet, charset='utf-8'):
276276
(packet, _) = utils.read_lc_string(packet) # org_name
277277

278278
try:
279-
(_, _, field_type,
280-
flags, _) = struct.unpack('<xHIBHBxx', packet)
279+
(
280+
charset,
281+
_,
282+
column_type,
283+
flags,
284+
_,
285+
) = struct.unpack('<xHIBHBxx', packet)
281286
except struct.error:
282287
raise errors.InterfaceError("Failed parsing column information")
283288

284289
return (
285-
name.decode(charset),
286-
field_type,
290+
name.decode(encoding),
291+
column_type,
287292
None, # display_size
288293
None, # internal_size
289294
None, # precision
290295
None, # scale
291296
~flags & FieldFlag.NOT_NULL, # null_ok
292297
flags, # MySQL specific
298+
charset,
293299
)
294300

295301
def parse_eof(self, packet):

src/include/mysql_capi_conversion.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0, as
@@ -70,8 +70,11 @@ PyObject*
7070
mytopy_bit(const char *data, const unsigned long length);
7171

7272
PyObject*
73-
mytopy_string(const char *data, const unsigned long length,
74-
const unsigned long flags, const char *charset,
73+
mytopy_string(const char *data,
74+
enum_field_types field_type,
75+
const unsigned int field_charsetnr,
76+
const unsigned long field_length,
77+
const char *charset,
7578
unsigned int use_unicode);
7679

7780
#endif /* MYCONNPY_MYSQL_CAPI_CONVERSION_H */

src/mysql_capi.c

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014, 2020, Oracle and/or its affiliates.
2+
* Copyright (c) 2014, 2021, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0, as
@@ -213,33 +213,57 @@ fetch_fields(MYSQL_RES *result, unsigned int num_fields, MY_CHARSET_INFO *cs,
213213
{
214214
field = PyTuple_New(11);
215215

216-
decoded= mytopy_string(myfs[i].catalog, myfs[i].catalog_length,
217-
myfs[i].flags, charset, use_unicode);
216+
decoded= mytopy_string(myfs[i].catalog,
217+
myfs[i].type,
218+
45,
219+
myfs[i].catalog_length,
220+
charset,
221+
use_unicode);
218222
if (NULL == decoded) return NULL; // decode error
219223
PyTuple_SET_ITEM(field, 0, decoded);
220224

221-
decoded= mytopy_string(myfs[i].db, myfs[i].db_length,
222-
myfs[i].flags, charset, use_unicode);
225+
decoded= mytopy_string(myfs[i].db,
226+
myfs[i].type,
227+
45,
228+
myfs[i].db_length,
229+
charset,
230+
use_unicode);
223231
if (NULL == decoded) return NULL; // decode error
224232
PyTuple_SET_ITEM(field, 1, decoded);
225233

226-
decoded= mytopy_string(myfs[i].table, myfs[i].table_length,
227-
myfs[i].flags, charset, use_unicode);
234+
decoded= mytopy_string(myfs[i].table,
235+
myfs[i].type,
236+
45,
237+
myfs[i].table_length,
238+
charset,
239+
use_unicode);
228240
if (NULL == decoded) return NULL; // decode error
229241
PyTuple_SET_ITEM(field, 2, decoded);
230242

231-
decoded= mytopy_string(myfs[i].org_table, myfs[i].org_table_length,
232-
myfs[i].flags, charset, use_unicode);
243+
decoded= mytopy_string(myfs[i].org_table,
244+
myfs[i].type,
245+
45,
246+
myfs[i].org_table_length,
247+
charset,
248+
use_unicode);
233249
if (NULL == decoded) return NULL; // decode error
234250
PyTuple_SET_ITEM(field, 3, decoded);
235251

236-
decoded= mytopy_string(myfs[i].name, myfs[i].name_length,
237-
myfs[i].flags, charset, use_unicode);
252+
decoded= mytopy_string(myfs[i].name,
253+
myfs[i].type,
254+
45,
255+
myfs[i].name_length,
256+
charset,
257+
use_unicode);
238258
if (NULL == decoded) return NULL; // decode error
239259
PyTuple_SET_ITEM(field, 4, decoded);
240260

241-
decoded= mytopy_string(myfs[i].org_name, myfs[i].org_name_length,
242-
myfs[i].flags, charset, use_unicode);
261+
decoded= mytopy_string(myfs[i].org_name,
262+
myfs[i].type,
263+
45,
264+
myfs[i].org_name_length,
265+
charset,
266+
use_unicode);
243267
if (NULL == decoded) return NULL; // decode error
244268
PyTuple_SET_ITEM(field, 5, decoded);
245269

@@ -383,13 +407,9 @@ MySQL_init(MySQL *self, PyObject *args, PyObject *kwds)
383407
self->raw= self->raw_at_connect;
384408
}
385409

386-
self->use_unicode= 0;
387-
if (use_unicode)
410+
if (use_unicode && use_unicode == Py_False)
388411
{
389-
if (use_unicode == Py_True)
390-
{
391-
self->use_unicode= 1;
392-
}
412+
self->use_unicode= 0;
393413
}
394414

395415
if (charset_name) {
@@ -2383,7 +2403,7 @@ MySQL_fetch_row(MySQL *self)
23832403
unsigned long *field_lengths;
23842404
unsigned int num_fields;
23852405
unsigned int i;
2386-
unsigned long field_type, field_flags;
2406+
unsigned long field_charsetnr, field_type, field_flags;
23872407
const char *charset= NULL;
23882408

23892409
CHECK_SESSION(self);
@@ -2458,6 +2478,7 @@ MySQL_fetch_row(MySQL *self)
24582478
Py_RETURN_NONE;
24592479
}
24602480

2481+
field_charsetnr= PyLong_AsUnsignedLong(PyTuple_GetItem(field_info, 6));
24612482
field_type= PyLong_AsUnsignedLong(PyTuple_GetItem(field_info, 8));
24622483
field_flags= PyLong_AsUnsignedLong(PyTuple_GetItem(field_info, 9));
24632484

@@ -2491,8 +2512,12 @@ MySQL_fetch_row(MySQL *self)
24912512
field_type == MYSQL_TYPE_ENUM ||
24922513
field_type == MYSQL_TYPE_VAR_STRING)
24932514
{
2494-
value= mytopy_string(row[i], field_lengths[i], field_flags,
2495-
charset, self->use_unicode);
2515+
value= mytopy_string(row[i],
2516+
field_type,
2517+
field_charsetnr,
2518+
field_lengths[i],
2519+
charset,
2520+
self->use_unicode);
24962521
if (!value)
24972522
{
24982523
goto error;
@@ -2563,8 +2588,12 @@ MySQL_fetch_row(MySQL *self)
25632588
}
25642589
else
25652590
{
2566-
value= mytopy_string(row[i], field_lengths[i], field_flags,
2567-
charset, self->use_unicode);
2591+
value= mytopy_string(row[i],
2592+
field_type,
2593+
field_charsetnr,
2594+
field_lengths[i],
2595+
charset,
2596+
self->use_unicode);
25682597
}
25692598
PyTuple_SET_ITEM(result_row, i, value);
25702599
}
@@ -2577,8 +2606,12 @@ MySQL_fetch_row(MySQL *self)
25772606
else
25782607
{
25792608
// Do our best to convert whatever we got from MySQL to a str/bytes
2580-
value = mytopy_string(row[i], field_lengths[i], field_flags,
2581-
charset, self->use_unicode);
2609+
value = mytopy_string(row[i],
2610+
field_type,
2611+
field_charsetnr,
2612+
field_lengths[i],
2613+
charset,
2614+
self->use_unicode);
25822615
PyTuple_SET_ITEM(result_row, i, value);
25832616
}
25842617
}
@@ -3553,6 +3586,10 @@ MySQLPrepStmt_fetch_row(MySQLPrepStmt *self)
35533586
PyTuple_SET_ITEM(row, i,
35543587
mytopy_bit(PyBytes_AsString(obj), self->cols[i].length));
35553588
}
3589+
else if (field->charsetnr == 63) /* 'binary' charset */
3590+
{
3591+
PyTuple_SET_ITEM(row, i, PyByteArray_FromObject(obj));
3592+
}
35563593
else
35573594
{
35583595
PyTuple_SET_ITEM(row, i, PyUnicode_FromString(PyBytes_AsString(obj)));

src/mysql_capi_conversion.c

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014, 2020, Oracle and/or its affiliates.
2+
* Copyright (c) 2014, 2021, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0, as
@@ -711,32 +711,40 @@ pytomy_decimal(PyObject *obj)
711711
Convert, and decode if needed, a string MySQL value to
712712
Python str or bytes.
713713
714-
@param data string to be converted
715-
@param length length of data
716-
@param flags field flags
717-
@param charset character used for decoding
718-
@param use_unicode return Unicode
714+
@param data string to be converted
715+
@param field_type field type
716+
@param field_charsetnr charset number
717+
@param field_length length of data
718+
@param charset character used for decoding
719+
@param use_unicode use unicode
719720
720721
@return Converted string
721-
@retval PyUnicode if not BINARY_FLAG
722-
@retval PyBytes Python v3 if not use_unicode
723-
@retval NULL Exception
722+
@retval PyUnicode if use unicode
723+
@retval PyBytes if not use_unicode or charset is 'binary'
724+
@retval NULL Exception
724725
*/
725726
PyObject*
726-
mytopy_string(const char *data, const unsigned long length,
727-
const unsigned long flags, const char *charset,
727+
mytopy_string(const char *data,
728+
enum_field_types field_type,
729+
const unsigned int field_charsetnr,
730+
const unsigned long field_length,
731+
const char *charset,
728732
unsigned int use_unicode)
729733
{
730734
if (!charset || !data) {
731735
return NULL;
732736
}
733737

734-
if (!((flags != 0) & flags & BINARY_FLAG) && use_unicode && strcmp(charset, "binary") != 0)
738+
if (strcmp(charset, "binary") == 0)
735739
{
736-
return PyUnicode_Decode(data, length, charset, NULL);
740+
return PyByteArray_FromStringAndSize(data, field_length);
737741
}
738-
else
742+
743+
/* 'binary' charset = 63 */
744+
if (use_unicode && (field_type == MYSQL_TYPE_JSON || field_charsetnr != 63))
739745
{
740-
return PyBytes_FromStringAndSize(data, length);
746+
return PyUnicode_Decode(data, field_length, charset, NULL);
741747
}
748+
749+
return PyByteArray_FromStringAndSize(data, field_length);
742750
}

tests/cext/test_cext_api.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
# Copyright (c) 2014, 2020, Oracle and/or its affiliates.
3+
# Copyright (c) 2014, 2021, Oracle and/or its affiliates.
44
#
55
# This program is free software; you can redistribute it and/or modify
66
# it under the terms of the GNU General Public License, version 2.0, as
@@ -337,12 +337,12 @@ def test_select_db(self):
337337

338338
cmy.select_db('mysql')
339339
cmy.query("SELECT DATABASE()")
340-
self.assertEqual(b'mysql', cmy.fetch_row()[0])
340+
self.assertEqual('mysql', cmy.fetch_row()[0])
341341
cmy.free_result()
342342

343343
cmy.select_db('myconnpy')
344344
cmy.query("SELECT DATABASE()")
345-
self.assertEqual(b'myconnpy', cmy.fetch_row()[0])
345+
self.assertEqual('myconnpy', cmy.fetch_row()[0])
346346
cmy.free_result()
347347

348348
def test_affected_rows(self):
@@ -612,12 +612,12 @@ def test_set_character_set(self):
612612
cmy1.set_character_set, 'ham_spam')
613613

614614
variables = ('character_set_connection',)
615-
exp = b'utf8'
615+
exp = 'utf8'
616616
self.assertIn(
617617
exp,
618-
get_variables(cmy1, variables=variables)[b'character_set_connection'])
618+
get_variables(cmy1, variables=variables)['character_set_connection'])
619619

620-
exp = {b'character_set_connection': b'big5',}
620+
exp = {'character_set_connection': 'big5'}
621621
cmy1.set_character_set('big5')
622622
self.assertEqual(exp, get_variables(cmy1, variables=variables))
623623

@@ -681,7 +681,7 @@ def create_table(charset):
681681
fetched = fetch_rows(cmy)[0][0]
682682
except UnicodeEncodeError:
683683
self.fail("Could not encode {0}".format(encoding))
684-
self.assertEqual(case, fetched.decode(encoding),
684+
self.assertEqual(case, fetched,
685685
"Failed with case {0}/{1}".format(i, encoding))
686686

687687
cmy.query("DROP TABLE IF EXISTS {0}".format(table))
@@ -838,9 +838,9 @@ def test_next_result(self):
838838
"SELECT 'SPAM'",
839839
)
840840
exp = [
841-
[(b'HAM',)],
841+
[('HAM',)],
842842
{'insert_id': 1, 'affected': 1},
843-
[(b'SPAM',)]
843+
[('SPAM',)]
844844
]
845845

846846
result = []

0 commit comments

Comments
 (0)