Skip to content

Commit 6c3d8d3

Browse files
jimmo authored and dpgeorge committed
py/objstr: Always validate utf-8 for mp_obj_new_str.
All uses of this are either tiny strings or not-known-to-be-safe. Update comments for mp_obj_new_str_copy and mp_obj_new_str_of_type. Signed-off-by: Jim Mussared <[email protected]>
1 parent 3a910b1 commit 6c3d8d3

File tree

3 files changed

+12
-11
lines changed

3 files changed

+12
-11
lines changed

py/obj.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -789,11 +789,11 @@ mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value);
789789
mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base);
790790
mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
791791
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a multi-precision integer object (or raise an overflow exception)
792-
mp_obj_t mp_obj_new_str(const char *data, size_t len);
793-
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len);
794-
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr);
792+
mp_obj_t mp_obj_new_str(const char *data, size_t len); // will check utf-8 (raises UnicodeError)
793+
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len); // input data must be valid utf-8
794+
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr); // will check utf-8 (raises UnicodeError)
795795
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
796-
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // only use when vstr is already known to be utf-8 encoded
796+
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // input data must be valid utf-8
797797
#else
798798
#define mp_obj_new_str_from_utf8_vstr mp_obj_new_str_from_vstr
799799
#endif

py/objstr.c

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -202,11 +202,7 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
202202
} else {
203203
mp_buffer_info_t bufinfo;
204204
mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
205-
#if MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
206-
if (!utf8_check(bufinfo.buf, bufinfo.len)) {
207-
mp_raise_msg(&mp_type_UnicodeError, NULL);
208-
}
209-
#endif
205+
// This will utf-8 check the input.
210206
return mp_obj_new_str(bufinfo.buf, bufinfo.len);
211207
}
212208
}
@@ -2268,6 +2264,11 @@ mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr) {
22682264
}
22692265

22702266
mp_obj_t mp_obj_new_str(const char *data, size_t len) {
2267+
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
2268+
if (!utf8_check((byte *)data, len)) {
2269+
mp_raise_msg(&mp_type_UnicodeError, NULL);
2270+
}
2271+
#endif
22712272
qstr q = qstr_find_strn(data, len);
22722273
if (q != MP_QSTRnull) {
22732274
// qstr with this data already exists

py/objstr.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,8 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t
8888
void mp_str_print_json(const mp_print_t *print, const byte *str_data, size_t str_len);
8989
mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs);
9090
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args);
91-
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len);
92-
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len);
91+
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, input data must be valid utf-8
92+
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, will check utf-8 (raises UnicodeError)
9393

9494
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
9595
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);

0 commit comments

Comments
 (0)