Skip to content

Commit 3a910b1

Browse files
jimmo authored and dpgeorge committed
py/objstr: Optimise mp_obj_new_str_from_vstr for known-safe strings.
The new `mp_obj_new_str_from_utf8_vstr` can be used when you know you already have a unicode-safe string.

Signed-off-by: Jim Mussared <[email protected]>
1 parent 8886458 commit 3a910b1

File tree

8 files changed

+19
-7
lines changed

8 files changed

+19
-7
lines changed

extmod/modujson.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ STATIC mp_obj_t mod_ujson_dump_helper(size_t n_args, const mp_obj_t *pos_args, m
6767
vstr_t vstr;
6868
vstr_init_print(&vstr, 8, &print_ext.base);
6969
mp_obj_print_helper(&print_ext.base, pos_args[0], PRINT_JSON);
70-
return mp_obj_new_str_from_vstr(&vstr);
70+
return mp_obj_new_str_from_utf8_vstr(&vstr);
7171
} else {
7272
// dump(obj, stream)
7373
print_ext.base.data = MP_OBJ_TO_PTR(pos_args[1]);
@@ -103,7 +103,7 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
103103
mp_print_t print;
104104
vstr_init_print(&vstr, 8, &print);
105105
mp_obj_print_helper(&print, obj, PRINT_JSON);
106-
return mp_obj_new_str_from_vstr(&vstr);
106+
return mp_obj_new_str_from_utf8_vstr(&vstr);
107107
}
108108
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
109109

extmod/modure.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
373373
vstr_add_strn(&vstr_return, subj.begin, subj.end - subj.begin);
374374

375375
if (mp_obj_get_type(where) == &mp_type_str) {
376-
return mp_obj_new_str_from_vstr(&vstr_return);
376+
return mp_obj_new_str_from_utf8_vstr(&vstr_return);
377377
} else {
378378
return mp_obj_new_bytes_from_vstr(&vstr_return);
379379
}

ports/unix/modusocket.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ STATIC mp_obj_t mod_socket_inet_ntop(mp_obj_t family_in, mp_obj_t binaddr_in) {
559559
mp_raise_OSError(errno);
560560
}
561561
vstr.len = strlen(vstr.buf);
562-
return mp_obj_new_str_from_vstr(&vstr);
562+
return mp_obj_new_str_from_utf8_vstr(&vstr);
563563
}
564564
STATIC MP_DEFINE_CONST_FUN_OBJ_2(mod_socket_inet_ntop_obj, mod_socket_inet_ntop);
565565

py/modbuiltins.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ STATIC mp_obj_t mp_builtin_repr(mp_obj_t o_in) {
467467
mp_print_t print;
468468
vstr_init_print(&vstr, 16, &print);
469469
mp_obj_print_helper(&print, o_in, PRINT_REPR);
470-
return mp_obj_new_str_from_vstr(&vstr);
470+
return mp_obj_new_str_from_utf8_vstr(&vstr);
471471
}
472472
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_repr_obj, mp_builtin_repr);
473473

py/obj.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,11 @@ mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a
792792
mp_obj_t mp_obj_new_str(const char *data, size_t len);
793793
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len);
794794
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr);
795+
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
796+
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // only use when vstr is already known to be utf-8 encoded
797+
#else
798+
#define mp_obj_new_str_from_utf8_vstr mp_obj_new_str_from_vstr
799+
#endif
795800
mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr);
796801
mp_obj_t mp_obj_new_bytes(const byte *data, size_t len);
797802
mp_obj_t mp_obj_new_bytearray(size_t n, const void *items);

py/objstr.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2256,6 +2256,13 @@ mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr) {
22562256
return mp_obj_new_str_type_from_vstr(&mp_type_str, vstr);
22572257
}
22582258

2259+
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
2260+
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr) {
2261+
// bypasses utf8_check.
2262+
return mp_obj_new_str_type_from_vstr(&mp_type_str, vstr);
2263+
}
2264+
#endif // MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
2265+
22592266
mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr) {
22602267
return mp_obj_new_str_type_from_vstr(&mp_type_bytes, vstr);
22612268
}

py/parsenum.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
160160
mp_printf(&print, "invalid syntax for integer with base %d: ", base);
161161
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
162162
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
163-
mp_obj_new_str_from_vstr(&vstr));
163+
mp_obj_new_str_from_utf8_vstr(&vstr));
164164
raise_exc(exc, lex);
165165
#endif
166166
}

py/persistentcode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) {
203203
if (obj_type == MP_PERSISTENT_OBJ_STR || obj_type == MP_PERSISTENT_OBJ_BYTES) {
204204
read_byte(reader); // skip null terminator
205205
if (obj_type == MP_PERSISTENT_OBJ_STR) {
206-
return mp_obj_new_str_from_vstr(&vstr);
206+
return mp_obj_new_str_from_utf8_vstr(&vstr);
207207
} else {
208208
return mp_obj_new_bytes_from_vstr(&vstr);
209209
}

0 commit comments

Comments
 (0)