Skip to content

Commit 7ea5039

Browse files
committed
py/qstr: Add support for MICROPY_QSTR_BYTES_IN_HASH=0.
This disables using qstr hashes altogether, which saves RAM and flash (two bytes per interned string on a typical build) as well as code size. On PYBV11 this is worth over 3k flash. qstr comparison will now be done just by length and then data. This affects qstr_find_strn, although this has a negligible performance impact as, for a given comparison, the length and first character will usually be different anyway. String hashing (e.g. builtin `hash()` and map.c) now needs to compute the hash dynamically, and for the map case this does come at a performance cost. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <[email protected]>
1 parent 307ecc5 commit 7ea5039

File tree

4 files changed

+66
-23
lines changed

4 files changed

+66
-23
lines changed

py/makeqstrdata.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,8 @@ def compute_hash(qstr, bytes_hash):
295295
for b in qstr:
296296
hash = (hash * 33) ^ b
297297
# Make sure that valid hash is never zero, zero means "hash not computed"
298-
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
298+
# if bytes_hash is zero, assume a 16-bit mask (to match qstr.c)
299+
return (hash & ((1 << (8 * (bytes_hash or 2))) - 1)) or 1
299300

300301

301302
def qstr_escape(qst):

py/qstr.c

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@
4242
// A qstr is an index into the qstr pool.
4343
// The data for a qstr is \0 terminated (so they can be printed using printf)
4444

45+
#if MICROPY_QSTR_BYTES_IN_HASH
4546
#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
47+
#else
48+
#define Q_HASH_MASK (0xffff)
49+
#endif
4650

4751
#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
4852
#define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
@@ -77,6 +81,7 @@ size_t qstr_compute_hash(const byte *data, size_t len) {
7781
// future .mpy version we could re-order them and make it sorted). It also
7882
// contains additional qstrs that must have IDs <256, see operator_qstr_list
7983
// in makeqstrdata.py.
84+
#if MICROPY_QSTR_BYTES_IN_HASH
8085
const qstr_hash_t mp_qstr_const_hashes_static[] = {
8186
#ifndef NO_QSTR
8287
#define QDEF0(id, hash, len, str) hash,
@@ -86,6 +91,7 @@ const qstr_hash_t mp_qstr_const_hashes_static[] = {
8691
#undef QDEF1
8792
#endif
8893
};
94+
#endif
8995

9096
const qstr_len_t mp_qstr_const_lengths_static[] = {
9197
#ifndef NO_QSTR
@@ -103,7 +109,9 @@ const qstr_pool_t mp_qstr_const_pool_static = {
103109
false, // is_sorted
104110
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
105111
MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
112+
#if MICROPY_QSTR_BYTES_IN_HASH
106113
(qstr_hash_t *)mp_qstr_const_hashes_static,
114+
#endif
107115
(qstr_len_t *)mp_qstr_const_lengths_static,
108116
{
109117
#ifndef NO_QSTR
@@ -118,6 +126,7 @@ const qstr_pool_t mp_qstr_const_pool_static = {
118126

119127
// The next pool is the remainder of the qstrs defined in the firmware. This
120128
// is sorted.
129+
#if MICROPY_QSTR_BYTES_IN_HASH
121130
const qstr_hash_t mp_qstr_const_hashes[] = {
122131
#ifndef NO_QSTR
123132
#define QDEF0(id, hash, len, str)
@@ -127,6 +136,7 @@ const qstr_hash_t mp_qstr_const_hashes[] = {
127136
#undef QDEF1
128137
#endif
129138
};
139+
#endif
130140

131141
const qstr_len_t mp_qstr_const_lengths[] = {
132142
#ifndef NO_QSTR
@@ -144,7 +154,9 @@ const qstr_pool_t mp_qstr_const_pool = {
144154
true, // is_sorted
145155
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
146156
MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
157+
#if MICROPY_QSTR_BYTES_IN_HASH
147158
(qstr_hash_t *)mp_qstr_const_hashes,
159+
#endif
148160
(qstr_len_t *)mp_qstr_const_lengths,
149161
{
150162
#ifndef NO_QSTR
@@ -188,8 +200,13 @@ STATIC const qstr_pool_t *find_qstr(qstr *q) {
188200
}
189201

190202
// qstr_mutex must be taken while in this function
191-
STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
203+
STATIC qstr qstr_add(mp_uint_t len, const char *q_ptr) {
204+
#if MICROPY_QSTR_BYTES_IN_HASH
205+
mp_uint_t hash = qstr_compute_hash((const byte *)q_ptr, len);
192206
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
207+
#else
208+
DEBUG_printf("QSTR: add len=%d data=%.*s\n", len, len, q_ptr);
209+
#endif
193210

194211
// make sure we have room in the pool for a new qstr
195212
if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
@@ -199,7 +216,11 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
199216
new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
200217
#endif
201218
mp_uint_t pool_size = sizeof(qstr_pool_t)
202-
+ (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc;
219+
+ (sizeof(const char *)
220+
#if MICROPY_QSTR_BYTES_IN_HASH
221+
+ sizeof(qstr_hash_t)
222+
#endif
223+
+ sizeof(qstr_len_t)) * new_alloc;
203224
qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
204225
if (pool == NULL) {
205226
// Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
@@ -211,8 +232,12 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
211232
QSTR_EXIT();
212233
m_malloc_fail(new_alloc);
213234
}
235+
#if MICROPY_QSTR_BYTES_IN_HASH
214236
pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
215237
pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
238+
#else
239+
pool->lengths = (qstr_len_t *)(pool->qstrs + new_alloc);
240+
#endif
216241
pool->prev = MP_STATE_VM(last_pool);
217242
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
218243
pool->alloc = new_alloc;
@@ -223,7 +248,9 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
223248

224249
// add the new qstr
225250
mp_uint_t at = MP_STATE_VM(last_pool)->len;
251+
#if MICROPY_QSTR_BYTES_IN_HASH
226252
MP_STATE_VM(last_pool)->hashes[at] = hash;
253+
#endif
227254
MP_STATE_VM(last_pool)->lengths[at] = len;
228255
MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
229256
MP_STATE_VM(last_pool)->len++;
@@ -238,8 +265,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
238265
return MP_QSTR_;
239266
}
240267

268+
#if MICROPY_QSTR_BYTES_IN_HASH
241269
// work out hash of str
242270
size_t str_hash = qstr_compute_hash((const byte *)str, str_len);
271+
#endif
243272

244273
// search pools for the data
245274
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
@@ -261,7 +290,11 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
261290

262291
// sequential search for the remaining strings
263292
for (mp_uint_t at = low; at < high + 1; at++) {
264-
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
293+
if (
294+
#if MICROPY_QSTR_BYTES_IN_HASH
295+
pool->hashes[at] == str_hash &&
296+
#endif
297+
pool->lengths[at] == str_len
265298
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
266299
return pool->total_prev_len + at;
267300
}
@@ -329,18 +362,21 @@ qstr qstr_from_strn(const char *str, size_t len) {
329362
MP_STATE_VM(qstr_last_used) += n_bytes;
330363

331364
// store the interned strings' data
332-
size_t hash = qstr_compute_hash((const byte *)str, len);
333365
memcpy(q_ptr, str, len);
334366
q_ptr[len] = '\0';
335-
q = qstr_add(hash, len, q_ptr);
367+
q = qstr_add(len, q_ptr);
336368
}
337369
QSTR_EXIT();
338370
return q;
339371
}
340372

341373
mp_uint_t qstr_hash(qstr q) {
342374
const qstr_pool_t *pool = find_qstr(&q);
375+
#if MICROPY_QSTR_BYTES_IN_HASH
343376
return pool->hashes[q];
377+
#else
378+
return qstr_compute_hash((byte *)pool->qstrs[q], pool->lengths[q]);
379+
#endif
344380
}
345381

346382
size_t qstr_len(qstr q) {
@@ -375,7 +411,11 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si
375411
*n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
376412
#else
377413
*n_total_bytes += sizeof(qstr_pool_t)
378-
+ (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc;
414+
+ (sizeof(const char *)
415+
#if MICROPY_QSTR_BYTES_IN_HASH
416+
+ sizeof(qstr_hash_t)
417+
#endif
418+
+ sizeof(qstr_len_t)) * pool->alloc;
379419
#endif
380420
}
381421
*n_total_bytes += *n_str_data_bytes;

py/qstr.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ enum {
6060
typedef size_t qstr;
6161
typedef uint16_t qstr_short_t;
6262

63-
#if MICROPY_QSTR_BYTES_IN_HASH == 1
63+
#if MICROPY_QSTR_BYTES_IN_HASH == 0
64+
// No qstr_hash_t type needed.
65+
#elif MICROPY_QSTR_BYTES_IN_HASH == 1
6466
typedef uint8_t qstr_hash_t;
6567
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
6668
typedef uint16_t qstr_hash_t;
@@ -82,7 +84,9 @@ typedef struct _qstr_pool_t {
8284
size_t is_sorted : 1;
8385
size_t alloc;
8486
size_t len;
87+
#if MICROPY_QSTR_BYTES_IN_HASH
8588
qstr_hash_t *hashes;
89+
#endif
8690
qstr_len_t *lengths;
8791
const char *qstrs[];
8892
} qstr_pool_t;
@@ -92,6 +96,7 @@ typedef struct _qstr_pool_t {
9296
void qstr_init(void);
9397

9498
size_t qstr_compute_hash(const byte *data, size_t len);
99+
95100
qstr qstr_find_strn(const char *str, size_t str_len); // returns MP_QSTRnull if not found
96101

97102
qstr qstr_from_str(const char *str);

tools/mpy-tool.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,21 +1473,20 @@ def freeze_mpy(firmware_qstr_idents, compiled_modules):
14731473
raw_code_count = 0
14741474
raw_code_content = 0
14751475

1476-
print()
1477-
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
1478-
qstr_size = {"metadata": 0, "data": 0}
1479-
for _, _, _, qbytes in new:
1480-
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
1481-
print(" %d," % qhash)
1482-
print("};")
1476+
if config.MICROPY_QSTR_BYTES_IN_HASH:
1477+
print()
1478+
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
1479+
for _, _, _, qbytes in new:
1480+
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
1481+
print(" %d," % qhash)
1482+
qstr_content += config.MICROPY_QSTR_BYTES_IN_HASH
1483+
print("};")
14831484
print()
14841485
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
14851486
for _, _, _, qbytes in new:
14861487
print(" %d," % len(qbytes))
1487-
qstr_size["metadata"] += (
1488-
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH
1489-
)
1490-
qstr_size["data"] += len(qbytes)
1488+
qstr_content += config.MICROPY_QSTR_BYTES_IN_LEN
1489+
qstr_content += len(qbytes) + 1 # include NUL
14911490
print("};")
14921491
print()
14931492
print("extern const qstr_pool_t mp_qstr_const_pool;")
@@ -1497,14 +1496,12 @@ def freeze_mpy(firmware_qstr_idents, compiled_modules):
14971496
print(" true, // is_sorted")
14981497
print(" %u, // allocated entries" % qstr_pool_alloc)
14991498
print(" %u, // used entries" % len(new))
1500-
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
1499+
if config.MICROPY_QSTR_BYTES_IN_HASH:
1500+
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
15011501
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
15021502
print(" {")
15031503
for _, _, qstr, qbytes in new:
15041504
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
1505-
qstr_content += (
1506-
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
1507-
)
15081505
print(" },")
15091506
print("};")
15101507

0 commit comments

Comments
 (0)