Skip to content

Commit da5ddfc

Browse files
jimmo authored and dpgeorge committed
hashlib: Refactor, split, and optimise.
This splits out each algorithm into its own extension package, so that only the necessary algorithms can be installed. This allows for a significant reduction in RAM and flash. i.e. previously installing hashlib meant that all algorithms were imported. Additionally ensures that any built-in hash algorithms (from uhashlib) are still exposed (e.g. `md5`), and retains the existing behavior to use the built-in preferentially. Also includes a refactoring of the algorithms to reduce code size and reduce the number of allocations they do as well as using bytearrays in place of list-of-int where possible. Add more comprehensive tests (using unittest). This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <[email protected]>
1 parent 2fba6b8 commit da5ddfc

File tree

21 files changed

+924
-883
lines changed

21 files changed

+924
-883
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Use built-in algorithms preferentially (on many ports this is just sha256).
try:
    from uhashlib import *
except ImportError:
    # No built-in hash module available; rely solely on installed extensions.
    pass


# Add missing algorithms based on installed extensions.
def _init():
    # For every supported algorithm that the star-import above did not already
    # provide, try to bind the implementation from the sibling `_<algo>`
    # module of this package into the module namespace.
    for algo in ("sha224", "sha256", "sha384", "sha512"):
        if algo not in globals():
            try:
                # from ._{algo} import {algo}
                c = __import__("_" + algo, None, None, (), 1)
                globals()[algo] = getattr(c, algo)
            except ImportError:
                # Deliberate best-effort: that extension is simply not
                # installed, so the algorithm stays unavailable.
                pass


# Run once at import time, then drop the helper so it is not part of the
# module's namespace (new() looks names up in globals()).
_init()
del _init
22+
23+
24+
def new(algo, data=b""):
    # Construct a hash object by algorithm name.
    #
    #   algo: name of an algorithm bound in this module (e.g. "sha256").
    #   data: optional initial data, passed to the algorithm's constructor.
    #
    # Returns whatever the algorithm's constructor returns.
    # Raises ValueError(algo) if no such name is available in this module.
    try:
        c = globals()[algo]
    except KeyError:
        raise ValueError(algo)
    # Deliberately outside the try: a KeyError raised by the constructor
    # itself must not be misreported as an unknown algorithm.
    return c(data)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# MIT license; Copyright (c) 2023 Jim Mussared
2+
# Originally ported from CPython by Paul Sokolovsky
3+
4+
5+
# Base class for SHA implementations, which must provide:
#   .digestsize & .digest_size
#   .block_size
#   ._iv
#   ._update
#   ._final
class sha:
    def __init__(self, s=None):
        # s: optional initial data (str or bytes-like) to hash immediately.
        # Copy the IV so the class-level list is never mutated.
        self._digest = self._iv[:]
        # Running state owned by the subclass's _update()/_final().
        self._count_lo = 0
        self._count_hi = 0
        self._data = bytearray(self.block_size)
        self._local = 0
        self._digestsize = self.digest_size
        if s:
            self.update(s)

    def update(self, s):
        # Feed more data into the hash. str is encoded as ASCII; anything
        # else is converted with bytes() before being handed to _update().
        if isinstance(s, str):
            s = s.encode("ascii")
        else:
            s = bytes(s)
        self._update(s)

    def digest(self):
        # Finalise a copy so this object can keep accepting update() calls.
        # The result is truncated to the digest size and returned as
        # immutable bytes (not whatever mutable buffer _final() produced),
        # matching CPython's hashlib.
        return bytes(self.copy()._final()[: self._digestsize])

    def hexdigest(self):
        # Lowercase hex string of digest(), two characters per byte.
        return "".join(["%.2x" % i for i in self.digest()])

    def copy(self):
        # Return an independent hash object with the same internal state.
        clone = type(self)()
        clone._digest = self._digest[:]
        clone._count_lo = self._count_lo
        clone._count_hi = self._count_hi
        clone._data = self._data[:]
        clone._local = self._local
        return clone
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Package manifest: declares the package version and includes the `hashlib`
# package directory.
metadata(version="1.0")

package("hashlib")
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# MIT license; Copyright (c) 2023 Jim Mussared
2+
# Originally ported from CPython by Paul Sokolovsky
3+
4+
from ._sha256 import sha256
5+
6+
7+
# SHA-224: reuses the sha256 implementation unchanged, overriding only the
# initial hash value and the digest length (28 bytes).
class sha224(sha256):
    digest_size = digestsize = 28
    # Initial hash value; must stay a list because instances copy it into
    # mutable per-object state.
    _iv = [
        0xC1059ED8,
        0x367CD507,
        0x3070DD17,
        0xF70E5939,
        0xFFC00B31,
        0x68581511,
        0x64F98FA7,
        0xBEFA4FA4,
    ]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Package manifest for the SHA224 extension; sha224 subclasses sha256, so
# the SHA256 extension is required.
metadata(version="1.0", description="Adds the SHA224 hash algorithm to hashlib.")

require("hashlib-sha256")
package("hashlib")
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# MIT license; Copyright (c) 2023 Jim Mussared
2+
# Originally ported from CPython by Paul Sokolovsky
3+
4+
from ._sha import sha
5+
6+
_SHA_BLOCKSIZE = const(64)
7+
8+
9+
# 32-bit helper functions used by the SHA-256 compression function.
# All results are confined to 32 bits by ROR/R masking their input/output.

# Rotate the low 32 bits of x right by (y mod 32) bits.
ROR = lambda x, y: (((x & 0xFFFFFFFF) >> (y & 31)) | (x << (32 - (y & 31)))) & 0xFFFFFFFF
# Choose: for each bit, take y where x is 1, else z.
Ch = lambda x, y, z: (z ^ (x & (y ^ z)))
# Majority: each result bit is the majority vote of the bits of x, y, z.
Maj = lambda x, y, z: (((x | y) & z) | (x & y))
# S is rotate-right, R is logical shift-right of the low 32 bits.
S = lambda x, n: ROR(x, n)
R = lambda x, n: (x & 0xFFFFFFFF) >> n
# Sigma0/Sigma1 are used in the round function; Gamma0/Gamma1 in the
# message-schedule expansion.
Sigma0 = lambda x: (S(x, 2) ^ S(x, 13) ^ S(x, 22))
Sigma1 = lambda x: (S(x, 6) ^ S(x, 11) ^ S(x, 25))
Gamma0 = lambda x: (S(x, 7) ^ S(x, 18) ^ R(x, 3))
Gamma1 = lambda x: (S(x, 17) ^ S(x, 19) ^ R(x, 10))
18+
19+
20+
# SHA-256 implementation on top of the `sha` base class. Provides the
# attributes and the _update()/_final() hooks that the base class requires.
class sha256(sha):
    digest_size = digestsize = 32
    block_size = _SHA_BLOCKSIZE
    # Initial hash value; copied per instance by the base class.
    _iv = [
        0x6A09E667,
        0xBB67AE85,
        0x3C6EF372,
        0xA54FF53A,
        0x510E527F,
        0x9B05688C,
        0x1F83D9AB,
        0x5BE0CD19,
    ]

    def _transform(self):
        # Compress the 64-byte block currently in self._data into
        # self._digest.
        W = []

        data = self._data
        # First 16 schedule words: the block read as big-endian 32-bit ints.
        for i in range(0, 16):
            W.append(
                (data[4 * i] << 24)
                + (data[4 * i + 1] << 16)
                + (data[4 * i + 2] << 8)
                + data[4 * i + 3]
            )

        # Remaining 48 schedule words are derived from earlier ones.
        for i in range(16, 64):
            W.append((Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]) & 0xFFFFFFFF)

        # Working copy of the state; self._digest is only updated at the end.
        ss = self._digest[:]

        def RND(a, b, c, d, e, f, g, h, i, ki):
            # One round: returns the new values for the `d` and `h` slots.
            t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]
            t1 = Sigma0(a) + Maj(a, b, c)
            d += t0
            h = t0 + t1
            return d & 0xFFFFFFFF, h & 0xFFFFFFFF

        # The 64 rounds are unrolled; instead of shifting the eight state
        # words each round, the argument order rotates by one position.
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 0, 0x428A2F98)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 1, 0x71374491)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 2, 0xB5C0FBCF)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 3, 0xE9B5DBA5)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 4, 0x3956C25B)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 5, 0x59F111F1)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 6, 0x923F82A4)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 7, 0xAB1C5ED5)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 8, 0xD807AA98)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 9, 0x12835B01)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 10, 0x243185BE)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 11, 0x550C7DC3)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 12, 0x72BE5D74)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 13, 0x80DEB1FE)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 14, 0x9BDC06A7)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 15, 0xC19BF174)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 16, 0xE49B69C1)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 17, 0xEFBE4786)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 18, 0x0FC19DC6)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 19, 0x240CA1CC)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 20, 0x2DE92C6F)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 21, 0x4A7484AA)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 22, 0x5CB0A9DC)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 23, 0x76F988DA)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 24, 0x983E5152)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 25, 0xA831C66D)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 26, 0xB00327C8)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 27, 0xBF597FC7)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 28, 0xC6E00BF3)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 29, 0xD5A79147)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 30, 0x06CA6351)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 31, 0x14292967)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 32, 0x27B70A85)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 33, 0x2E1B2138)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 34, 0x4D2C6DFC)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 35, 0x53380D13)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 36, 0x650A7354)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 37, 0x766A0ABB)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 38, 0x81C2C92E)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 39, 0x92722C85)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 40, 0xA2BFE8A1)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 41, 0xA81A664B)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 42, 0xC24B8B70)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 43, 0xC76C51A3)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 44, 0xD192E819)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 45, 0xD6990624)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 46, 0xF40E3585)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 47, 0x106AA070)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 48, 0x19A4C116)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 49, 0x1E376C08)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 50, 0x2748774C)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 51, 0x34B0BCB5)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 52, 0x391C0CB3)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 53, 0x4ED8AA4A)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 54, 0x5B9CCA4F)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 55, 0x682E6FF3)
        ss[3], ss[7] = RND(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], 56, 0x748F82EE)
        ss[2], ss[6] = RND(ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], 57, 0x78A5636F)
        ss[1], ss[5] = RND(ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], 58, 0x84C87814)
        ss[0], ss[4] = RND(ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], ss[4], 59, 0x8CC70208)
        ss[7], ss[3] = RND(ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], ss[3], 60, 0x90BEFFFA)
        ss[6], ss[2] = RND(ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], ss[2], 61, 0xA4506CEB)
        ss[5], ss[1] = RND(ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], ss[1], 62, 0xBEF9A3F7)
        ss[4], ss[0] = RND(ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], ss[0], 63, 0xC67178F2)

        # Add the compressed block into the running hash value (mod 2**32).
        for i, word in enumerate(ss):
            self._digest[i] = (self._digest[i] + word) & 0xFFFFFFFF

    def _update(self, buffer):
        # Absorb `buffer` (bytes-like, not str) into the hash state.
        # Raises TypeError if given a str.
        if isinstance(buffer, str):
            raise TypeError("Unicode strings must be encoded before hashing")
        count = len(buffer)
        buffer_idx = 0

        # Maintain the total message length in bits as two 32-bit words.
        clo = (self._count_lo + (count << 3)) & 0xFFFFFFFF
        if clo < self._count_lo:
            # Low word wrapped around: carry into the high word.
            self._count_hi += 1
        self._count_lo = clo

        self._count_hi += count >> 29

        if self._local:
            # A partial block is pending from a previous call: top it up.
            i = _SHA_BLOCKSIZE - self._local
            if i > count:
                i = count

            # copy buffer
            self._data[self._local : self._local + i] = buffer[buffer_idx : buffer_idx + i]

            count -= i
            buffer_idx += i

            self._local += i
            if self._local == _SHA_BLOCKSIZE:
                self._transform()
                self._local = 0
            else:
                # Still not a full block; nothing more to process.
                return

        # Process all complete blocks directly from the input.
        while count >= _SHA_BLOCKSIZE:
            # copy buffer
            self._data = bytearray(buffer[buffer_idx : buffer_idx + _SHA_BLOCKSIZE])
            count -= _SHA_BLOCKSIZE
            buffer_idx += _SHA_BLOCKSIZE
            self._transform()

        # Stash the remaining (< one block) tail for the next call.
        # copy buffer
        pos = self._local
        self._data[pos : pos + count] = buffer[buffer_idx : buffer_idx + count]
        self._local = count

    def _final(self):
        # Apply the final padding and length, run the last compression(s),
        # and return the full state as a big-endian bytearray. This mutates
        # the object's state.
        lo_bit_count = self._count_lo
        hi_bit_count = self._count_hi
        # Number of pending bytes in the current block.
        count = (lo_bit_count >> 3) & 0x3F
        # Padding starts with a single 1 bit (0x80) ...
        self._data[count] = 0x80
        count += 1
        # ... followed by zeros: zero the bytes in data after the count.
        self._data = self._data[:count] + bytes(_SHA_BLOCKSIZE - count)
        if count > _SHA_BLOCKSIZE - 8:
            # No room left for the 8-byte length: flush this block and put
            # the length in a fresh all-zero block.
            self._transform()
            self._data = bytearray(_SHA_BLOCKSIZE)

        # Last 8 bytes: message length in bits, big-endian (high word first).
        for k in range(4):
            shift = 24 - 8 * k
            self._data[56 + k] = (hi_bit_count >> shift) & 0xFF
            self._data[60 + k] = (lo_bit_count >> shift) & 0xFF

        self._transform()

        # Serialise each 32-bit state word big-endian.
        dig = bytearray()
        for word in self._digest:
            for shift in (24, 16, 8, 0):
                dig.append((word >> shift) & 0xFF)
        return dig
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Package manifest for the SHA256 extension; requires the hashlib core
# package.
metadata(version="1.0", description="Adds the SHA256 hash algorithm to hashlib.")

require("hashlib-core")
package("hashlib")
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# MIT license; Copyright (c) 2023 Jim Mussared
2+
# Originally ported from CPython by Paul Sokolovsky
3+
4+
from ._sha512 import sha512
5+
6+
7+
# SHA-384: reuses the sha512 implementation, overriding only the initial
# hash value and the digest length (48 bytes).
class sha384(sha512):
    digest_size = digestsize = 48
    # Initial hash value (eight 64-bit words).
    # NOTE(review): kept as a list on the assumption that sha512 copies and
    # mutates it per instance like the sha base class does -- confirm.
    _iv = [
        0xCBBB9D5DC1059ED8,
        0x629A292A367CD507,
        0x9159015A3070DD17,
        0x152FECD8F70E5939,
        0x67332667FFC00B31,
        0x8EB44A8768581511,
        0xDB0C2E0D64F98FA7,
        0x47B5481DBEFA4FA4,
    ]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Package manifest for the SHA384 extension; sha384 subclasses sha512, so
# the SHA512 extension is required.
metadata(version="1.0", description="Adds the SHA384 hash algorithm to hashlib.")

require("hashlib-sha512")
package("hashlib")

0 commit comments

Comments
 (0)