Skip to content

Commit 6c2e116

Browse files
committed
npyfile: Start implementing Writer, add save/load convenience
1 parent 64df703 commit 6c2e116

File tree

1 file changed

+154
-22
lines changed

1 file changed

+154
-22
lines changed

benchmarks/iir/npyfile.py

Lines changed: 154 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import struct
1111
import array
1212

13+
NPY_MAGIC = b'\x93NUMPY'
14+
1315
format_mapping = {
1416
# npy format => (array.array typecode, itemsize in bytes)
1517
b'f8': ('d', 8),
@@ -23,9 +25,23 @@ def find_section(data, prefix, suffix):
2325
section = data[start:end]
2426
return section
2527

26-
def array_from_bytes(typecode, buffer):
27-
# Workaround due
28-
return array.array(typecode, buffer)
28+
def array_tobytes_generator(arr):
    """
    Yield the packed bytes of each item in arr, one item per iteration.

    Stand-in for array.array.tobytes(), which is missing in MicroPython.
    Each item is packed with struct.pack() using the typecode that
    array_typecode() reports for arr as the format string.
    """
    fmt = array_typecode(arr)
    for value in arr:
        yield struct.pack(fmt, value)
34+
35+
36+
def array_typecode(arr):
    """
    Return the single-character typecode of an array.array

    The ``typecode`` attribute is used when the implementation provides it.
    Otherwise the typecode is extracted from the repr, which looks like
    ``array('f', [...])`` (presumably needed on MicroPython, where the
    attribute may be missing - TODO confirm).

    Parsing the repr unconditionally gave a wrong answer for subclasses of
    array.array, whose repr starts with the subclass name instead of "array".
    """
    typecode = getattr(arr, 'typecode', None)
    if typecode is None:
        # "array('" is 7 characters long, so the typecode sits at index 7
        typecode = str(arr)[7:8]
    return typecode
39+
40+
def compute_items(shape):
    """
    Return the number of items in an array with the given shape.

    This is the product of all dimension sizes. An empty shape gives 1.
    """
    count = 1
    for dim in shape:
        count = count * dim
    return count
2945

3046
class Reader():
3147

@@ -38,7 +54,6 @@ def __init__(self, filelike, header_maxlength=16*10):
3854

3955
self.header_maxlength = header_maxlength
4056

41-
4257
def close(self):
4358
if self.file:
4459
self.file.close()
@@ -57,22 +72,21 @@ def _read_header(self):
5772
data = self.file.read(self.header_maxlength)
5873

5974
# Check magic
60-
npy_magic = b'\x93NUMPY'
61-
magic = data[0:len(npy_magic)]
62-
assert magic == npy_magic, magic
75+
magic = data[0:len(NPY_MAGIC)]
76+
assert magic == NPY_MAGIC, magic
6377

6478
# Check version
65-
major, minor = struct.unpack_from('BB', data, len(npy_magic))
79+
major, minor = struct.unpack_from('BB', data, len(NPY_MAGIC))
6680
if major == 0x01:
67-
header_length = struct.unpack_from('<H', data, len(npy_magic)+2)[0]
68-
header_start = len(npy_magic)+2+2
81+
header_length = struct.unpack_from('<H', data, len(NPY_MAGIC)+2)[0]
82+
header_start = len(NPY_MAGIC)+2+2
6983
elif major == 0x02:
70-
header_length = struct.unpack_from('<I', data, len(npy_magic)+2)[0]
71-
header_start = len(npy_magic)+2+2
84+
header_length = struct.unpack_from('<I', data, len(NPY_MAGIC)+2)[0]
85+
header_start = len(NPY_MAGIC)+2+4
7286
else:
7387
raise ValueError("Unsupported npy format version")
7488

75-
#print('hs', header_start, data[header_start:header_start+header_length])
89+
print('hs', header_start, data[header_start:header_start+header_length])
7690

7791
# Parse header info
7892
type_info = find_section(data, b"'descr': '", b"',")
@@ -104,10 +118,7 @@ def _read_header(self):
104118
def read_data_chunks(self, chunksize):
105119

106120
# determine amount of data expected
107-
total_items = 1
108-
for d in self.shape:
109-
total_items *= d
110-
total_data_bytes = self.itemsize * total_items
121+
total_data_bytes = self.itemsize * compute_items(self.shape)
111122

112123
# read the data
113124
self.file.seek(self.data_start)
@@ -116,19 +127,140 @@ def read_data_chunks(self, chunksize):
116127
read_bytes = 0
117128
while read_bytes < total_data_bytes:
118129
sub = self.file.read(chunksize_bytes)
119-
arr = array_from_bytes(self.typecode, sub)
130+
arr = array.array(self.typecode, sub)
120131
yield arr
121132
read_bytes += len(sub)
122133

123-
def test_simple():
134+
135+
class Writer():
    """
    Streaming writer for .npy files (format version 1.0)

    The header is written when entering the ``with`` block,
    values are then appended using write_values().

    filelike: path (str) that is opened for binary writing,
        or an already open binary file object
    shape: sequence with the size of each dimension (1 to 5 dimensions)
    typecode: array.array typecode of the values that will be written
    """

    def __init__(self, filelike, shape, typecode):

        if isinstance(filelike, str):
            self.file = open(filelike, 'wb')
        else:
            self.file = filelike

        self.typecode = typecode
        self.shape = shape

    def close(self):
        """Close the underlying file. Does nothing if already closed."""
        if self.file:
            self.file.close()
            self.file = None

    def __enter__(self):
        try:
            self._write_header()
        except Exception:
            # __exit__ is not called when __enter__ fails,
            # so release the file here to avoid leaking it
            self.close()
            raise
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    @staticmethod
    def _descr_for_typecode(typecode):
        """
        Return the npy 'descr' string (such as '<f4') for an array.array typecode

        Raises ValueError for typecodes that have no numeric npy equivalent.
        """
        if typecode in ('f', 'd'):
            kind = 'f'
        elif typecode in ('b', 'h', 'i', 'l', 'q'):
            kind = 'i'
        elif typecode in ('B', 'H', 'I', 'L', 'Q'):
            kind = 'u'
        else:
            raise ValueError("Unsupported typecode: {!r}".format(typecode))

        # Size of one item when packed with struct.pack(typecode, item)
        itemsize = struct.calcsize(typecode)

        # NOTE(review): the descr always claims little-endian, as the previously
        # hardcoded '<f4' did. The values are packed by array_tobytes_generator();
        # presumably that is in native byte order, which would make this
        # correct only on little-endian hosts - confirm
        return '<' + kind + str(itemsize)

    def _write_header(self):
        """
        Write the magic string, version, header length and header dictionary

        Raises ValueError if the typecode is not supported.
        """
        shape = self.shape

        # Sanity checking
        dimensions = len(shape)
        assert dimensions >= 1, dimensions
        assert dimensions <= 5, dimensions

        # Construct header info
        dtype = self._descr_for_typecode(self.typecode)
        shape_str = ','.join(str(d) for d in shape)
        if dimensions == 1:
            # A 1-tuple needs a trailing comma: "(100)" is a plain int, not a tuple
            shape_str += ','

        header = f"{{'descr': '{dtype}', 'fortran_order': False, 'shape': ({shape_str}), }}"

        # The header must be terminated by a newline, and is padded with spaces
        # before it to ensure that the data start is aligned to 16 bytes
        preamble_length = len(NPY_MAGIC)+2+2  # magic, version, header length field
        unpadded_length = preamble_length + len(header) + 1  # +1 for the newline
        padding = (16 - (unpadded_length % 16)) % 16
        header = header + (' ' * padding) + '\n'
        header_length = len(header)
        data_start = preamble_length + header_length
        assert data_start % 16 == 0, data_start

        header_data = header.encode('ascii')
        assert len(header_data) == header_length

        self.file.write(NPY_MAGIC)
        self.file.write(bytes([0x01, 0x00]))  # version
        self.file.write(struct.pack('<H', header_length))
        self.file.write(header_data)

        # ready to write data

    def write_values(self, arr):
        """
        Append the items of arr to the file

        arr must have the same typecode as this Writer was created with.
        """
        input_typecode = array_typecode(arr)
        assert input_typecode == self.typecode, (input_typecode, self.typecode)

        for buf in array_tobytes_generator(arr):
            self.file.write(buf)
196+
197+
198+
def load(filelike) -> tuple[tuple, array.array]:
    """
    Load array from .npy file

    Convenience function for doing it in one shot.
    For streaming, use npyfile.Reader instead

    Returns (shape, values), where values is a flat array.array
    holding all the items of the file.
    """

    with Reader(filelike) as reader:
        shape = reader.shape
        typecode = reader.typecode
        # Ask for everything in one chunk
        total_items = compute_items(shape)
        chunks = list(reader.read_data_chunks(total_items))

    if not chunks:
        # Zero-sized array: the reader has no data to yield
        return shape, array.array(typecode)

    # Normally there is exactly one chunk.
    # Should the reader deliver several, join them in order
    values = chunks[0]
    for extra in chunks[1:]:
        values.extend(extra)

    return shape, values
215+
216+
def save(filelike, arr : array.array, shape=None):
    """
    Write an array to a .npy file in a single call

    One-shot helper. For streaming writes, use npyfile.Writer instead.
    When shape is not given, the array is stored as 1d.
    The number of items implied by shape must equal len(arr).
    """

    # default to 1d
    dims = (len(arr), ) if shape is None else shape

    code = array_typecode(arr)
    expected = compute_items(dims)
    assert expected == len(arr), (dims, expected, len(arr))

    with Writer(filelike, dims, code) as writer:
        writer.write_values(arr)
234+
235+
236+
def test_reader_simple():
    """Smoke test: open a known file, print its header info and the length of each chunk"""

    path = 'benchmarks/iir/noise.npy'
    with Reader(path) as npy:
        print(npy.shape, npy.typecode, npy.itemsize)

        for chunk in npy.read_data_chunks(500):
            print(len(chunk))
243+
244+
245+
def test_writer_simple():
    """Round-trip test: save a small float array, load it back and compare"""

    n_items = 100
    out_path = 'out.npy'
    expected_shape = (n_items, )
    values = array.array('f', (i for i in range(n_items)))

    # can be saved successfully
    save(out_path, values, shape=expected_shape)

    # can be loaded back up again
    got_shape, got_values = load(out_path)
    assert got_shape == expected_shape
    assert list(values) == list(got_values)
260+
130261

131-
test_simple()
262+
# Run the self-tests defined above.
# NOTE(review): these calls are unconditional at module level, so they also
# run whenever this file is imported - confirm that this is intended
test_reader_simple()
test_writer_simple()
132264

133265
# testcases
134266
# supported

0 commit comments

Comments
 (0)