import struct
import array

+NPY_MAGIC = b'\x93NUMPY'
+
format_mapping = {
    # npy format => (array.array typecode, itemsize in bytes)
    b'f8': ('d', 8),
@@ -23,9 +25,23 @@ def find_section(data, prefix, suffix):
    section = data[start:end]
    return section

-def array_from_bytes(typecode, buffer):
-    # Workaround due
-    return array.array(typecode, buffer)
+def array_tobytes_generator(arr):
+    # array.array.tobytes() is missing in MicroPython =/
+    typecode = array_typecode(arr)
+    for item in arr:
+        buf = struct.pack(typecode, item)
+        yield buf
+
+
+def array_typecode(arr):
+    typecode = str(arr)[7:8]
+    return typecode
+
+def compute_items(shape):
+    total_items = 1
+    for d in shape:
+        total_items *= d
+    return total_items
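+
+# Note: array_typecode() relies on the repr of array.array, e.g.
+# str(array.array('f')) == "array('f')", so the character at index 7 is the
+# typecode. compute_items() multiplies out the shape: compute_items((100, 3)) == 300.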


class Reader():
@@ -38,7 +54,6 @@ def __init__(self, filelike, header_maxlength=16*10):

        self.header_maxlength = header_maxlength

-
    def close(self):
        if self.file:
            self.file.close()
@@ -57,22 +72,21 @@ def _read_header(self):
        data = self.file.read(self.header_maxlength)

        # Check magic
-        npy_magic = b'\x93NUMPY'
-        magic = data[0:len(npy_magic)]
-        assert magic == npy_magic, magic
+        magic = data[0:len(NPY_MAGIC)]
+        assert magic == NPY_MAGIC, magic

        # Check version
-        major, minor = struct.unpack_from('BB', data, len(npy_magic))
+        major, minor = struct.unpack_from('BB', data, len(NPY_MAGIC))
        if major == 0x01:
-            header_length = struct.unpack_from('<H', data, len(npy_magic)+2)[0]
-            header_start = len(npy_magic)+2+2
+            header_length = struct.unpack_from('<H', data, len(NPY_MAGIC)+2)[0]
+            header_start = len(NPY_MAGIC)+2+2
        elif major == 0x02:
-            header_length = struct.unpack_from('<I', data, len(npy_magic)+2)[0]
-            header_start = len(npy_magic)+2+2
+            header_length = struct.unpack_from('<I', data, len(NPY_MAGIC)+2)[0]
+            header_start = len(NPY_MAGIC)+2+4
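+            # (.npy format v1.0 stores the header length as a 2-byte little-endian
+            # unsigned short, while v2.0 widens it to 4 bytes, hence the larger offset)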
        else:
            raise ValueError("Unsupported npy format version")

-        # print('hs', header_start, data[header_start:header_start+header_length])
+        print('hs', header_start, data[header_start:header_start+header_length])

        # Parse header info
        type_info = find_section(data, b"'descr': '", b"',")
@@ -104,10 +118,7 @@ def _read_header(self):
    def read_data_chunks(self, chunksize):

        # determine amount of data expected
-        total_items = 1
-        for d in self.shape:
-            total_items *= d
-        total_data_bytes = self.itemsize * total_items
+        total_data_bytes = self.itemsize * compute_items(self.shape)

        # read the data
        self.file.seek(self.data_start)
@@ -116,19 +127,140 @@ def read_data_chunks(self, chunksize):
        read_bytes = 0
        while read_bytes < total_data_bytes:
            sub = self.file.read(chunksize_bytes)
-            arr = array_from_bytes(self.typecode, sub)
+            arr = array.array(self.typecode, sub)
            yield arr
            read_bytes += len(sub)

-def test_simple():
+
+class Writer():
+    def __init__(self, filelike, shape, typecode):
+
+        if isinstance(filelike, str):
+            self.file = open(filelike, 'wb')
+        else:
+            self.file = filelike
+
+        self.typecode = typecode
+        self.shape = shape
+
+    def close(self):
+        if self.file:
+            self.file.close()
+            self.file = None
+
+    def __enter__(self):
+        self._write_header()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self.close()
+
+    def _write_header(self):
+        shape = self.shape
+        typecode = self.typecode
+
+        # Sanity checking
+        dimensions = len(shape)
+        assert dimensions >= 1, dimensions
+        assert dimensions <= 5, dimensions
+
+        # Construct header info
+        dtype = '<f4'  # FIXME: unhardcode
+        shape_str = ','.join((str(d) for d in shape))
+
+        header = f"{{'descr': '{dtype}', 'fortran_order': False, 'shape': ({shape_str}), }}"
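+        # e.g. shape=(100, 3) with dtype '<f4' yields:
+        # {'descr': '<f4', 'fortran_order': False, 'shape': (100,3), }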
+
+        # Padded to ensure data start is aligned to 16 bytes
+        data_start = len(NPY_MAGIC)+2+2+len(header)
+        padding = 16 - (data_start % 16)
+        header = header + (' ' * padding)
+        header_length = len(header)
+        data_start = len(NPY_MAGIC)+2+2+len(header)
+        assert data_start % 16 == 0, data_start
+
+        self.file.write(NPY_MAGIC)
+        self.file.write(bytes([0x01, 0x00]))  # version
+        self.file.write(struct.pack('<H', header_length))
+        header_data = header.encode('ascii')
+        assert len(header_data) == len(header)
+        self.file.write(header_data)
+
+        # ready to write data
+
+    def write_values(self, arr):
+        input_typecode = array_typecode(arr)
+        assert input_typecode == self.typecode, (input_typecode, self.typecode)
+
+        for buf in array_tobytes_generator(arr):
+            self.file.write(buf)
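+
+# Streaming usage sketch (illustrative only; 'out.npy' and produce_chunks()
+# are hypothetical):
+#
+#   with Writer('out.npy', shape=(1000,), typecode='f') as writer:
+#       for chunk in produce_chunks():  # yields array.array('f', ...) pieces
+#           writer.write_values(chunk)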
+
+
+def load(filelike) -> tuple[tuple, array.array]:
+    """
+    Load array from .npy file
+
+    Convenience function for doing it in one shot.
+    For streaming, use npyfile.Reader instead
+    """
+
+    chunks = []
+    with Reader(filelike) as reader:
+        # Just read everything in one chunk
+        total_items = compute_items(reader.shape)
+        for c in reader.read_data_chunks(total_items):
+            chunks.append(c)
+
+    assert len(chunks) == 1
+    return reader.shape, chunks[0]
+
+def save(filelike, arr: array.array, shape=None):
+    """
+    Save array as .npy file
+
+    Convenience function for doing it in one shot.
+    For streaming, use npyfile.Writer instead
+    """
+
+    if shape is None:
+        # default to 1d
+        shape = (len(arr), )
+
+    typecode = array_typecode(arr)
+    total = compute_items(shape)
+    assert total == len(arr), (shape, total, len(arr))
+
+    with Writer(filelike, shape, typecode) as f:
+        f.write_values(arr)
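+
+# One-shot usage sketch (illustrative; 'example.npy' is an arbitrary path):
+#
+#   save('example.npy', array.array('f', [1.0, 2.0, 3.0]))
+#   shape, values = load('example.npy')  # -> (3,), array('f', [1.0, 2.0, 3.0])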
+
+
+def test_reader_simple():

    with Reader('benchmarks/iir/noise.npy') as reader:
        print(reader.shape, reader.typecode, reader.itemsize)

-        for s in reader.read_data_chunks(100):
-            print(s)
+        for s in reader.read_data_chunks(500):
+            print(len(s))
+
+
+def test_writer_simple():
+
+    size = 100
+    arr = array.array('f', (i for i in range(size)))
+    shape = (size, )
+
+    path = 'out.npy'
+
+    # can be saved successfully
+    save(path, arr, shape=shape)
+
+    # can be loaded back up again
+    loaded_shape, loaded_arr = load(path)
+    assert loaded_shape == shape
+    assert list(arr) == list(loaded_arr)
+

-test_simple()
+test_reader_simple()
+test_writer_simple()

# testcases
# supported