Skip to content

Commit 0085725

Browse files
committed
2 parents 4e16d29 + e910200 commit 0085725

File tree

8 files changed

+883
-2
lines changed

8 files changed

+883
-2
lines changed

Examples/profiling/bna_reader/ChesapeakeBay.bna

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""
2+
cython-optimized code for scanning text files and reading numbers out of them
3+
4+
based on the classic C fscanf
5+
6+
ONLY WORKS WITH PYTHON 2.*
7+
- py3 does not give as easy access to the C file pointer.
8+
"""
9+
10+
import sys
11+
12+
cimport cython
13+
import numpy as np
14+
cimport numpy as cnp
15+
cimport libc
16+
from libc cimport stdio
17+
from libc.stdint cimport uint32_t, UINT32_MAX
18+
from cpython cimport *
19+
20+
21+
22+
23+
## NOTE: this way to get the file pointer from the python file object
24+
## does not work in Py3!
25+
26+
# cdef extern from "Python.h":
27+
# ## dont know why I need this -- shouldn't it be in the cpython pxd?
28+
# stdio.FILE* PyFile_AsFile(PyObject*)
29+
# int PyFile_CheckExact(PyObject*)
30+
31+
# void fprintf(FILE* f, char* s, char* s)
32+
# Next, enter the builtin file class into the namespace:
33+
#cdef extern from "fileobject.h":
34+
# ctypedef class __builtin__.file [object PyFileObject]:
35+
# pass
36+
37+
cdef extern from "ctype.h":
38+
cdef int isspace( int )
39+
40+
cdef extern from "fileobject.h":
41+
cdef stdio.FILE *PyFile_AsFile(object) except NULL
42+
cdef int PyFile_CheckExact(object)
43+
44+
45+
def scan(infile, num_to_read=None):
    """
    scan the file and return a numpy array of float64.

    Numbers are parsed with the C ``fscanf`` ``%lg`` conversion, reading
    directly from the C ``FILE*`` that backs the python file object
    (python 2 only).

    :param infile: the file to scan
    :type infile: an open, readable python file object.

    :param num_to_read=None: the number of values to read. If None,
                             then reads all the numbers in the file.
    :type num_to_read: integer

    :returns: 1-d numpy array of float64 holding the values read, in the
              order they were found in the file.

    :raises TypeError: if infile is not an open builtin file object that
                       was opened in a readable mode.
    :raises ValueError: if there are fewer than num_to_read numbers in
                        the file.

    All text in the file that is not part of a floating point number is
    skipped over.

    After reading num_to_read numbers, the file is left before the next
    non-whitespace character in the file. This will often leave the file
    at the start of the next line, after scanning a line full of numbers.
    """

    cdef uint32_t N, num_read
    cdef int status  # fscanf returns a signed int, and EOF is negative
    cdef int c
    cdef double value
    cdef char* format_string = "%lg"
    cdef Py_ssize_t new_size
    cdef stdio.FILE* fp
    cdef cnp.ndarray[double, ndim=1, mode="c"] out_arr
    cdef double[:] arr_view

    # UINT32_MAX is the sentinel for "read everything"
    N = UINT32_MAX if num_to_read is None else num_to_read

    # Only a real, open builtin file object has a C FILE* to hand over.
    if not PyFile_CheckExact(infile) or infile.closed:
        raise TypeError("infile must be an open file object")

    # The stream is readable when opened with 'r', 'U' (same as 'rU') or
    # any update mode ('r+', 'w+', 'a+').  A plain 'a' or 'w' is write-only.
    mode = infile.mode
    if not ('r' in mode or 'U' in mode or '+' in mode):
        raise TypeError("infile must be an open file object")

    ## now to grab the C file handle
    fp = PyFile_AsFile(infile)

    # flush python-level stdout before doing C-level I/O
    sys.stdout.flush()

    if N == UINT32_MAX:
        # number of values unknown: start with an arbitrarily small array
        # -- not too small, don't want to waste time making new arrays
        out_arr = np.zeros((128,), dtype=np.float64)
    else:
        out_arr = np.zeros((N,), dtype=np.float64)

    # view onto output array, so that out_arr can be re-sized;
    # the view is re-acquired after every resize
    arr_view = out_arr

    num_read = 0
    while num_read < N:
        ## try to read a number
        ## on a matching failure, drop one character and try again
        while True:
            status = stdio.fscanf(fp, format_string, &value)
            if status != 0:
                break
            stdio.fgetc(fp)
        if status == stdio.EOF:
            break
        if num_read >= out_arr.shape[0]:  # need to make the array bigger
            # The cast must wrap the whole product: a Cython typecast binds
            # tighter than "*", so "<int> n*1.2" would hand a float to resize.
            new_size = <Py_ssize_t> (out_arr.shape[0] * 1.2)
            out_arr.resize((new_size,), refcheck=False)
            arr_view = out_arr
        arr_view[num_read] = value
        num_read += 1

    if N != UINT32_MAX and num_read < N:
        raise ValueError("not enough values in the file -- only read %i"%num_read)

    # advance past any whitespace left
    while True:
        c = stdio.fgetc(fp)
        if not isspace(c):
            # move back one char, unless we hit the end of the file
            if c != stdio.EOF:
                stdio.fseek(fp, -1, stdio.SEEK_CUR)
            break

    # resize to fit:
    if out_arr.shape[0] > num_read:
        # resize can work if you don't need cython to access the data later
        out_arr.resize((num_read,), refcheck=False)
    return out_arr
143+
144+
145+
@cython.boundscheck(False)
def resize_test():
    """
    Exercise indexed writes on a typed array that is re-sized in place.

    A one-element float64 array is created and its first element set.
    The array is then grown to four elements with ``ndarray.resize``
    (``refcheck=False``) and the three new slots are written by index,
    with bounds checking switched off for this function.

    :returns: the re-sized array.
    """
    cdef cnp.ndarray[double, ndim=1, mode="c"] values

    values = np.zeros(1)
    values[0] = 3.14

    # grow in place, then write through the same typed variable
    values.resize((4,), refcheck=False)
    values[1] = 5.6
    values[2] = 7.1
    values[3] = 4.3

    return values
159+
160+
161+
162+
163+
164+
165+
166+
167+
168+
169+
170+
171+

0 commit comments

Comments
 (0)