Skip to content

Commit 368a98f

Browse files
committed
ENH/MAINT Check for changes in pxd files too. Cleanup code
1 parent a7038a4 commit 368a98f

File tree

1 file changed

+86
-74
lines changed

1 file changed

+86
-74
lines changed

sklearn/_build_utils/cythonize.py

Lines changed: 86 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
corresponding C files. If they have, then runs cython on these files to
1212
recreate the C files.
1313
14-
The script detects changes in the pyx files using checksums [or hashes] stored
15-
in a database file
14+
The script detects changes in the pyx/pxd files using checksums
15+
[or hashes] stored in a database file.
1616
1717
Simple script to invoke Cython on all .pyx
1818
files; while waiting for a proper build system. Uses file hashes to
@@ -27,10 +27,13 @@
2727
We copied it for scikit-learn.
2828
2929
Note: this script does not check any of the dependent C libraries; it only
30-
operates on the Cython .pyx files.
30+
operates on the Cython .pyx files and their corresponding Cython header (.pxd)
31+
files.
3132
"""
32-
# author: Arthur Mensch
33-
# license: BSD
33+
# Author: Arthur Mensch <[email protected]>
34+
# Author: Raghav R V <[email protected]>
35+
#
36+
# License: BSD 3 clause
3437

3538
from __future__ import division, print_function, absolute_import
3639

@@ -50,10 +53,7 @@
5053
WindowsError = None
5154

5255

53-
#
54-
# Rules
55-
#
56-
def process_pyx(fromfile, tofile):
56+
def cythonize(cython_file, gen_file):
5757
try:
5858
from Cython.Compiler.Version import version as cython_version
5959
from distutils.version import LooseVersion
@@ -64,53 +64,52 @@ def process_pyx(fromfile, tofile):
6464
pass
6565

6666
flags = ['--fast-fail']
67-
if tofile.endswith('.cpp'):
67+
if gen_file.endswith('.cpp'):
6868
flags += ['--cplus']
6969

7070
try:
7171
try:
72-
r = subprocess.call(['cython'] + flags + ["-o", tofile, fromfile])
73-
if r != 0:
74-
raise Exception('Cython failed')
72+
rc = subprocess.call(['cython'] +
73+
flags + ["-o", gen_file, cython_file])
74+
if rc != 0:
75+
raise Exception('Cythonizing %s failed' % cython_file)
7576
except OSError:
7677
# There are ways of installing Cython that don't result in a cython
7778
# executable on the path, see scipy issue gh-2397.
78-
r = subprocess.call([sys.executable, '-c',
79-
'import sys; from Cython.Compiler.Main '
80-
'import setuptools_main as main;'
81-
' sys.exit(main())'] + flags +
82-
["-o", tofile, fromfile])
83-
if r != 0:
84-
raise Exception('Cython failed')
79+
rc = subprocess.call([sys.executable, '-c',
80+
'import sys; from Cython.Compiler.Main '
81+
'import setuptools_main as main;'
82+
' sys.exit(main())'] + flags +
83+
["-o", gen_file, cython_file])
84+
if rc != 0:
85+
raise Exception('Cythonizing %s failed' % cython_file)
8586
except OSError:
8687
raise OSError('Cython needs to be installed')
8788

8889

89-
rules = {
90-
'.pyx': process_pyx,
91-
}
92-
93-
94-
#
95-
# Hash db
96-
#
9790
def load_hashes(filename):
98-
# Return { filename : (sha1 of input, sha1 of output) }
99-
if os.path.isfile(filename):
100-
hashes = {}
101-
with open(filename, 'r') as f:
102-
for line in f:
103-
filename, inhash, outhash = line.split()
104-
hashes[filename] = (inhash, outhash)
105-
else:
91+
"""Load the hashes dict from the hashfile"""
92+
# { filename : (sha1 of header if available or 'NA',
93+
# sha1 of input,
94+
# sha1 of output) }
95+
96+
hashes = {}
97+
try:
98+
with open(filename, 'r') as cython_hash_file:
99+
for hash_record in cython_hash_file:
100+
(filename, header_hash,
101+
cython_hash, gen_file_hash) = hash_record.split()
102+
hashes[filename] = (header_hash, cython_hash, gen_file_hash)
103+
except (KeyError, ValueError, AttributeError, IOError):
106104
hashes = {}
107105
return hashes
108106

109107

110-
def save_hashes(hash_db, filename):
111-
with open(filename, 'w') as f:
112-
for key, value in hash_db.items():
113-
f.write("%s %s %s\n" % (key, value[0], value[1]))
108+
def save_hashes(hashes, filename):
109+
"""Save the hashes dict to the hashfile"""
110+
with open(filename, 'w') as cython_hash_file:
111+
for key, value in hashes.items():
112+
cython_hash_file.write("%s %s %s %s\n" % (key, value[0], value[1], value[2]))
114113

115114

116115
def sha1_of_file(filename):
@@ -120,59 +119,72 @@ def sha1_of_file(filename):
120119
return h.hexdigest()
121120

122121

123-
#
124-
# Main program
125-
#
126-
def normpath(path):
122+
def clean_path(path):
123+
"""Normalize a path: use '/' as the separator and strip a leading './'"""
127124
path = path.replace(os.sep, '/')
128125
if path.startswith('./'):
129126
path = path[2:]
130127
return path
131128

132129

133-
def get_hash(frompath, topath):
134-
from_hash = sha1_of_file(frompath)
135-
to_hash = sha1_of_file(topath) if os.path.exists(topath) else None
136-
return from_hash, to_hash
130+
def get_hash_tuple(header_path, cython_path, gen_file_path):
131+
"""Return the (header, cython, generated file) sha1 hashes, using 'NA' for a missing header or generated file"""
132+
133+
header_hash = (sha1_of_file(header_path)
134+
if os.path.exists(header_path) else 'NA')
135+
from_hash = sha1_of_file(cython_path)
136+
to_hash = (sha1_of_file(gen_file_path)
137+
if os.path.exists(gen_file_path) else 'NA')
138+
139+
return header_hash, from_hash, to_hash
140+
137141

142+
def cythonize_if_unchanged(path, cython_file, gen_file, hashes):
143+
full_cython_path = os.path.join(path, cython_file)
144+
full_header_path = full_cython_path.replace('.pyx', '.pxd')
145+
full_gen_file_path = os.path.join(path, gen_file)
138146

139-
def process(path, fromfile, tofile, processor_function, hash_db):
140-
fullfrompath = os.path.join(path, fromfile)
141-
fulltopath = os.path.join(path, tofile)
142-
current_hash = get_hash(fullfrompath, fulltopath)
143-
if current_hash == hash_db.get(normpath(fullfrompath)):
144-
print('%s has not changed' % fullfrompath)
147+
current_hash = get_hash_tuple(full_header_path, full_cython_path,
148+
full_gen_file_path)
149+
150+
if current_hash == hashes.get(clean_path(full_cython_path)):
151+
print('%s has not changed' % full_cython_path)
145152
return
146153

147-
print('Processing %s' % fullfrompath)
148-
processor_function(fullfrompath, fulltopath)
154+
print('Processing %s' % full_cython_path)
155+
cythonize(full_cython_path, full_gen_file_path)
156+
149157
# changed target file, recompute hash
150-
current_hash = get_hash(fullfrompath, fulltopath)
151-
# store hash in db
152-
hash_db[normpath(fullfrompath)] = current_hash
158+
current_hash = get_hash_tuple(full_header_path, full_cython_path,
159+
full_gen_file_path)
160+
161+
# Update the hashes dict with the new hash
162+
hashes[clean_path(full_cython_path)] = current_hash
153163

154164

155-
def find_process_files(root_dir):
165+
def check_and_cythonize(root_dir):
156166
print(root_dir)
157-
hash_db = load_hashes(HASH_FILE)
167+
hashes = load_hashes(HASH_FILE)
168+
158169
for cur_dir, dirs, files in os.walk(root_dir):
159170
for filename in files:
160-
for fromext, function in rules.items():
161-
if filename.endswith(fromext):
162-
toext = ".c"
163-
with open(os.path.join(cur_dir, filename), 'rb') as f:
164-
data = f.read()
165-
m = re.search(b"libcpp", data, re.I | re.M)
166-
if m:
167-
toext = ".cpp"
168-
fromfile = filename
169-
tofile = filename[:-len(fromext)] + toext
170-
process(cur_dir, fromfile, tofile, function, hash_db)
171-
save_hashes(hash_db, HASH_FILE)
171+
if filename.endswith('.pyx'):
172+
gen_file_ext = '.c'
173+
# Cython files with libcpp imports should be compiled to cpp
174+
with open(os.path.join(cur_dir, filename), 'rb') as f:
175+
data = f.read()
176+
m = re.search(b"libcpp", data, re.I | re.M)
177+
if m:
178+
gen_file_ext = ".cpp"
179+
cython_file = filename
180+
gen_file = filename.replace('.pyx', gen_file_ext)
181+
cythonize_if_unchanged(cur_dir, cython_file, gen_file, hashes)
182+
183+
save_hashes(hashes, HASH_FILE)
172184

173185

174186
def main(root_dir=DEFAULT_ROOT):
175-
find_process_files(root_dir)
187+
check_and_cythonize(root_dir)
176188

177189

178190
if __name__ == '__main__':

0 commit comments

Comments
 (0)