1111corresponding C files. If they have, then runs cython on these files to
1212recreate the C files.
1313
14- The script detects changes in the pyx files using checksums [or hashes] stored
15- in a database file
14+ The script detects changes in the pyx/pxd files using checksums
15+ [or hashes] stored in a database file
1616
1717Simple script to invoke Cython on all .pyx
1818files; while waiting for a proper build system. Uses file hashes to
2727We copied it for scikit-learn.
2828
2929Note: this script does not check any of the dependent C libraries; it only
30- operates on the Cython .pyx files.
30+ operates on the Cython .pyx files and their corresponding Cython header (.pxd)
31+ files.
3132"""
32- # author: Arthur Mensch
33- # license: BSD
33+ # Author: Arthur Mensch <[email protected]>
34+ # Author: Raghav R V <[email protected]>
35+ #
36+ # License: BSD 3 clause
3437
3538from __future__ import division , print_function , absolute_import
3639
5053 WindowsError = None
5154
5255
53- #
54- # Rules
55- #
56- def process_pyx (fromfile , tofile ):
56+ def cythonize (cython_file , gen_file ):
5757 try :
5858 from Cython .Compiler .Version import version as cython_version
5959 from distutils .version import LooseVersion
@@ -64,53 +64,52 @@ def process_pyx(fromfile, tofile):
6464 pass
6565
6666 flags = ['--fast-fail' ]
67- if tofile .endswith ('.cpp' ):
67+ if gen_file .endswith ('.cpp' ):
6868 flags += ['--cplus' ]
6969
7070 try :
7171 try :
72- r = subprocess .call (['cython' ] + flags + ["-o" , tofile , fromfile ])
73- if r != 0 :
74- raise Exception ('Cython failed' )
72+ rc = subprocess .call (['cython' ] +
73+ flags + ["-o" , gen_file , cython_file ])
74+ if rc != 0 :
75+ raise Exception ('Cythonizing %s failed' % cython_file )
7576 except OSError :
7677 # There are ways of installing Cython that don't result in a cython
7778 # executable on the path, see scipy issue gh-2397.
78- r = subprocess .call ([sys .executable , '-c' ,
79- 'import sys; from Cython.Compiler.Main '
80- 'import setuptools_main as main;'
81- ' sys.exit(main())' ] + flags +
82- ["-o" , tofile , fromfile ])
83- if r != 0 :
84- raise Exception ('Cython failed' )
79+ rc = subprocess .call ([sys .executable , '-c' ,
80+ 'import sys; from Cython.Compiler.Main '
81+ 'import setuptools_main as main;'
82+ ' sys.exit(main())' ] + flags +
83+ ["-o" , gen_file , cython_file ])
84+ if rc != 0 :
85+ raise Exception ('Cythonizing %s failed' % cython_file )
8586 except OSError :
8687 raise OSError ('Cython needs to be installed' )
8788
8889
89- rules = {
90- '.pyx' : process_pyx ,
91- }
92-
93-
94- #
95- # Hash db
96- #
def load_hashes(filename):
    """Load the hashes dict from the hashfile.

    Every record of the hashfile is one line holding four
    whitespace-separated fields::

        <cython file path> <header sha1 or 'NA'> <input sha1> <output sha1>

    Parameters
    ----------
    filename : str
        Path of the hashfile to read.

    Returns
    -------
    hashes : dict
        Maps each cython file path to the tuple
        ``(header_hash, cython_hash, gen_file_hash)``. An empty dict is
        returned when the hashfile cannot be opened or when any record does
        not split into exactly four fields; in that case no partial result
        is kept.
    """
    hashes = {}
    try:
        with open(filename, 'r') as cython_hash_file:
            for hash_record in cython_hash_file:
                # A record with the wrong number of fields makes the
                # unpacking raise ValueError, which discards the whole
                # database below. The key gets its own name so the
                # ``filename`` argument is not rebound.
                (cython_path, header_hash,
                 cython_hash, gen_file_hash) = hash_record.split()
                hashes[cython_path] = (header_hash, cython_hash,
                                       gen_file_hash)
    except (ValueError, IOError):
        return {}
    return hashes
108106
109107
def save_hashes(hashes, filename):
    """Save the hashes dict to the hashfile.

    One line is written per entry, as four space-separated fields followed
    by a newline: the key and the first three items of its value.

    Parameters
    ----------
    hashes : dict
        Maps a cython file path to an indexable holding at least
        ``(header_hash, cython_hash, gen_file_hash)``.

    filename : str
        Path of the hashfile; it is overwritten.
    """
    with open(filename, 'w') as cython_hash_file:
        # Write records sorted by key so the hashfile content does not
        # depend on dict iteration order and stays stable between runs.
        for cython_path in sorted(hashes):
            hash_tuple = hashes[cython_path]
            cython_hash_file.write(
                "%s %s %s %s\n" % (cython_path, hash_tuple[0],
                                   hash_tuple[1], hash_tuple[2]))
114113
115114
116115def sha1_of_file (filename ):
@@ -120,59 +119,72 @@ def sha1_of_file(filename):
120119 return h .hexdigest ()
121120
122121
123- #
124- # Main program
125- #
def clean_path(path):
    """Return ``path`` using '/' as separator and without a leading './'."""
    # Splitting on os.sep and joining with '/' swaps every separator.
    slashed = '/'.join(path.split(os.sep))
    return slashed[2:] if slashed.startswith('./') else slashed
131128
132129
def get_hash_tuple(header_path, cython_path, gen_file_path):
    """Return ``(header_hash, from_hash, to_hash)`` for the given files.

    The header and the generated file are optional: the string 'NA' stands
    in for the hash of whichever of the two does not exist. The cython file
    itself is always hashed.
    """
    def _hash_or_na(file_path):
        # 'NA' is the placeholder recorded for a file that is not there.
        if os.path.exists(file_path):
            return sha1_of_file(file_path)
        return 'NA'

    return (_hash_or_na(header_path),
            sha1_of_file(cython_path),
            _hash_or_na(gen_file_path))
140+
137141
def cythonize_if_unchanged(path, cython_file, gen_file, hashes):
    """Run Cython on ``cython_file`` unless its recorded hashes are current.

    NOTE: despite the name, the file is *skipped* when nothing has changed
    and cythonized only when the hash tuple differs from the stored one.

    Parameters
    ----------
    path : str
        Directory that holds ``cython_file`` and receives ``gen_file``.

    cython_file : str
        Name of the Cython source file inside ``path``.

    gen_file : str
        Name of the C/C++ file to generate inside ``path``.

    hashes : dict
        Hash database keyed by the cleaned cython file path. It is updated
        in place after the file has been cythonized.
    """
    full_cython_path = os.path.join(path, cython_file)
    # Swap the extension only. A plain str.replace('.pyx', '.pxd') would
    # also rewrite any '.pyx' found in a directory name or earlier in the
    # file name and point at the wrong header.
    full_header_path = os.path.splitext(full_cython_path)[0] + '.pxd'
    full_gen_file_path = os.path.join(path, gen_file)
    hash_key = clean_path(full_cython_path)

    current_hash = get_hash_tuple(full_header_path, full_cython_path,
                                  full_gen_file_path)

    if current_hash == hashes.get(hash_key):
        print('%s has not changed' % full_cython_path)
        return

    print('Processing %s' % full_cython_path)
    cythonize(full_cython_path, full_gen_file_path)

    # The generated file has changed, so recompute the hashes before
    # recording them.
    hashes[hash_key] = get_hash_tuple(full_header_path, full_cython_path,
                                      full_gen_file_path)
153163
154164
def check_and_cythonize(root_dir):
    """Cythonize every changed .pyx file found under ``root_dir``.

    The hash database is loaded from ``HASH_FILE``, each ``.pyx`` file met
    while walking ``root_dir`` is handed to ``cythonize_if_unchanged``, and
    the database is written back to ``HASH_FILE``.

    Parameters
    ----------
    root_dir : str
        Directory that is walked recursively.
    """
    print(root_dir)
    hashes = load_hashes(HASH_FILE)
    pyx_ext = '.pyx'

    try:
        for cur_dir, dirs, files in os.walk(root_dir):
            for filename in files:
                if not filename.endswith(pyx_ext):
                    continue

                with open(os.path.join(cur_dir, filename), 'rb') as f:
                    data = f.read()

                # Cython files with libcpp imports should be compiled to cpp
                if re.search(b"libcpp", data, re.I | re.M):
                    gen_file_ext = '.cpp'
                else:
                    gen_file_ext = '.c'

                # Strip the suffix only: str.replace would rewrite every
                # '.pyx' occurring in the file name, not just the extension.
                gen_file = filename[:-len(pyx_ext)] + gen_file_ext
                cythonize_if_unchanged(cur_dir, filename, gen_file, hashes)
    finally:
        # Persist the hashes of the files processed so far even when a later
        # file fails, so they are not cythonized again on the next run.
        save_hashes(hashes, HASH_FILE)
172184
173185
def main(root_dir=DEFAULT_ROOT):
    """Entry point: check and cythonize the files under ``root_dir``."""
    check_and_cythonize(root_dir)
176188
177189
178190if __name__ == '__main__' :
0 commit comments