Skip to content

Commit 60743a9

Browse files
aunzat, planet36, CuriousLearner
authored
gh-57141: Add dircmp shallow option (GH-109499)
Co-authored-by: Steve Ward <[email protected]> Co-authored-by: Sanyam Khurana <[email protected]>
1 parent ea1b1c5 commit 60743a9

File tree

4 files changed

+120
-31
lines changed

4 files changed

+120
-31
lines changed

Doc/library/filecmp.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,20 @@ The :mod:`filecmp` module defines the following functions:
7070
The :class:`dircmp` class
7171
-------------------------
7272

73-
.. class:: dircmp(a, b, ignore=None, hide=None)
73+
.. class:: dircmp(a, b, ignore=None, hide=None, shallow=True)
7474

7575
Construct a new directory comparison object, to compare the directories *a*
7676
and *b*. *ignore* is a list of names to ignore, and defaults to
7777
:const:`filecmp.DEFAULT_IGNORES`. *hide* is a list of names to hide, and
7878
defaults to ``[os.curdir, os.pardir]``.
7979

8080
The :class:`dircmp` class compares files by doing *shallow* comparisons
81-
as described for :func:`filecmp.cmp`.
81+
as described for :func:`filecmp.cmp` by default; set the *shallow*
82+
parameter to false to compare file contents instead.
83+
84+
.. versionchanged:: 3.13
85+
86+
Added the *shallow* parameter.
8287

8388
The :class:`dircmp` class provides the following methods:
8489

Lib/filecmp.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,15 @@ def _do_cmp(f1, f2):
8888
class dircmp:
8989
"""A class that manages the comparison of 2 directories.
9090
91-
dircmp(a, b, ignore=None, hide=None)
91+
dircmp(a, b, ignore=None, hide=None, shallow=True)
9292
A and B are directories.
9393
IGNORE is a list of names to ignore,
9494
defaults to DEFAULT_IGNORES.
9595
HIDE is a list of names to hide,
9696
defaults to [os.curdir, os.pardir].
97+
SHALLOW specifies whether to just check the stat signature (do not read
98+
the files).
99+
defaults to True.
97100
98101
High level usage:
99102
x = dircmp(dir1, dir2)
@@ -121,7 +124,7 @@ class dircmp:
121124
in common_dirs.
122125
"""
123126

124-
def __init__(self, a, b, ignore=None, hide=None): # Initialize
127+
def __init__(self, a, b, ignore=None, hide=None, shallow=True): # Initialize
125128
self.left = a
126129
self.right = b
127130
if hide is None:
@@ -132,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize
132135
self.ignore = DEFAULT_IGNORES
133136
else:
134137
self.ignore = ignore
138+
self.shallow = shallow
135139

136140
def phase0(self): # Compare everything except common subdirectories
137141
self.left_list = _filter(os.listdir(self.left),
@@ -184,7 +188,7 @@ def phase2(self): # Distinguish files, directories, funnies
184188
self.common_funny.append(x)
185189

186190
def phase3(self): # Find out differences between common files
187-
xx = cmpfiles(self.left, self.right, self.common_files)
191+
xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
188192
self.same_files, self.diff_files, self.funny_files = xx
189193

190194
def phase4(self): # Find out differences between common subdirectories
@@ -196,7 +200,8 @@ def phase4(self): # Find out differences between common subdirectories
196200
for x in self.common_dirs:
197201
a_x = os.path.join(self.left, x)
198202
b_x = os.path.join(self.right, x)
199-
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide)
203+
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide,
204+
self.shallow)
200205

201206
def phase4_closure(self): # Recursively call phase4() on subdirectories
202207
self.phase4()

Lib/test/test_filecmp.py

Lines changed: 101 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,44 @@
88
from test.support import os_helper
99

1010

11+
def _create_file_shallow_equal(template_path, new_path):
12+
"""create a file with the same size and mtime but different content."""
13+
shutil.copy2(template_path, new_path)
14+
with open(new_path, 'r+b') as f:
15+
next_char = bytearray(f.read(1))
16+
next_char[0] = (next_char[0] + 1) % 256
17+
f.seek(0)
18+
f.write(next_char)
19+
shutil.copystat(template_path, new_path)
20+
assert os.stat(new_path).st_size == os.stat(template_path).st_size
21+
assert os.stat(new_path).st_mtime == os.stat(template_path).st_mtime
22+
1123
class FileCompareTestCase(unittest.TestCase):
1224
def setUp(self):
1325
self.name = os_helper.TESTFN
1426
self.name_same = os_helper.TESTFN + '-same'
1527
self.name_diff = os_helper.TESTFN + '-diff'
28+
self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
1629
data = 'Contents of file go here.\n'
1730
for name in [self.name, self.name_same, self.name_diff]:
1831
with open(name, 'w', encoding="utf-8") as output:
1932
output.write(data)
2033

2134
with open(self.name_diff, 'a+', encoding="utf-8") as output:
2235
output.write('An extra line.\n')
36+
37+
for name in [self.name_same, self.name_diff]:
38+
shutil.copystat(self.name, name)
39+
40+
_create_file_shallow_equal(self.name, self.name_same_shallow)
41+
2342
self.dir = tempfile.gettempdir()
2443

2544
def tearDown(self):
2645
os.unlink(self.name)
2746
os.unlink(self.name_same)
2847
os.unlink(self.name_diff)
48+
os.unlink(self.name_same_shallow)
2949

3050
def test_matching(self):
3151
self.assertTrue(filecmp.cmp(self.name, self.name),
@@ -36,12 +56,17 @@ def test_matching(self):
3656
"Comparing file to identical file fails")
3757
self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
3858
"Comparing file to identical file fails")
59+
self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
60+
"Shallow identical files should be considered equal")
3961

4062
def test_different(self):
4163
self.assertFalse(filecmp.cmp(self.name, self.name_diff),
4264
"Mismatched files compare as equal")
4365
self.assertFalse(filecmp.cmp(self.name, self.dir),
4466
"File and directory compare as equal")
67+
self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
68+
shallow=False),
69+
"Mismatched file to shallow identical file compares as equal")
4570

4671
def test_cache_clear(self):
4772
first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
@@ -56,14 +81,26 @@ def setUp(self):
5681
self.dir = os.path.join(tmpdir, 'dir')
5782
self.dir_same = os.path.join(tmpdir, 'dir-same')
5883
self.dir_diff = os.path.join(tmpdir, 'dir-diff')
84+
self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
85+
self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')
5986

6087
# Another dir is created under dir_same, but it has a name from the
6188
# ignored list so it should not affect testing results.
6289
self.dir_ignored = os.path.join(self.dir_same, '.hg')
6390

6491
self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
6592
data = 'Contents of file go here.\n'
66-
for dir in (self.dir, self.dir_same, self.dir_diff, self.dir_ignored):
93+
94+
shutil.rmtree(self.dir, True)
95+
os.mkdir(self.dir)
96+
subdir_path = os.path.join(self.dir, 'subdir')
97+
os.mkdir(subdir_path)
98+
dir_file_path = os.path.join(self.dir, "file")
99+
with open(dir_file_path, 'w', encoding="utf-8") as output:
100+
output.write(data)
101+
102+
for dir in (self.dir_same, self.dir_same_shallow,
103+
self.dir_diff, self.dir_diff_file):
67104
shutil.rmtree(dir, True)
68105
os.mkdir(dir)
69106
subdir_path = os.path.join(dir, 'subdir')
@@ -72,14 +109,25 @@ def setUp(self):
72109
fn = 'FiLe' # Verify case-insensitive comparison
73110
else:
74111
fn = 'file'
75-
with open(os.path.join(dir, fn), 'w', encoding="utf-8") as output:
76-
output.write(data)
112+
113+
file_path = os.path.join(dir, fn)
114+
115+
if dir is self.dir_same_shallow:
116+
_create_file_shallow_equal(dir_file_path, file_path)
117+
else:
118+
shutil.copy2(dir_file_path, file_path)
77119

78120
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
79121
output.write('An extra file.\n')
80122

123+
# Add a file2 to dir_diff_file whose contents differ from file2 in dir_diff
124+
with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
125+
output.write('Different contents.\n')
126+
127+
81128
def tearDown(self):
82-
for dir in (self.dir, self.dir_same, self.dir_diff):
129+
for dir in (self.dir, self.dir_same, self.dir_diff,
130+
self.dir_same_shallow, self.dir_diff_file):
83131
shutil.rmtree(dir)
84132

85133
def test_default_ignores(self):
@@ -102,11 +150,7 @@ def test_cmpfiles(self):
102150
shallow=False),
103151
"Comparing directory to same fails")
104152

105-
# Add different file2
106-
with open(os.path.join(self.dir, 'file2'), 'w', encoding="utf-8") as output:
107-
output.write('Different contents.\n')
108-
109-
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_same,
153+
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
110154
['file', 'file2']) ==
111155
(['file'], ['file2'], []),
112156
"Comparing mismatched directories fails")
@@ -116,11 +160,22 @@ def _assert_lists(self, actual, expected):
116160
"""Assert that two lists are equal, up to ordering."""
117161
self.assertEqual(sorted(actual), sorted(expected))
118162

163+
def test_dircmp_identical_directories(self):
164+
self._assert_dircmp_identical_directories()
165+
self._assert_dircmp_identical_directories(shallow=False)
119166

120-
def test_dircmp(self):
167+
def test_dircmp_different_file(self):
168+
self._assert_dircmp_different_file()
169+
self._assert_dircmp_different_file(shallow=False)
170+
171+
def test_dircmp_different_directories(self):
172+
self._assert_dircmp_different_directories()
173+
self._assert_dircmp_different_directories(shallow=False)
174+
175+
def _assert_dircmp_identical_directories(self, **options):
121176
# Check attributes for comparison of two identical directories
122177
left_dir, right_dir = self.dir, self.dir_same
123-
d = filecmp.dircmp(left_dir, right_dir)
178+
d = filecmp.dircmp(left_dir, right_dir, **options)
124179
self.assertEqual(d.left, left_dir)
125180
self.assertEqual(d.right, right_dir)
126181
if self.caseinsensitive:
@@ -142,9 +197,10 @@ def test_dircmp(self):
142197
]
143198
self._assert_report(d.report, expected_report)
144199

200+
def _assert_dircmp_different_directories(self, **options):
145201
# Check attributes for comparison of two different directories (right)
146202
left_dir, right_dir = self.dir, self.dir_diff
147-
d = filecmp.dircmp(left_dir, right_dir)
203+
d = filecmp.dircmp(left_dir, right_dir, **options)
148204
self.assertEqual(d.left, left_dir)
149205
self.assertEqual(d.right, right_dir)
150206
self._assert_lists(d.left_list, ['file', 'subdir'])
@@ -164,12 +220,8 @@ def test_dircmp(self):
164220
self._assert_report(d.report, expected_report)
165221

166222
# Check attributes for comparison of two different directories (left)
167-
left_dir, right_dir = self.dir, self.dir_diff
168-
shutil.move(
169-
os.path.join(self.dir_diff, 'file2'),
170-
os.path.join(self.dir, 'file2')
171-
)
172-
d = filecmp.dircmp(left_dir, right_dir)
223+
left_dir, right_dir = self.dir_diff, self.dir
224+
d = filecmp.dircmp(left_dir, right_dir, **options)
173225
self.assertEqual(d.left, left_dir)
174226
self.assertEqual(d.right, right_dir)
175227
self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
@@ -180,27 +232,51 @@ def test_dircmp(self):
180232
self.assertEqual(d.same_files, ['file'])
181233
self.assertEqual(d.diff_files, [])
182234
expected_report = [
183-
"diff {} {}".format(self.dir, self.dir_diff),
184-
"Only in {} : ['file2']".format(self.dir),
235+
"diff {} {}".format(self.dir_diff, self.dir),
236+
"Only in {} : ['file2']".format(self.dir_diff),
185237
"Identical files : ['file']",
186238
"Common subdirectories : ['subdir']",
187239
]
188240
self._assert_report(d.report, expected_report)
189241

190-
# Add different file2
191-
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
192-
output.write('Different contents.\n')
193-
d = filecmp.dircmp(self.dir, self.dir_diff)
242+
243+
def _assert_dircmp_different_file(self, **options):
244+
# A different file2
245+
d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
194246
self.assertEqual(d.same_files, ['file'])
195247
self.assertEqual(d.diff_files, ['file2'])
196248
expected_report = [
197-
"diff {} {}".format(self.dir, self.dir_diff),
249+
"diff {} {}".format(self.dir_diff, self.dir_diff_file),
198250
"Identical files : ['file']",
199251
"Differing files : ['file2']",
200252
"Common subdirectories : ['subdir']",
201253
]
202254
self._assert_report(d.report, expected_report)
203255

256+
def test_dircmp_no_shallow_different_file(self):
257+
# With shallow=False, 'file' (same stat signature, different content) must differ
258+
d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
259+
self.assertEqual(d.same_files, [])
260+
self.assertEqual(d.diff_files, ['file'])
261+
expected_report = [
262+
"diff {} {}".format(self.dir, self.dir_same_shallow),
263+
"Differing files : ['file']",
264+
"Common subdirectories : ['subdir']",
265+
]
266+
self._assert_report(d.report, expected_report)
267+
268+
def test_dircmp_shallow_same_file(self):
269+
# With the default shallow=True, 'file' (same stat signature) is reported identical
270+
d = filecmp.dircmp(self.dir, self.dir_same_shallow)
271+
self.assertEqual(d.same_files, ['file'])
272+
self.assertEqual(d.diff_files, [])
273+
expected_report = [
274+
"diff {} {}".format(self.dir, self.dir_same_shallow),
275+
"Identical files : ['file']",
276+
"Common subdirectories : ['subdir']",
277+
]
278+
self._assert_report(d.report, expected_report)
279+
204280
def test_dircmp_subdirs_type(self):
205281
"""Check that dircmp.subdirs respects subclassing."""
206282
class MyDirCmp(filecmp.dircmp):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add option for *non-shallow* comparisons to :class:`filecmp.dircmp` like
2+
:func:`filecmp.cmp`. Original patch by Steven Ward. Enhanced by
3+
Tobias Rautenkranz.

0 commit comments

Comments
 (0)