forked from WebKit/WebKit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: check-for-duplicated-platform-test-results
executable file
·205 lines (163 loc) · 8.56 KB
/
check-for-duplicated-platform-test-results
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env python3
#
# Copyright (C) 2017 Igalia S.L.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public License
# along with this library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
# Standard-library imports.
import hashlib
import itertools
import logging
import optparse
import os
import sys
# WebKit project imports (webkitpy must be importable from sys.path).
from webkitpy.common.host import Host
from webkitpy.common.system.logutils import configure_logging
from webkitpy.port.config import apple_additions
# Module-level logger shared by every function in this script.
_log = logging.getLogger(__name__)
# Shared module state: the Host gives access to the SCM (for deleting
# duplicates) and to the port factory (for per-platform ports).
host = Host()
host.initialize_scm()
port = host.port_factory.get()
# Root of the LayoutTests tree and its platform-specific subdirectory.
layout_tests_directory = port.layout_tests_dir()
platform_directory = os.path.join(layout_tests_directory, 'platform')
# Text-result extensions that are treated as interchangeable when
# looking for a matching baseline (see check_duplicate).
txt_types = (".webarchive", ".txt")
def remove_layout_test_path_prefix(full_path):
    """Return full_path rewritten relative to the LayoutTests directory."""
    return os.path.relpath(full_path, start=layout_tests_directory)
def check_duplicate(platform, baseline_search_path, platform_test_result):
    """Return True if platform_test_result duplicates a baseline in baseline_search_path.

    platform: platform directory name the result belongs to (e.g. 'gtk').
    baseline_search_path: directories to search for an equivalent baseline,
        in fallback order.
    platform_test_result: absolute path of the platform-specific result file.

    A result is a duplicate only when a baseline exists somewhere in the
    search path and every baseline found has identical size and SHA-1.
    """
    def sha1(path):
        # Hash the whole file at once; expected results are small.
        with open(path, 'rb') as f:
            return hashlib.sha1(f.read()).hexdigest()

    _log.debug('  Looking for duplicates of {0} in {1}'.format(remove_layout_test_path_prefix(platform_test_result), str(baseline_search_path)))
    # Strip '<platform_directory>/<platform>/' to get the test-relative filename.
    prefix_len = len(os.path.join(platform_directory, platform)) + 1
    assert platform_test_result[:prefix_len - 1] == os.path.join(platform_directory, platform)
    test_result_filename = platform_test_result[prefix_len:]
    test_result_filename_base, test_result_filename_ext = os.path.splitext(platform_test_result[prefix_len:])
    # Text-type results (.txt/.webarchive) may match a baseline with either
    # extension; other types must match exactly.
    if test_result_filename_ext in txt_types:
        try_baseline_filenames = [test_result_filename_base + txt_type for txt_type in txt_types]
    else:
        try_baseline_filenames = [test_result_filename]
    # Size and hash of the platform result are computed lazily, only once a
    # candidate baseline is actually found.
    platform_test_result_size = None
    platform_test_result_sha1 = None
    baseline_found = False
    for baseline_path in baseline_search_path:
        for try_baseline_filename in try_baseline_filenames:
            baseline_test_result = os.path.join(baseline_path, try_baseline_filename)
            if not os.path.isfile(baseline_test_result):
                _log.debug('    No result at {0}'.format(remove_layout_test_path_prefix(baseline_test_result)))
                continue
            _log.debug('    Comparing with {0}'.format(remove_layout_test_path_prefix(baseline_test_result)))
            baseline_found = True
            # Compare sizes first: cheap, and a mismatch proves the files differ.
            if platform_test_result_size is None:
                platform_test_result_size = os.path.getsize(platform_test_result)
            baseline_test_result_size = os.path.getsize(baseline_test_result)
            if platform_test_result_size != baseline_test_result_size:
                _log.debug('      File size is different')
                return False
            if platform_test_result_sha1 is None:
                platform_test_result_sha1 = sha1(platform_test_result)
            baseline_test_result_sha1 = sha1(baseline_test_result)
            if platform_test_result_sha1 != baseline_test_result_sha1:
                _log.debug('      File hash is different')
                return False
            _log.debug('      Duplicate found: {0} and {1}'.format(remove_layout_test_path_prefix(platform_test_result), remove_layout_test_path_prefix(baseline_test_result)))
    # True only if at least one baseline was found and none differed.
    return baseline_found
def platform_list(platform):
    """Return the list of platform names to check.

    'all' expands to every entry found under the platform directories plus
    every port name the port factory knows about, sorted; any other value
    is returned unchanged as a single-element list.
    """
    search_dirs = [platform_directory]
    additions = apple_additions()
    if additions:
        search_dirs.append(additions.layout_tests_path())
    if platform != 'all':
        return [platform]
    names = []
    for search_dir in search_dirs:
        names.extend(os.listdir(search_dir))
    for port_name in host.port_factory.all_port_names():
        if port_name not in names:
            names.append(port_name)
    return sorted(names)
def find_duplicates_in_path(baseline_search_path):
    """Return the '-expected' result files in baseline_search_path that
    duplicate a baseline appearing later in the same search path.

    Each directory except the last is walked in turn and its results are
    checked against all of the directories that follow it.
    """
    duplicates = []
    for index in range(len(baseline_search_path) - 1):
        current_dir = baseline_search_path[index]
        fallback_dirs = baseline_search_path[index + 1:]
        _log.debug('  comparing files in {0} and {1}'.format(remove_layout_test_path_prefix(current_dir), remove_layout_test_path_prefix(fallback_dirs[0])))
        for root, _dirs, file_names in os.walk(current_dir):
            for file_name in file_names:
                # Only expected-result files can be duplicated baselines.
                if '-expected' not in file_name:
                    continue
                candidate = os.path.join(root, file_name)
                if check_duplicate(current_dir, fallback_dirs, candidate):
                    duplicates.append(candidate)
    return duplicates
def check_platform(options, baseline_search_paths_checked, webkit_test_runner):
    """Check every platform selected by options.platform for duplicated results.

    options: parsed command-line options (platform, no_delete).
    baseline_search_paths_checked: set of search-path tuples already handled;
        updated in place so repeated paths are only walked once across calls.
    webkit_test_runner: passed through to the port factory to select the
        WebKitTestRunner (wk2) or DumpRenderTree (wk1) flavor of each port.

    Returns the number of duplicated results found (and deleted, unless
    options.no_delete is set).
    """
    total = 0
    for platform in platform_list(options.platform):
        try:
            _log.debug('Trying to create port for platform {0}'.format(platform))
            # Ports sometimes change options, so create a new object.
            port_options = optparse.Values(defaults={"webkit_test_runner": webkit_test_runner})
            port = host.port_factory.get(platform, port_options)
        except Exception as error:
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; catch Exception and keep the cause visible.
            _log.warning('Failed to create port object for {0}'.format(platform))
            _log.debug('Port creation failed with: {0}'.format(error))
            continue
        if not port.supports_layout_tests():
            _log.warning('Platform {0} does not support layout tests'.format(platform))
            continue
        try:
            # Keep only directories that exist; the generic LayoutTests
            # directory is always the final fallback.
            baseline_search_path = tuple([p for p in port.baseline_search_path() if os.path.isdir(p)] + [layout_tests_directory])
        except Exception as e:
            _log.warning('Error computing baseline search paths from {0}, {1}'.format(platform, e))
            continue
        # Different platforms can share a search path; check each path once.
        if baseline_search_path in baseline_search_paths_checked:
            continue
        baseline_search_paths_checked.add(baseline_search_path)
        _log.info('Checking search paths [{0}]'.format(', '.join(remove_layout_test_path_prefix(p) for p in baseline_search_path[:-1])))
        duplicates = find_duplicates_in_path(baseline_search_path)
        if not duplicates:
            continue
        if options.no_delete:
            _log.info('Found the following duplicate results:')
            _log.info('\n'.join(remove_layout_test_path_prefix(p) for p in duplicates))
        else:
            host.scm().delete_list(duplicates)
        duplicates_len = len(duplicates)
        total += duplicates_len
        _log.info('{0} found in {1} -> generic\n'.format(duplicates_len, ' -> '.join([os.path.basename(p) for p in baseline_search_path[:-1]])))
    return total
def main():
    """Parse command-line options and run the duplicate-result check.

    Both WebKitTestRunner and DumpRenderTree ports are checked; the set of
    already-checked baseline search paths is shared between the two passes
    so each search path is only walked once.
    """
    option_parser = optparse.OptionParser(usage='usage: %prog [options]')
    option_parser.add_option('-p', '--platform',
                             action='store', dest='platform', default='all',
                             help='Platform to check for duplicated results. By default all platforms are checked')
    option_parser.add_option('-n', '--no-delete',
                             action='store_true', dest='no_delete',
                             help='Do not delete the duplicated results found from the repository, list them instead')
    # NOTE: a stray trailing comma after this call previously turned the
    # statement into a one-element tuple expression; removed.
    option_parser.add_option('-v', '--verbose', action='store_true', default=False,
                             help='Enable verbose printing')
    options, args = option_parser.parse_args()

    configure_logging(logger=_log)
    _log.setLevel(logging.DEBUG if options.verbose else logging.INFO)

    total = 0
    baseline_search_paths_checked = set()
    total += check_platform(options, baseline_search_paths_checked, webkit_test_runner=False)
    total += check_platform(options, baseline_search_paths_checked, webkit_test_runner=True)
    if total:
        if options.no_delete:
            # FIXME: Without deletion the total isn't accurate, because directories which are parents of wk1 and wk2 dirs will be tested twice.
            _log.info('Found {0} duplicate results.'.format(total))
        else:
            _log.info('{0} files have been removed from the repository. Check the status and commit if everything is correct'.format(total))
    else:
        _log.info('No duplicated results found.')
if __name__ == '__main__':
    # main() returns None, so sys.exit(None) exits with status 0.
    sys.exit(main())