aboutsummaryrefslogtreecommitdiffstats
path: root/main.py
blob: 003d92c7974ff532d30bc12953f3d25691dd0849 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# Copyright (C) 2023 The Qt Company Ltd.
# Contact: https://www.qt.io/licensing/
#
# You may use this file under the terms of the 3-clause BSD license.
# See the file LICENSE in qt/qtrepotools for details.
#

""" This script listens for incoming webhook requests of patchset-created type
    from Gerrit, checks out the patch locally, and runs flake8 on it.
    It then posts the results to Gerrit as a review, with one comment for each
    issue identified.
"""

import json
import os
import sys
import asyncio
import base64
import fnmatch
import atexit
from urllib.parse import urlparse
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
import logging
from systemd.journal import JournalHandler
import tempfile

from aiohttp import web

# All bot output goes through this logger, which writes to the systemd journal.
log = logging.getLogger('flake8_bot')
log.setLevel(logging.INFO)
log.addHandler(JournalHandler())


# Credentials for the Gerrit REST API are taken from the environment.
GERRIT_USERNAME = os.environ.get('GERRIT_USERNAME')
GERRIT_PASSWORD = os.environ.get('GERRIT_PASSWORD')

# Refuse to start unless both credentials are present and non-empty.
if not (GERRIT_USERNAME and GERRIT_PASSWORD):
    log.info('Please set the GERRIT_USERNAME and GERRIT_PASSWORD environment variables.')
    sys.exit(1)

# Base64 encoded "username:password" pair, used in the HTTP Basic
# Authorization header of every Gerrit request.
GERRIT_AUTH = base64.b64encode(
    f"{GERRIT_USERNAME}:{GERRIT_PASSWORD}".encode('utf-8')
).decode('utf-8')

class Lock:
    """ Minimal cooperative lock that keeps requests from acting on the git
        repo while it is in use.
        Webhook requests are handled asynchronously, so several of them may be
        in flight at once.
    """
    def __init__(self):
        # True while some request holds the lock.
        self.locked = False

    async def acquire(self):
        """ Wait until the lock is free, then take it. """
        # Poll once per second while the lock is held. There is no await
        # between the last check and the assignment below, so coroutines on
        # the same event loop cannot both take the lock.
        while self.locked:
            await asyncio.sleep(1)
        self.locked = True

    def release(self):
        """ Mark the lock as free again. """
        self.locked = False

# Single shared lock guarding the local git checkout.
semaphore = Lock()

async def clone_repo(data):
    """ Clone the target repo and check out the branch.

        Does nothing if a path named data['repo_name'] already exists. The
        branch checkout is pointed at the clone through the ``cwd`` argument
        rather than os.chdir(), so a failure part-way through cannot leave the
        whole process stuck in the wrong working directory.

        Reads data['change']['project'], data['change']['branch'] and
        data['repo_name'].

        Raises RuntimeError if 'git clone' exits with a non-zero status.
    """
    log.info("Cloning repo %s", data['change']['project'])
    repo_name = data['repo_name']
    if os.path.exists(repo_name):
        # return if the repo already exists
        return
    repo_url = "https://codereview.qt-project.org/" + data['change']['project'] + ".git"
    # Clone the repo. The '--' keeps git from reading a directory name that
    # starts with '-' as a command line option.
    p = await asyncio.create_subprocess_exec('git', 'clone', '--', repo_url, repo_name)
    await p.communicate()
    if p.returncode != 0:
        # Nothing useful can happen without a clone, so fail loudly here.
        raise RuntimeError("git clone of %s failed with exit code %s"
                           % (repo_url, p.returncode))
    # Check out the branch
    p = await asyncio.create_subprocess_exec('git', 'checkout', data['change']['branch'],
                                             cwd=repo_name)
    await p.communicate()
    if p.returncode != 0:
        # Best effort, as before: only report the failure.
        log.warning("git checkout of branch %s exited with code %s",
                    data['change']['branch'], p.returncode)


async def checkout_patch(data):
    """ Fetch the patch set and check it out in the local clone.

        Runs 'git clean -fdx', 'git fetch origin <ref>' and
        'git checkout FETCH_HEAD' inside data['repo_name']. The commands are
        given the repository through ``cwd`` rather than os.chdir(), so an
        error cannot leave the process in the wrong working directory.

        Raises ValueError if data['patchSet']['ref'] does not start with
        'refs/', and RuntimeError if the fetch or the checkout fails. Without
        that check a failed fetch would leave a stale FETCH_HEAD behind and
        the wrong revision would be linted.
    """

    log.info("%s: Checking out patch", data['change']['number'])
    repo_dir = data['repo_name']
    ref = data['patchSet']['ref']
    # The ref comes straight from the webhook payload. Refuse anything that is
    # not a ref name so that git can never parse it as a command line option.
    if not ref.startswith('refs/'):
        raise ValueError("Unexpected patch set ref: %r" % (ref,))

    # git clean -fdx first to remove any untracked files
    p = await asyncio.create_subprocess_exec('git', 'clean', '-fdx', cwd=repo_dir)
    await p.communicate()
    if p.returncode != 0:
        log.warning("%s: git clean exited with code %s",
                    data['change']['number'], p.returncode)

    # git fetch origin <ref>
    p = await asyncio.create_subprocess_exec('git', 'fetch', 'origin', ref, cwd=repo_dir)
    await p.communicate()
    if p.returncode != 0:
        raise RuntimeError("git fetch of %s failed with exit code %s" % (ref, p.returncode))

    p = await asyncio.create_subprocess_exec('git', 'checkout', 'FETCH_HEAD', cwd=repo_dir)
    await p.communicate()
    if p.returncode != 0:
        raise RuntimeError("git checkout of FETCH_HEAD failed with exit code %s"
                           % (p.returncode,))


async def run_flake8(data):
    """ Run flake8 on the python files touched by the patch.

        The changed files are taken from 'git diff-tree' on FETCH_HEAD inside
        data['repo_name']. Deleted files, non-python files and files whose
        base name matches one of the ignore patterns are skipped. The repo's
        own .flake8 file is used when present; otherwise a temporary default
        config is written and removed again afterwards, even on error.

        Returns a tuple (comments_per_file, checked_file_count).
        comments_per_file maps each checked file path that has issues to a
        list of {'line': <line number as text>, 'message': '<code>: <text>'}
        dicts. When no python file was checked the result is (None, 0).
    """

    log.info("%s: Running flake8", data['change']['number'])
    # Work with an absolute path and hand it to the subprocesses via cwd=, so
    # the working directory of the process itself is never changed.
    repo_dir = os.path.abspath(data['repo_name'])

    # Get the list of files changed in this patch
    p = await asyncio.create_subprocess_exec('git', 'diff-tree', '--no-commit-id',
                                             '--name-status', '-r', 'FETCH_HEAD',
                                             stdout=asyncio.subprocess.PIPE,
                                             cwd=repo_dir)
    stdout, _ = await p.communicate()
    # Each line is "<status>\t<path>"; skip empty lines and deleted files.
    changed_files = [line.split('\t')[1]
                     for line in stdout.decode().split('\n')
                     if line and not line.startswith("D")]

    ignore_patterns = ["rc_*.py", "*_rc.py", "ui_*.py"]

    # Use the .flake8 file in the repo
    flake8_config = os.path.join(repo_dir, '.flake8')
    log.info("Trying flake8 config: %s", flake8_config)
    fallback_config = not os.path.exists(flake8_config)
    if fallback_config:
        log.warning("No .flake8 file found. Using default config.")
        # Write the default config to a temp file; it is deleted in the
        # finally clause below.
        with tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) as f:
            f.write(
                """[flake8]
ignore = E115,E265,W503
max-line-length = 100
exclude = rc_*.py,*_rc.py,ui_*.py
per-file-ignores =
    # for init_test_paths() hack
    *_test_*.py:E402
    __init__.py:F401,E402
"""
            )
        flake8_config = f.name

    comments_per_file = {}
    checked_file_count = 0
    try:
        # Run flake8 on each file
        for path in changed_files:
            if not path.endswith('.py'):
                continue  # Don't call flake8 on non-python files
            # Match on the base name: the patterns describe file names, and a
            # pattern such as "rc_*.py" can never match a path with a
            # directory in front of it.
            base_name = os.path.basename(path)
            if any(fnmatch.fnmatch(base_name, pattern) for pattern in ignore_patterns):
                continue  # Skip this file if it matches any of the ignore patterns

            checked_file_count += 1

            process = await asyncio.create_subprocess_exec(
                'flake8',
                '--config=' + flake8_config,
                '--format=%(path)s;;%(row)d;;%(code)s;;%(text)s',
                path,
                stdout=asyncio.subprocess.PIPE,
                cwd=repo_dir
            )
            stdout, _ = await process.communicate()
            output = stdout.decode()
            log.debug("Flake8 output: %s", output)
            for line in output.split('\n'):
                # At most three splits, so a ';;' inside the message text does
                # not cause the issue to be dropped.
                components = line.split(';;', 3)
                if len(components) != 4:
                    continue
                _, line_number, error_code, error_message = components
                # flake8 was given exactly this one file, so file the issue
                # under the path as git reported it instead of trusting the
                # path echoed back in the output.
                comments_per_file.setdefault(path, []).append(
                    {'line': line_number, 'message': error_code + ': ' + error_message})
    finally:
        if fallback_config:
            os.remove(flake8_config)

    if checked_file_count == 0:
        log.info("%s: No python files changed.",
                data['change']['number'])
        return None, 0
    log.info("%s: Comments: %s", data['change']['number'], json.dumps(comments_per_file))
    return comments_per_file, checked_file_count


def generate_review(comments_per_file, change_number):
    """ Generate a review from the comments.

        comments_per_file maps a file path to a list of dicts with the keys
        'line' and 'message'. It may be None or empty, in which case a
        positive review without comments is returned.

        Otherwise the ported comments of the change are fetched, and every new
        comment that repeats one of them (same file, same message, same line)
        is left out so that it is not posted again on each patch set.

        Returns a dict with 'message' and 'labels', plus either 'tag' (no
        issues) or 'comments' (issues found), ready to be serialized as the
        body of a review request.
    """

    log.info("Generating review")
    if not comments_per_file:
        return {
            'message': "No flake8 issues found. Looks good.",
            'labels': {'Sanity-Review': 1},
            'tag': "autogenerated:flake8"
        }

    # fetch_ported_comments() returns None on failure; treat that as "none".
    ported_comments = fetch_ported_comments(change_number) or {}
    comment_inputs = {}
    for file_name, comments in comments_per_file.items():
        ported = ported_comments.get(file_name) or []
        # Gerrit's REST API documents the comment line as an integer, while
        # the comments built from flake8 output carry it as text. Compare the
        # textual form of both, otherwise a duplicate is never recognized.
        # .get() is used because file-level comments carry no 'line' at all.
        ported_messages = {entry.get('message') for entry in ported}
        ported_at_line = {(entry.get('message'), str(entry.get('line')))
                          for entry in ported}
        for comment in comments:
            line = comment.get('line')
            if line:
                is_duplicate = (comment['message'], str(line)) in ported_at_line
            else:
                # Without a line to compare, the message alone decides.
                is_duplicate = comment['message'] in ported_messages
            if is_duplicate:
                log.debug("Skipping duplicate comment: %s", comment['message'])
                continue
            comment_inputs.setdefault(file_name, []).append({
                'line': comment['line'],
                'message': comment['message'],
                'unresolved': 'true'
            })

    message = "Flake8 identified issues in this change."
    if not comment_inputs:
        # Everything found was already commented on in an earlier patch set.
        message = "Flake8 identified issues which remain unresolved from a previous patchset." \
                  " Please address those issues."
    return {
        'message': message,
        'comments': comment_inputs,
        'labels': {'Code-Review': -1}
    }

def fetch_ported_comments(changeId):
    """ Fetch the ported comments from the Gerrit API.

        Queries the ported_comments endpoint for the current revision of the
        change identified by changeId, authenticating with GERRIT_AUTH.

        Returns the decoded JSON response, a map of CommentInfo objects keyed
        by file name, or None if the request fails or the response cannot be
        parsed. This call blocks until the response arrives or the timeout
        expires.
    """

    log.info("%s: Fetching ported comments", changeId)
    url = f"https://codereview.qt-project.org/a/changes/{changeId}/revisions/current/ported_comments"
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, headers=headers)
    try:
        # The with-block closes the connection; the timeout keeps a stalled
        # server from blocking the caller forever.
        with urlopen(req, timeout=30) as response:
            body = response.read().decode('utf-8')
    except HTTPError as e:
        log.info('Error fetching ported comments: %s %s', str(e.code), e.reason)
        return None
    except URLError as e:
        log.info('Error fetching ported comments: %s', str(e.reason))
        return None
    except OSError as e:
        # Timeouts and connection errors while reading the response are not
        # wrapped in URLError.
        log.info('Error fetching ported comments: %s', str(e))
        return None

    # Strip the ")]}'" marker only where it leads the response, so the same
    # character sequence inside a comment message is left intact.
    prefix = ")]}'"
    if body.startswith(prefix):
        body = body[len(prefix):]
    try:
        data = json.loads(body)
    except ValueError as e:
        log.info('Error fetching ported comments: invalid JSON in response: %s', str(e))
        return None
    log.info("%s: Ported comments: %s", changeId, json.dumps(data))
    return data

async def post_review(data, review, retry=0):
    """ Post the review to Gerrit.

        data supplies data['change']['number'] and data['patchSet']['revision']
        to address the revision; review is serialized to JSON and sent as the
        request body. retry counts the attempts already made and is only
        meant to be passed by the function itself.

        A 409 response is retried up to 10 times, five seconds apart. Every
        other failure is logged and the function returns without raising.
    """

    log.info("%s: Posting review", data['change']['number'])
    change_number = data['change']['number']
    revision = data['patchSet']['revision']
    url = f"https://codereview.qt-project.org/a/changes/{change_number}/revisions/{revision}/review"
    review_data = json.dumps(review).encode('utf-8')
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, review_data, headers)
    log.info('%s: Review data: %s', change_number, review_data)
    try:
        # Only the status matters; the with-block closes the connection and
        # the timeout keeps a stalled server from hanging the request.
        with urlopen(req, timeout=30):
            pass
    except HTTPError as e:
        if e.code == 409 and retry < 10:
            # Lock failure. Try again for up to 10 times recursively.
            log.info('%s: Retrying due to 409 Lock Failure...', change_number)
            await asyncio.sleep(5)
            await post_review(data, review, retry + 1)
        else:
            # Either not retryable or out of retries: make the failure
            # visible instead of dropping it silently.
            log.error('%s: Error posting review: %s %s',
                      change_number, str(e.code), e.reason)
    except URLError as e:
        log.info('Error posting review: %s', str(e.reason))
    except OSError as e:
        # Timeouts while waiting for the response are not wrapped in URLError.
        log.error('%s: Error posting review: %s', change_number, str(e))
    else:
        log.info('%s: Review posted successfully.', change_number)


async def handle(request):
    """ Handle the incoming webhook request.

        Only 'patchset-created' events for changes in state 'NEW' whose
        project name starts with 'pyside' are processed; everything else is
        acknowledged with status 200 and ignored. For a matching event the
        repo is cloned if needed, the patch is checked out and linted while
        holding the repo lock, and the resulting review is posted to Gerrit.

        Returns status 400 if the body is not a JSON object or lacks the
        fields needed to identify the change, and status 200 otherwise,
        including when processing the patch fails (the error is logged).
    """
    body = await request.text()
    try:
        data = json.loads(body)
    except ValueError:
        log.warning("Rejecting webhook request: body is not valid JSON")
        return web.Response(status=400)
    if not isinstance(data, dict):
        return web.Response(status=400)

    # make sure it's a patchset-created event
    if data.get('type') != 'patchset-created':
        return web.Response(status=200)

    change = data.get('change')
    patch_set = data.get('patchSet')
    if not isinstance(change, dict) or not isinstance(patch_set, dict):
        return web.Response(status=400)

    # Make sure the change is in state NEW
    if change.get('status') != 'NEW':
        return web.Response(status=200)

    # Only act on pyside repos.
    project = change.get('project')
    if not isinstance(project, str) or not project.startswith('pyside'):
        return web.Response(status=200)

    if 'number' not in change or 'revision' not in patch_set:
        return web.Response(status=400)

    repo_name = urlparse(project).path.split('/')[-1]
    # The name is used as a directory to clone into and to run 'git clean'
    # in, so it must never resolve to the working directory or its parent.
    if repo_name in ('', '.', '..'):
        log.warning("Rejecting webhook request: unusable project name %s", project)
        return web.Response(status=400)
    data['repo_name'] = repo_name

    log.info("%s: Received webhook for %s", change['number'], patch_set['revision'])

    # Request a lock on the git repo. This happens outside of the try block:
    # if waiting for the lock is interrupted, the finally clause must not
    # release a lock that another request is holding.
    log.info("%s: Acquiring lock", change['number'])
    await semaphore.acquire()
    try:
        await clone_repo(data)
        await checkout_patch(data)
        issues, file_count = await run_flake8(data)
    except Exception as e:
        # Log with traceback; the webhook is still acknowledged.
        log.exception("Error: %s", str(e))
        return web.Response(status=200)
    finally:
        log.info("%s: Releasing lock", change['number'])
        semaphore.release()

    # create a review with the comments if any python files were reviewed
    if file_count > 0:
        review = generate_review(issues, change['number'])
        await post_review(data, review)

    return web.Response(status=200)

async def getStatus(request):
    """ Answer any request with a plain-text 'OK' body. """
    # The request itself is not inspected.
    ok_response = web.Response(text='OK')
    return ok_response


async def run_web_server():
    """ Run the web server.

        Serves GET /status and POST / on all interfaces. The port is read from
        the PORT environment variable and defaults to 8088.

        Raises ValueError if PORT is set to something that is not an integer.
    """
    app = web.Application()
    app.add_routes([
        web.get('/status', getStatus),
        web.post('/', handle),
    ])
    runner = web.AppRunner(app)
    await runner.setup()
    # Environment values are strings; hand the site a real integer port.
    port = int(os.environ.get("PORT") or 8088)
    site = web.TCPSite(runner, '0.0.0.0', port)
    await site.start()
    log.info("Web server started on port %s", port)

# Bring the web server up before entering the loop for good. Waiting for the
# startup coroutine here means that a failure to start (for example a port
# that is already in use) ends the process with a traceback, instead of being
# lost in an unobserved task while the loop idles with nothing listening.
loop = asyncio.new_event_loop()
loop.run_until_complete(run_web_server())
loop.run_forever()