diff options
author | Daniel Smith <[email protected]> | 2024-06-27 13:22:12 +0200 |
---|---|---|
committer | Daniel Smith <[email protected]> | 2025-05-20 09:51:48 +0000 |
commit | 1ed01b4a1e5172090d71e472ccb721777cacab27 (patch) | |
tree | a496c6816944e9d4e94997c7058e41b6bfde4db5 | |
parent | 05d173273e8d66db2d7a608ad846865a01d6776a (diff) |
This bot checks files for each patchset and tags the change with a
hashtag if it touches a file that is marked as security sensitive.
Task-number: QTQAINFRA-6431
Change-Id: I09f0c2fb646fa902a2278771f48a5401f4770586
Reviewed-by: Daniel Smith <[email protected]>
-rw-r--r-- | README.md | 37 | ||||
-rw-r--r-- | qtSecurity_bot.service | 22 | ||||
-rw-r--r-- | src/Pipfile | 13 | ||||
-rw-r--r-- | src/main.py | 608 |
4 files changed, 680 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..54779c6 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Qt securityHeader Bot + +As per [QUIP-23](https://contribute.qt-project.org/quips/23), +This bot executes Security Header checks on all files as they are uploaded in a patchset. + +It examines the first 50 lines (limited to the first 8KB) of changed files in a patchset +for the security keyphrase: +> // Qt-Security score:critical + +In the event that a file is modified or deleted, both the current patch and previous +version will be checked for the security keyphrase to ensure that it is not being +inappropriately removed or downgraded. + +If a security-critical file is added, modified or deleted, the bot will: +- Post a hashtag ["Qt-Security change"](https://codereview.qt-project.org/q/hashtag:%22Qt-Security+change%22) + to the gerrit change request. + + Changes with this hashtag will display a large red banner to notify + reviewers that the change deserves extra scrutiny. + + +### Exclusions +- All non-utf-8 encoded files (images, archives, other binary file formats) + +## Installation +To install this script as a service +1. Copy the service file to the systemd directory of your choice such as `/etc/systemd/system/`. +2. Reload the daemon with `systemctl daemon-reload`. +3. Run `systemctl edit qtSecurity_bot` to generate an override config. Set environment variables here. +4. Start the service (Default port=8088, override with `QTSECURITYBOT_PORT`). + +## Prerequsites +1. This bot is designed to receive webhooks from Gerrit Code Review. See +[Gerrit Webhooks](https://gerrit.googlesource.com/plugins/webhooks/+/refs/heads/master/src/main/resources/Documentation/config.md) +2. The included systemd service file assumes you have `pipenv` installed for the `qt` user. +3. You must manually install required packaged into the pipenv, as the service does not do this + automatically. diff --git a/qtSecurity_bot.service b/qtSecurity_bot.service new file mode 100644 index 0000000..df252e6 --- /dev/null +++ b/qtSecurity_bot.service @@ -0,0 +1,22 @@ +[Unit] +Description=securityHeader Bot +After=network.target + +[Service] +ExecStart=/home/qt/.local/bin/pipenv run python3 main.py +WorkingDirectory=/home/qt/qt-securityHeader-bot/src +User=qt +Group=qt +Restart=always + +# It is recommended to set environment variables in an override.conf file +# readable only to the user to protect secrets. +Environment="QTSECURITYBOT_GERRIT_USERNAME=foo" +Environment="QTSECURITYBOT_GERRIT_PASSWORD=bar" +Environment="QTSECURITYBOT_TEAMS_WEBHOOK_URL=" +Environment="QTSECURITYBOT_TEAMS_ERROR_WEBHOOK_URL=" +Environment="QTSECURITYBOT_PORT=8090" + + +[Install] +WantedBy=multi-user.target diff --git a/src/Pipfile b/src/Pipfile new file mode 100644 index 0000000..945a2b2 --- /dev/null +++ b/src/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +aiohttp = "*" +systemd-python = "*" + +[dev-packages] + +[requires] +python_version = "3.10" diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..8ad4dc4 --- /dev/null +++ b/src/main.py @@ -0,0 +1,608 @@ +# Copyright (C) 2024 The Qt Company Ltd. +# Contact: https://www.qt.io/licensing/ +# +# You may use this file under the terms of the 3-clause BSD license. +# See the file LICENSE in qt/qtrepotools for details. +# + +"""This script listens for incoming webhook requests of patchset-created type + from Gerrit, checks out the patch locally, and checks security headers on it. + It then posts a comment for each issue identified to Gerrit with the results. +""" + +import asyncio +import base64 +import fnmatch +import json +import logging +import os +import re +import shutil +import sys +import time +import traceback +from functools import wraps +from logging.handlers import TimedRotatingFileHandler +from subprocess import CalledProcessError + +import aiohttp +from aiohttp import web + +# Configure logging +LOG_DIR = "logging" +os.makedirs(LOG_DIR, exist_ok=True) +LOG_FILE = os.path.join(LOG_DIR, "qtsecuritybot.log") +handler = TimedRotatingFileHandler(LOG_FILE, when='midnight', backupCount=90) +handler.setFormatter(logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s')) +logging.basicConfig(level=logging.INFO, handlers=[handler]) +log = logging.getLogger() + +GERRIT_USERNAME = os.environ.get('QTSECURITYBOT_GERRIT_USERNAME') +GERRIT_PASSWORD = os.environ.get('QTSECURITYBOT_GERRIT_PASSWORD') + +if not GERRIT_USERNAME or not GERRIT_PASSWORD: + log.info( + 'Please set the QTSECURITYBOT_GERRIT_USERNAME and QTSECURITYBOT_GERRIT_PASSWORD environment variables.') + sys.exit(1) + +# Base64 encode the username and password +GERRIT_AUTH = GERRIT_USERNAME + ':' + GERRIT_PASSWORD +GERRIT_AUTH = GERRIT_AUTH.encode('utf-8') +GERRIT_AUTH = base64.b64encode(GERRIT_AUTH).decode('utf-8') + +CONFIG = { + 'CLONE_TIMEOUT': 240, + 'CHECKOUT_TIMEOUT': 60, + 'MAX_RETRIES': 10, + 'RETRY_DELAY': 5, + 'MAX_LOCK_WAIT': 900, # 15 minutes, + 'TEAMS_URL': os.environ.get('QTSECURITYBOT_TEAMS_WEBHOOK_URL'), + 'TEAMS_ERROR_URL': os.environ.get('QTSECURITYBOT_TEAMS_ERROR_WEBHOOK_URL'), +} + + +class Lock: + """A simple lock class to prevent multiple events from + being handled at the same time. + """ + + def __init__(self): + self.locked = False + self._lock_time = 0 + + async def acquire(self): + """Acquire the lock with timeout.""" + start_time = time.time() + while self.locked: + if time.time() - start_time > CONFIG['MAX_LOCK_WAIT']: + raise TimeoutError("Lock acquisition timed out") + await asyncio.sleep(1) + self.locked = True + self._lock_time = time.time() + + def release(self): + """Release the lock.""" + self.locked = False + self._lock_time = 0 + + +def log_errors(f): + """Decorator to log any unhandled errors in a function.""" + @wraps(f) + async def wrapper(*args, **kwargs): + try: + return await f(*args, **kwargs) + except Exception as e: + log.error("Error in %s: %s\n%s", f.__name__, + str(e), traceback.format_exc()) + raise + return wrapper + + +semaphore = Lock() + + +async def clone_repo(data): + """Clone the target repo and check out the branch if needed.""" + log.info("Cloning repo %s", data['change']['project']) + repo_name = data['repo_name'] + + if os.path.exists(repo_name): + # return if the repo already exists + return + repo_url = "https://codereview.qt-project.org/" + \ + data['change']['project'] + ".git" + + if not shutil.which('git'): + raise FileNotFoundError("Git executable not found in PATH") + + try: + p = await asyncio.create_subprocess_exec( + 'git', 'clone', repo_url, repo_name, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=CONFIG['CLONE_TIMEOUT']) + + if p.returncode != 0: + raise CalledProcessError( + p.returncode, 'git clone', output=stdout, stderr=stderr) + + log.info("Repository cloned successfully: %s", stdout.decode()) + except FileNotFoundError as e: + log.error("Error: %s", e) + except asyncio.TimeoutError: + log.error("Error: Subprocess timed out") + except CalledProcessError as e: + raise e + + +async def checkout_patch(data): + """Check out the patch.""" + + log.info("%s: Checking out patch", data['change']['number']) + # Check out the patch + repo_dir = data['repo_name'] + # git clean -fdx first to remove any untracked files + p = await asyncio.create_subprocess_exec('git', '-C', repo_dir, 'clean', '-fdx') + await p.communicate() + try: + # Fetch the patch first + p = await asyncio.create_subprocess_exec( + 'git', '-C', repo_dir, 'fetch', 'origin', data['patchSet']['ref'], + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240) + if p.returncode != 0: + raise CalledProcessError( + p.returncode, 'git fetch', output=stdout, stderr=stderr) + log.info("Fetch successful: %s", stdout.decode()) + + p = await asyncio.create_subprocess_exec( + 'git', '-C', repo_dir, 'checkout', 'FETCH_HEAD', + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240) + if p.returncode != 0: + raise CalledProcessError( + p.returncode, 'git checkout', output=stdout, stderr=stderr) + log.info("Checkout successful: %s", stdout.decode()) + + except FileNotFoundError as e: + log.error("Error: %s", e) + except asyncio.TimeoutError: + log.error("Error: Subprocess timed out") + except CalledProcessError as e: + raise e + + +async def run_security_header_check(data): + """Run securityHeader on the patch.""" + log.info("%s: Running Security Header checks", data['change']['number']) + comments_per_file = {} + + def _check_header_lines(lines, file_name, is_new_patch): + line_number = None + # Start line numbering at 1 + for _line_number, line in enumerate(lines, 1): + if _line_number > 50: + break # Only check the first 50 lines + + # Check for security headers. Match lazily. Enforcement is done via sanity bot. + comment_pattern = re.compile( + r'^\s*(#|\/\/|\/\*|\*|<!--|--)\s*Qt-Security\s*score:\s*critical' + ) + + for _line_number, line in enumerate(lines): + if comment_pattern.search(line): + line_number = _line_number + break + + if line_number: + # Add the comment to the list of comments for this file + comments_per_file[file_name] = { + 'line': line_number, + 'message': "Modifying security sensitive file.", + 'side': 'REVISION' if is_new_patch else 'PARENT' + } + # End of _check_header_lines + + # Get the list of files changed in this patch + try: + p = await asyncio.create_subprocess_exec( + 'git', '-C', data['repo_name'], 'diff-tree', '--no-commit-id', + '--name-status', '-r', 'FETCH_HEAD', + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240) + if p.returncode != 0: + raise CalledProcessError( + p.returncode, 'git diff-tree', output=stdout, stderr=stderr) + log.info("Diff successful: %s", stdout.decode()) + + except FileNotFoundError as e: + log.error("Error: %s", e) + except asyncio.TimeoutError: + log.error("Error: Subprocess timed out") + except CalledProcessError as e: + raise e + # Parse the output + changed_files = [] # Empty list to store file names + for line in stdout.decode().split('\n'): + if len(line) > 0 and not line.startswith("D"): # Skip empty lines and deleted files + # Add the file name to the list + changed_files.append(line.split('\t')[1]) + + ignore_patterns = [] + + # Run securityHeader on each file + for file in changed_files: + if any(fnmatch.fnmatch(file, pattern) for pattern in ignore_patterns): + continue # Skip this file if it matches any of the other ignore patterns + + file_name = os.path.relpath(file, os.getcwd()) + + # Get how the file was modified (added, modified, or deleted) + try: + p = await asyncio.create_subprocess_exec( + 'git', '-C', data['repo_name'], 'diff-tree', '--no-commit-id', + '--name-status', '-r', 'FETCH_HEAD', '--', file, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240) + if p.returncode != 0: + raise CalledProcessError(p.returncode, 'git diff-tree', + output=stdout, stderr=stderr + ) + log.info("File diff successful: %s", stdout.decode()) + + except FileNotFoundError as e: + log.error("Error: %s", e) + except asyncio.TimeoutError: + log.error("Error: Subprocess timed out") + except CalledProcessError as e: + raise e + + change_type = None + for line in stdout.decode().split('\n'): + if len(line) > 0: + change_type = line.split('\t')[0] + + # Mofified files need to be checked for header in both the current and previous versions + + try: + # For added files and modified files, Open the file and scan for security headers. + if change_type == 'A' or change_type == 'M': + with open(os.path.join(data['repo_name'], file_name), 'r', encoding='utf-8') as f: + lines = f.readlines(8192) + _check_header_lines(lines, file_name, True) + + # For deleted files or modified files, use the previous version of the file. + if file_name not in comments_per_file and file_name in changed_files: + if change_type == 'D' or change_type == 'M': + try: + p = await asyncio.create_subprocess_exec( + 'git', '-C', data['repo_name'], 'show', 'FETCH_HEAD~1:' + file, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240) + if p.returncode != 0: + raise CalledProcessError(p.returncode, 'git show', + output=stdout, stderr=stderr + ) + log.debug("Show file successful: %s", stdout.decode()) + del_file = stdout.decode() + lines = del_file.split('\n') + _check_header_lines(lines, file_name, False) + except FileNotFoundError as e: + log.error("Error: %s", e) + except asyncio.TimeoutError: + log.error("Error: Subprocess timed out") + except CalledProcessError as e: + raise e + except UnicodeDecodeError as e: + log.warning("WARN: File %s not valid utf-8 encoding", file_name) + except IsADirectoryError as e: + log.warning("WARN: File %s is a directory", file_name) + + if not comments_per_file: + return None + + log.info("%s: Comments: %s", data['change'] + ['number'], json.dumps(comments_per_file)) + return comments_per_file + + +def generate_review(comments_per_file): + """Generate a review from the comments.""" + + log.info("Generating review") + if not comments_per_file or len(comments_per_file.keys()) == 0: + return None + comment_inputs = {} + for file_name, comment in comments_per_file.items(): + if file_name not in comment_inputs: + comment_inputs[file_name] = [] + comment_inputs[file_name].append({ + 'line': comment['line'], + 'message': comment['message'], + 'side': comment['side'], + 'unresolved': 'false' + }) + review = { + 'comments': comment_inputs + } + return review + + +async def post_review(data, review, retry=0): + """Post the review to Gerrit.""" + change_number = data['change']['number'] + revision = data['patchSet']['revision'] + url = f"https://codereview.qt-project.org/a/changes/%7Bchange_number%7D/revisions/%7Brevision%7D/review" + headers = {'Content-Type': 'application/json;charset=UTF-8', + 'Authorization': 'Basic ' + GERRIT_AUTH} # Ensure GERRIT_AUTH is defined + + log.info("%s: Posting review", change_number) + log.info('%s: Review data: %s', change_number, json.dumps(review)) + + try: + async with aiohttp.ClientSession() as session: + async with session.post(url, json=review, headers=headers) as response: + if response.status == 409 and retry < 10: + log.info( + '%s: Retrying due to 409 Lock Failure...', change_number) + await asyncio.sleep(5) + await post_review(data, review, retry + 1) + elif response.status >= 400: + response_text = await response.text() + log.info('Error posting review: %s %s', + response.status, response_text) + else: + log.info('%s: Review posted successfully.', change_number) + except aiohttp.ClientError as e: + log.info('Error posting review: %s', str(e)) + + +async def post_hashtag(data, retry=0): + """Post the review to Gerrit.""" + change_number = data['change']['number'] + url = f"https://codereview.qt-project.org/a/changes/%7Bchange_number%7D/hashtags" + headers = {'Content-Type': 'application/json;charset=UTF-8', + 'Authorization': 'Basic ' + GERRIT_AUTH} + hashtags = {"add": ["Qt-Security change"]} + + log.info('%s: Review data: %s', change_number, hashtags) + try: + async with aiohttp.ClientSession() as session: + async with session.post(url, json=hashtags, headers=headers) as response: + if response.status == 409 and retry < 10: + log.info( + '%s: Retrying due to 409 Lock Failure...', change_number) + await asyncio.sleep(5) + await post_hashtag(data, retry + 1) + elif response.status >= 400: + response_text = await response.text() + log.info('Error posting review: %s %s', + response.status, response_text) + else: + log.info('%s: Hashtag posted successfully.', change_number) + except aiohttp.ClientError as e: + log.info('Error posting hashtag: %s', str(e)) + + +async def post_teams_message(data, retry=0): + """Post a simple message to Teams with the change details.""" + if not CONFIG['TEAMS_URL']: + log.info('Teams webhook URL not set. Skipping message posting.') + return + headers = { + 'Content-Type': 'application/json' + } + card = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "summary": "New Security Relevant Change", + "themeColor": "0076D7", + "title": data['change']['subject'], + "sections": [{ + "activityTitle": data['change']['subject'], + "activitySubtitle": f"Change number: {data['change']['number']}", + "facts": [{ + "name": "Project:", + "value": data['change']['project'] + }, { + "name": "Branch:", + "value": data['change']['branch'] + }], + "markdown": True + }], + "potentialAction": [{ + "@type": "OpenUri", + "name": "View Change", + "targets": [{ + "os": "default", + "uri": data['change']['url'] + }] + }] + } + + try: + async with aiohttp.ClientSession() as session: + async with session.post(CONFIG['TEAMS_URL'], json=card, headers=headers) as response: + if response.status == 408 and retry < 10: + log.info('%s: Retrying due to 408 Request Timeout...', + data['change']['number']) + await asyncio.sleep(5) + await post_teams_message(data, retry + 1) + elif response.status >= 400: + response_text = await response.text() + log.info('Error posting Teams message: %s %s', + response.status, response_text) + else: + log.info('%s: Teams message posted successfully.', + data['change']['number']) + except aiohttp.ClientError as e: + log.info('Error posting Teams message: %s', str(e)) + + +async def post_teams_error_message(data, custom_text, retry=0): + """Post an error message to Teams with the change details for diagnostic purposes.""" + if not CONFIG['TEAMS_ERROR_URL']: + log.info('Teams error webhook URL not set. Skipping error message posting.') + return + headers = { + 'Content-Type': 'application/json' + } + card = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "summary": "New Change", + "themeColor": "0076D7", + "title": data['change']['subject'], + "text": custom_text, + "sections": [{ + "activityTitle": data['change']['subject'], + "activitySubtitle": f"Change number: {data['change']['number']}", + "facts": [{ + "name": "Project:", + "value": data['change']['project'] + }, { + "name": "Branch:", + "value": data['change']['branch'] + }], + "markdown": True + }], + "potentialAction": [{ + "@type": "OpenUri", + "name": "View Change", + "targets": [{ + "os": "default", + "uri": data['change']['url'] + }] + }] + } + + try: + async with aiohttp.ClientSession() as session: + async with session.post(CONFIG['TEAMS_ERROR_URL'], json=card, headers=headers) as res: + if res.status == 408 and retry < 10: + log.info('%s: Retrying due to 408 Request Timeout...', + data['change']['number']) + await asyncio.sleep(5) + await post_teams_message(data, retry + 1) + elif res.status >= 400: + response_text = await res.text() + log.info('Error posting Teams message: %s %s', + res.status, response_text) + else: + log.info('%s: Teams message posted successfully.', + data['change']['number']) + except aiohttp.ClientError as e: + log.info('Error posting Teams message: %s', str(e)) + + +@log_errors +async def handle(request): + """Handle the incoming webhook request.""" + # Wrap the whole function in case the request is malformed. + try: + body = await request.text() + data = json.loads(body) + + # Validate request + required_fields = ['type', 'change', 'patchSet'] + if not all(field in data for field in required_fields): + return web.Response(status=400, text="Missing required fields") + + # Make sure the change is in state NEW + if data['change']['status'] != 'NEW': + return web.Response(status=204) + + # make sure it's a patchset-created event + if data['type'] != 'patchset-created': + return web.Response(status=204) + + # Only act on pyside repos. + if not data['change']['project'].startswith('pyside'): + return web.Response(status=204) + + data['repo_name'] = data['change']['project'].split('/')[-1] + + log.info("%s: Received webhook for %s", + data['change']['number'], data['patchSet']['revision']) + + # Request a lock on the git repo + try: + log.debug("%s: Acquiring lock", data['change']['number']) + await semaphore.acquire() + await clone_repo(data) + await checkout_patch(data) + comments = await run_security_header_check(data) + except Exception as e: + log.error("Error: %s", str(e)) + await post_teams_error_message(data, str(e)) + return web.Response(status=500, body=str(e)) + finally: + log.info("%s: Releasing lock", data['change']['number']) + semaphore.release() + + # create a review with the comments if any python files were reviewed + if comments: + review = generate_review(comments) + await post_review(data, review) + await post_hashtag(data) + # Post a message to Teams about the Security relevant change. + # Re-enable this line to post to Teams about security Changes + # It is currently disabled to prevent spamming users. + # await post_teams_message(data) + else: + log.info("%s: Change not security relevant", + data['change']['number']) + + return web.Response(status=200) + except json.JSONDecodeError: + return web.Response(status=400, text="Invalid JSON") + except Exception as e: + await post_teams_error_message(data, f"{str(e)}\n{traceback.format_exc()}") + return web.Response(status=500) + + +async def handle_status(req): + """Handle the status request.""" + return web.Response(status=200) + + +async def run_web_server(): + """Run the web server.""" + app = web.Application() + app.add_routes([web.get('/status', handle_status)]) + app.add_routes([web.post('/', handle)]) + runner = web.AppRunner(app) + await runner.setup() + port = os.environ.get("QTSECURITYBOT_PORT") or 8088 + site = web.TCPSite(runner, '0.0.0.0', port) + await site.start() + log.info("Web server started on port %s", port) + + +def main(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(run_web_server()) + loop.run_forever() + except KeyboardInterrupt: + pass + finally: + loop.close() + + +if __name__ == "__main__": + main() |