Diffstat (limited to 'src/main.py')
-rw-r--r--  src/main.py  608
1 file changed, 608 insertions, 0 deletions
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..8ad4dc4
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,608 @@
+# Copyright (C) 2024 The Qt Company Ltd.
+# Contact: https://www.qt.io/licensing/
+#
+# You may use this file under the terms of the 3-clause BSD license.
+# See the file LICENSE in qt/qtrepotools for details.
+#
+
+"""This script listens for incoming webhook requests of patchset-created type
+ from Gerrit, checks out the patch locally, and checks security headers on it.
+ It then posts a comment for each issue identified to Gerrit with the results.
+"""
+
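+# Abridged sketch of the Gerrit "patchset-created" payload fields this script
+# relies on (everything else in the real event is ignored):
+#   type:     "patchset-created"
+#   change:   project, branch, number, subject, url, status
+#   patchSet: ref, revision
+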
+import asyncio
+import base64
+import fnmatch
+import json
+import logging
+import os
+import re
+import shutil
+import sys
+import time
+import traceback
+from functools import wraps
+from logging.handlers import TimedRotatingFileHandler
+from subprocess import CalledProcessError
+
+import aiohttp
+from aiohttp import web
+
+# Configure logging
+LOG_DIR = "logging"
+os.makedirs(LOG_DIR, exist_ok=True)
+LOG_FILE = os.path.join(LOG_DIR, "qtsecuritybot.log")
+handler = TimedRotatingFileHandler(LOG_FILE, when='midnight', backupCount=90)
+handler.setFormatter(logging.Formatter(
+ '%(asctime)s - %(levelname)s - %(message)s'))
+logging.basicConfig(level=logging.INFO, handlers=[handler])
+log = logging.getLogger()
+
+GERRIT_USERNAME = os.environ.get('QTSECURITYBOT_GERRIT_USERNAME')
+GERRIT_PASSWORD = os.environ.get('QTSECURITYBOT_GERRIT_PASSWORD')
+
+if not GERRIT_USERNAME or not GERRIT_PASSWORD:
+    log.error(
+        'Please set the QTSECURITYBOT_GERRIT_USERNAME and QTSECURITYBOT_GERRIT_PASSWORD environment variables.')
+ sys.exit(1)
+
+# Base64 encode the username and password
+GERRIT_AUTH = GERRIT_USERNAME + ':' + GERRIT_PASSWORD
+GERRIT_AUTH = GERRIT_AUTH.encode('utf-8')
+GERRIT_AUTH = base64.b64encode(GERRIT_AUTH).decode('utf-8')
+
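+# Timeouts and retry limits for git and REST operations (timeouts in seconds),
+# plus optional Teams webhook URLs taken from the environment.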
+CONFIG = {
+ 'CLONE_TIMEOUT': 240,
+ 'CHECKOUT_TIMEOUT': 60,
+ 'MAX_RETRIES': 10,
+ 'RETRY_DELAY': 5,
+    'MAX_LOCK_WAIT': 900,  # 15 minutes
+ 'TEAMS_URL': os.environ.get('QTSECURITYBOT_TEAMS_WEBHOOK_URL'),
+ 'TEAMS_ERROR_URL': os.environ.get('QTSECURITYBOT_TEAMS_ERROR_WEBHOOK_URL'),
+}
+
+
+class Lock:
+ """A simple lock class to prevent multiple events from
+ being handled at the same time.
+ """
+
+ def __init__(self):
+ self.locked = False
+ self._lock_time = 0
+
+ async def acquire(self):
+ """Acquire the lock with timeout."""
+ start_time = time.time()
+ while self.locked:
+ if time.time() - start_time > CONFIG['MAX_LOCK_WAIT']:
+ raise TimeoutError("Lock acquisition timed out")
+ await asyncio.sleep(1)
+ self.locked = True
+ self._lock_time = time.time()
+
+ def release(self):
+ """Release the lock."""
+ self.locked = False
+ self._lock_time = 0
+
+
+def log_errors(f):
+ """Decorator to log any unhandled errors in a function."""
+ @wraps(f)
+ async def wrapper(*args, **kwargs):
+ try:
+ return await f(*args, **kwargs)
+ except Exception as e:
+ log.error("Error in %s: %s\n%s", f.__name__,
+ str(e), traceback.format_exc())
+ raise
+ return wrapper
+
+
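+# A single module-level lock so webhook events are processed one at a time and
+# git commands never run concurrently in the same working copy.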
+semaphore = Lock()
+
+
+async def clone_repo(data):
+ """Clone the target repo and check out the branch if needed."""
+ log.info("Cloning repo %s", data['change']['project'])
+ repo_name = data['repo_name']
+
+ if os.path.exists(repo_name):
+ # return if the repo already exists
+ return
+ repo_url = "https://codereview.qt-project.org/" + \
+ data['change']['project'] + ".git"
+
+ if not shutil.which('git'):
+ raise FileNotFoundError("Git executable not found in PATH")
+
+ try:
+ p = await asyncio.create_subprocess_exec(
+ 'git', 'clone', repo_url, repo_name,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=CONFIG['CLONE_TIMEOUT'])
+
+ if p.returncode != 0:
+ raise CalledProcessError(
+ p.returncode, 'git clone', output=stdout, stderr=stderr)
+
+ log.info("Repository cloned successfully: %s", stdout.decode())
+ except FileNotFoundError as e:
+ log.error("Error: %s", e)
+ except asyncio.TimeoutError:
+ log.error("Error: Subprocess timed out")
+ except CalledProcessError as e:
+ raise e
+
+
+async def checkout_patch(data):
+ """Check out the patch."""
+
+ log.info("%s: Checking out patch", data['change']['number'])
+ # Check out the patch
+ repo_dir = data['repo_name']
+ # git clean -fdx first to remove any untracked files
+ p = await asyncio.create_subprocess_exec('git', '-C', repo_dir, 'clean', '-fdx')
+ await p.communicate()
+ try:
+ # Fetch the patch first
+ p = await asyncio.create_subprocess_exec(
+ 'git', '-C', repo_dir, 'fetch', 'origin', data['patchSet']['ref'],
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240)
+ if p.returncode != 0:
+ raise CalledProcessError(
+ p.returncode, 'git fetch', output=stdout, stderr=stderr)
+ log.info("Fetch successful: %s", stdout.decode())
+
+ p = await asyncio.create_subprocess_exec(
+ 'git', '-C', repo_dir, 'checkout', 'FETCH_HEAD',
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240)
+ if p.returncode != 0:
+ raise CalledProcessError(
+ p.returncode, 'git checkout', output=stdout, stderr=stderr)
+ log.info("Checkout successful: %s", stdout.decode())
+
+ except FileNotFoundError as e:
+ log.error("Error: %s", e)
+ except asyncio.TimeoutError:
+ log.error("Error: Subprocess timed out")
+ except CalledProcessError as e:
+ raise e
+
+
+async def run_security_header_check(data):
+ """Run securityHeader on the patch."""
+ log.info("%s: Running Security Header checks", data['change']['number'])
+ comments_per_file = {}
+
+    def _check_header_lines(lines, file_name, is_new_patch):
+        # Check for security headers. Match lazily; enforcement is done via the sanity bot.
+        comment_pattern = re.compile(
+            r'^\s*(#|\/\/|\/\*|\*|<!--|--)\s*Qt-Security\s*score:\s*critical'
+        )
+        # Only check the first 50 lines, numbering from 1.
+        for line_number, line in enumerate(lines, 1):
+            if line_number > 50:
+                break
+            if comment_pattern.search(line):
+                # Record a comment for this file on the line carrying the header.
+                comments_per_file[file_name] = {
+                    'line': line_number,
+                    'message': "Modifying security sensitive file.",
+                    'side': 'REVISION' if is_new_patch else 'PARENT'
+                }
+                break
+    # End of _check_header_lines
+
+ # Get the list of files changed in this patch
+ try:
+ p = await asyncio.create_subprocess_exec(
+ 'git', '-C', data['repo_name'], 'diff-tree', '--no-commit-id',
+ '--name-status', '-r', 'FETCH_HEAD',
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240)
+ if p.returncode != 0:
+ raise CalledProcessError(
+ p.returncode, 'git diff-tree', output=stdout, stderr=stderr)
+ log.info("Diff successful: %s", stdout.decode())
+
+ except FileNotFoundError as e:
+ log.error("Error: %s", e)
+ except asyncio.TimeoutError:
+ log.error("Error: Subprocess timed out")
+ except CalledProcessError as e:
+ raise e
+ # Parse the output
+ changed_files = [] # Empty list to store file names
+ for line in stdout.decode().split('\n'):
+ if len(line) > 0 and not line.startswith("D"): # Skip empty lines and deleted files
+ # Add the file name to the list
+ changed_files.append(line.split('\t')[1])
+
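+    # Glob patterns for files to skip entirely; currently nothing is excluded.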
+ ignore_patterns = []
+
+ # Run securityHeader on each file
+ for file in changed_files:
+ if any(fnmatch.fnmatch(file, pattern) for pattern in ignore_patterns):
+            continue  # Skip this file if it matches any of the ignore patterns
+
+ file_name = os.path.relpath(file, os.getcwd())
+
+ # Get how the file was modified (added, modified, or deleted)
+ try:
+ p = await asyncio.create_subprocess_exec(
+ 'git', '-C', data['repo_name'], 'diff-tree', '--no-commit-id',
+ '--name-status', '-r', 'FETCH_HEAD', '--', file,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240)
+ if p.returncode != 0:
+ raise CalledProcessError(p.returncode, 'git diff-tree',
+ output=stdout, stderr=stderr
+ )
+ log.info("File diff successful: %s", stdout.decode())
+
+ except FileNotFoundError as e:
+ log.error("Error: %s", e)
+ except asyncio.TimeoutError:
+ log.error("Error: Subprocess timed out")
+ except CalledProcessError as e:
+ raise e
+
+ change_type = None
+ for line in stdout.decode().split('\n'):
+ if len(line) > 0:
+ change_type = line.split('\t')[0]
+
+        # Modified files need to be checked for the header in both the current and previous versions.
+
+ try:
+            # For added and modified files, open the file and scan it for security headers.
+ if change_type == 'A' or change_type == 'M':
+ with open(os.path.join(data['repo_name'], file_name), 'r', encoding='utf-8') as f:
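+                    # The 8192 hint stops readlines() once roughly 8 KB of lines have been read.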
+ lines = f.readlines(8192)
+ _check_header_lines(lines, file_name, True)
+
+ # For deleted files or modified files, use the previous version of the file.
+ if file_name not in comments_per_file and file_name in changed_files:
+ if change_type == 'D' or change_type == 'M':
+ try:
+ p = await asyncio.create_subprocess_exec(
+ 'git', '-C', data['repo_name'], 'show', 'FETCH_HEAD~1:' + file,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE
+ )
+ stdout, stderr = await asyncio.wait_for(p.communicate(), timeout=240)
+ if p.returncode != 0:
+ raise CalledProcessError(p.returncode, 'git show',
+ output=stdout, stderr=stderr
+ )
+ log.debug("Show file successful: %s", stdout.decode())
+ del_file = stdout.decode()
+ lines = del_file.split('\n')
+ _check_header_lines(lines, file_name, False)
+ except FileNotFoundError as e:
+ log.error("Error: %s", e)
+ except asyncio.TimeoutError:
+ log.error("Error: Subprocess timed out")
+ except CalledProcessError as e:
+ raise e
+        except UnicodeDecodeError:
+            log.warning("File %s is not valid utf-8; skipping header check", file_name)
+        except IsADirectoryError:
+            log.warning("Path %s is a directory; skipping header check", file_name)
+
+ if not comments_per_file:
+ return None
+
+ log.info("%s: Comments: %s", data['change']
+ ['number'], json.dumps(comments_per_file))
+ return comments_per_file
+
+
+def generate_review(comments_per_file):
+ """Generate a review from the comments."""
+
+ log.info("Generating review")
+    if not comments_per_file:
+ return None
+ comment_inputs = {}
+ for file_name, comment in comments_per_file.items():
+ if file_name not in comment_inputs:
+ comment_inputs[file_name] = []
+ comment_inputs[file_name].append({
+ 'line': comment['line'],
+ 'message': comment['message'],
+ 'side': comment['side'],
+ 'unresolved': 'false'
+ })
+ review = {
+ 'comments': comment_inputs
+ }
+ return review
+
+
+async def post_review(data, review, retry=0):
+ """Post the review to Gerrit."""
+ change_number = data['change']['number']
+ revision = data['patchSet']['revision']
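+    # Gerrit "set review" REST endpoint for this specific revision of the change.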
+ url = f"https://codereview.qt-project.org/a/changes/%7Bchange_number%7D/revisions/%7Brevision%7D/review"
+    headers = {'Content-Type': 'application/json;charset=UTF-8',
+               'Authorization': 'Basic ' + GERRIT_AUTH}
+
+ log.info("%s: Posting review", change_number)
+ log.info('%s: Review data: %s', change_number, json.dumps(review))
+
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(url, json=review, headers=headers) as response:
+ if response.status == 409 and retry < 10:
+ log.info(
+ '%s: Retrying due to 409 Lock Failure...', change_number)
+ await asyncio.sleep(5)
+ await post_review(data, review, retry + 1)
+ elif response.status >= 400:
+ response_text = await response.text()
+ log.info('Error posting review: %s %s',
+ response.status, response_text)
+ else:
+ log.info('%s: Review posted successfully.', change_number)
+ except aiohttp.ClientError as e:
+ log.info('Error posting review: %s', str(e))
+
+
+async def post_hashtag(data, retry=0):
+ """Post the review to Gerrit."""
+ change_number = data['change']['number']
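+    # Gerrit REST endpoint for adding hashtags to the change.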
+ url = f"https://codereview.qt-project.org/a/changes/%7Bchange_number%7D/hashtags"
+ headers = {'Content-Type': 'application/json;charset=UTF-8',
+ 'Authorization': 'Basic ' + GERRIT_AUTH}
+ hashtags = {"add": ["Qt-Security change"]}
+
+    log.info('%s: Hashtag data: %s', change_number, hashtags)
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(url, json=hashtags, headers=headers) as response:
+ if response.status == 409 and retry < 10:
+ log.info(
+ '%s: Retrying due to 409 Lock Failure...', change_number)
+ await asyncio.sleep(5)
+ await post_hashtag(data, retry + 1)
+ elif response.status >= 400:
+ response_text = await response.text()
+                    log.info('Error posting hashtag: %s %s',
+                             response.status, response_text)
+ else:
+ log.info('%s: Hashtag posted successfully.', change_number)
+ except aiohttp.ClientError as e:
+ log.info('Error posting hashtag: %s', str(e))
+
+
+async def post_teams_message(data, retry=0):
+ """Post a simple message to Teams with the change details."""
+ if not CONFIG['TEAMS_URL']:
+ log.info('Teams webhook URL not set. Skipping message posting.')
+ return
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+ card = {
+ "@type": "MessageCard",
+ "@context": "http://schema.org/extensions",
+ "summary": "New Security Relevant Change",
+ "themeColor": "0076D7",
+ "title": data['change']['subject'],
+ "sections": [{
+ "activityTitle": data['change']['subject'],
+ "activitySubtitle": f"Change number: {data['change']['number']}",
+ "facts": [{
+ "name": "Project:",
+ "value": data['change']['project']
+ }, {
+ "name": "Branch:",
+ "value": data['change']['branch']
+ }],
+ "markdown": True
+ }],
+ "potentialAction": [{
+ "@type": "OpenUri",
+ "name": "View Change",
+ "targets": [{
+ "os": "default",
+ "uri": data['change']['url']
+ }]
+ }]
+ }
+
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(CONFIG['TEAMS_URL'], json=card, headers=headers) as response:
+ if response.status == 408 and retry < 10:
+ log.info('%s: Retrying due to 408 Request Timeout...',
+ data['change']['number'])
+ await asyncio.sleep(5)
+ await post_teams_message(data, retry + 1)
+ elif response.status >= 400:
+ response_text = await response.text()
+ log.info('Error posting Teams message: %s %s',
+ response.status, response_text)
+ else:
+ log.info('%s: Teams message posted successfully.',
+ data['change']['number'])
+ except aiohttp.ClientError as e:
+ log.info('Error posting Teams message: %s', str(e))
+
+
+async def post_teams_error_message(data, custom_text, retry=0):
+ """Post an error message to Teams with the change details for diagnostic purposes."""
+ if not CONFIG['TEAMS_ERROR_URL']:
+ log.info('Teams error webhook URL not set. Skipping error message posting.')
+ return
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+ card = {
+ "@type": "MessageCard",
+ "@context": "http://schema.org/extensions",
+ "summary": "New Change",
+ "themeColor": "0076D7",
+ "title": data['change']['subject'],
+ "text": custom_text,
+ "sections": [{
+ "activityTitle": data['change']['subject'],
+ "activitySubtitle": f"Change number: {data['change']['number']}",
+ "facts": [{
+ "name": "Project:",
+ "value": data['change']['project']
+ }, {
+ "name": "Branch:",
+ "value": data['change']['branch']
+ }],
+ "markdown": True
+ }],
+ "potentialAction": [{
+ "@type": "OpenUri",
+ "name": "View Change",
+ "targets": [{
+ "os": "default",
+ "uri": data['change']['url']
+ }]
+ }]
+ }
+
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(CONFIG['TEAMS_ERROR_URL'], json=card, headers=headers) as res:
+ if res.status == 408 and retry < 10:
+ log.info('%s: Retrying due to 408 Request Timeout...',
+ data['change']['number'])
+ await asyncio.sleep(5)
+                    await post_teams_error_message(data, custom_text, retry + 1)
+                elif res.status >= 400:
+                    response_text = await res.text()
+                    log.info('Error posting Teams error message: %s %s',
+                             res.status, response_text)
+                else:
+                    log.info('%s: Teams error message posted successfully.',
+                             data['change']['number'])
+    except aiohttp.ClientError as e:
+        log.info('Error posting Teams error message: %s', str(e))
+
+
+@log_errors
+async def handle(request):
+ """Handle the incoming webhook request."""
+ # Wrap the whole function in case the request is malformed.
+ try:
+ body = await request.text()
+ data = json.loads(body)
+
+ # Validate request
+ required_fields = ['type', 'change', 'patchSet']
+ if not all(field in data for field in required_fields):
+ return web.Response(status=400, text="Missing required fields")
+
+ # Make sure the change is in state NEW
+ if data['change']['status'] != 'NEW':
+ return web.Response(status=204)
+
+ # make sure it's a patchset-created event
+ if data['type'] != 'patchset-created':
+ return web.Response(status=204)
+
+ # Only act on pyside repos.
+ if not data['change']['project'].startswith('pyside'):
+ return web.Response(status=204)
+
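+        # Use the last path component of the project as the local clone directory name.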
+ data['repo_name'] = data['change']['project'].split('/')[-1]
+
+ log.info("%s: Received webhook for %s",
+ data['change']['number'], data['patchSet']['revision'])
+
+ # Request a lock on the git repo
+ try:
+ log.debug("%s: Acquiring lock", data['change']['number'])
+ await semaphore.acquire()
+ await clone_repo(data)
+ await checkout_patch(data)
+ comments = await run_security_header_check(data)
+ except Exception as e:
+ log.error("Error: %s", str(e))
+ await post_teams_error_message(data, str(e))
+ return web.Response(status=500, body=str(e))
+ finally:
+ log.info("%s: Releasing lock", data['change']['number'])
+ semaphore.release()
+
+        # Create a review with the comments if any security-relevant files were found.
+ if comments:
+ review = generate_review(comments)
+ await post_review(data, review)
+ await post_hashtag(data)
+ # Post a message to Teams about the Security relevant change.
+ # Re-enable this line to post to Teams about security Changes
+ # It is currently disabled to prevent spamming users.
+ # await post_teams_message(data)
+ else:
+ log.info("%s: Change not security relevant",
+ data['change']['number'])
+
+ return web.Response(status=200)
+ except json.JSONDecodeError:
+ return web.Response(status=400, text="Invalid JSON")
+ except Exception as e:
+ await post_teams_error_message(data, f"{str(e)}\n{traceback.format_exc()}")
+ return web.Response(status=500)
+
+
+async def handle_status(req):
+ """Handle the status request."""
+ return web.Response(status=200)
+
+
+async def run_web_server():
+ """Run the web server."""
+ app = web.Application()
+ app.add_routes([web.get('/status', handle_status)])
+ app.add_routes([web.post('/', handle)])
+ runner = web.AppRunner(app)
+ await runner.setup()
+    port = int(os.environ.get("QTSECURITYBOT_PORT") or 8088)
+ site = web.TCPSite(runner, '0.0.0.0', port)
+ await site.start()
+ log.info("Web server started on port %s", port)
+
+
+def main():
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ try:
+ loop.run_until_complete(run_web_server())
+ loop.run_forever()
+ except KeyboardInterrupt:
+ pass
+ finally:
+ loop.close()
+
+
+if __name__ == "__main__":
+ main()