# Copyright (C) 2021 The Qt Company Ltd.
# Contact: https://www.qt.io/licensing/
#
# You may use this file under the terms of the CC0 license.
# See the file LICENSE.CC0 from this package for details.

"""Export the git history of Qt module checkouts to CSV files.

For every sub-directory of the given Qt directory whose name starts with
``qt`` (or is ``pyside-setup``), ``git log`` is run inside it and one
``<module>.csv`` file is written next to this script.
"""

import argparse
import logging
import os
import re
import subprocess
import sys
from pathlib import Path

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("get_data")

# Email domains that are all reported as the single domain "qt".
_QT_DOMAINS = ("theqtcompany", "qt", "nokia", "nokiamail", "digia")

# Substrings identifying log entries that are dropped from the output.
_IGNORED_AUTHORS = ("Qt by Nokia", "Qt Forward Merge Bot", "Qt Submodule Update Bot")


class cd:
    """Context manager that changes the working directory and restores it on exit."""

    def __init__(self, path):
        self.path = path

    def __enter__(self):
        self.saved = os.getcwd()
        os.chdir(self.path)

    def __exit__(self, etype, value, traceback):
        os.chdir(self.saved)


def get_qt_modules(qt_path):
    """Return the sorted names of the Qt module directories inside *qt_path*.

    An entry qualifies when it is a directory and its name starts with
    ``qt`` or contains ``pyside-setup``.
    """

    def is_qt(x):
        valid_dir = x.startswith("qt") or "pyside-setup" in x
        return valid_dir and os.path.isdir(os.path.join(qt_path, x))

    return sorted(i for i in os.listdir(qt_path) if is_qt(i))


def get_email_domain(x):
    """Return ``(email, domain)`` for the email string *x*.

    Double quotes are stripped from *x* first. The domain is the host part
    of the address without its last dot-separated component. When there is
    no ``@`` the second-to-last dot-separated component of the whole string
    is used, or ``""`` if there is none. Addresses whose domain is one of
    the historical Qt owners are rewritten to ``@qt.io`` and reported with
    the domain ``"qt"``.
    """
    x = x.replace('"', "")
    if "@" not in x:
        try:
            v = x.split(".")[-2]
        except IndexError:
            v = ""
    else:
        host = x.split("@")[1]
        v = ".".join(host.split(".")[:-1]).replace("\\", "")

    if v in _QT_DOMAINS:
        return re.sub("@.*", "@qt.io", x), "qt"
    return x, v


def _first_number(pattern, line):
    """Return the first captured group of *pattern* in *line*, or 0 if absent."""
    match = re.search(pattern, line)
    return match.group(1) if match else 0


def process_git_log_line(line):
    """Turn one pre-processed ``git log`` line into a CSV row.

    *line* holds the ';'-separated commit fields, then the '‽' end marker,
    then the optional ``--shortstat`` summary. The returned row is the
    commit fields followed by the normalized email, its domain and the
    files changed / insertions / deletions counters (0 when missing).
    """
    changed = _first_number(r"(\d+) files? changed", line)
    insertions = _first_number(r"(\d+) insertions?", line)
    deletions = _first_number(r"(\d+) deletions?", line)

    original_line = line.split("‽")[0]
    # the last field is the 'email'
    original_email = original_line.split(";")[-1]
    email, domain = get_email_domain(original_email)

    return f'{original_line};"{email}";"{domain}";"{changed}";"{insertions}";"{deletions}"'


def git_log():
    """Run ``git log`` in the current directory and return its CSV rows.

    Returns the rows joined by newlines, or an empty string when no log
    line survives the filtering.
    """

    def is_valid_line(x):
        return bool(x.strip()) and not any(name in x for name in _IGNORED_AUTHORS)

    # We can do this process with:
    # git log --all --no-merges --date=format:'%Y-%m-%d'
    #         --pretty=format:'µ"%cd";"%h";"%an";"%ae"'
    #         --shortstat | tr '\n' ' ' | tr 'µ' '\n' |
    #         sed 's/\ *\(\d+\)\ files\{0,1\}/\1/g'
    # But we will use only Python to perform those pipe operations.
    # This command has a trick to get the 'shortstats' on the same line
    # when processing the lines. Notice the 'µ' character that depicts the
    # beginning of the line. Additionally we use an interrobang '‽'
    # to depict the end of the git log, so we can add the 'files changed',
    # 'insertions', and 'deletions' at the end.
    # No shell is involved, so the single quotes reach git literally and
    # are removed from the output below.
    command = (
        "git log --all --no-merges "
        "--date=format:'%Y-%m-%d' "
        '--pretty=format:\'µ"%cd";"%h";"%an";"%ae"\'‽ '
        "--shortstat "
    ).split()
    o = subprocess.run(
        command,
        capture_output=True,
        universal_newlines=True,
        encoding="utf-8",
        errors="ignore",
    ).stdout

    o = o.replace("\n", " ").replace("µ", "\n").replace("'", "")
    return "\n".join(process_git_log_line(line) for line in o.splitlines() if is_valid_line(line))


def check_arguments(options):
    """Return True when ``options.qt_dir`` is an existing directory; log an error otherwise."""
    qt_path = Path(options.qt_dir)
    if qt_path.is_dir():
        return True
    log.error(f"'{qt_path}' is not a directory.")
    return False


def is_valid_module(m):
    """Return True when the path *m* is a directory named ``qt*`` or ``pyside-setup``."""
    return (m.name.startswith("qt") or m.name == "pyside-setup") and m.is_dir()


def process_qt_src(options):
    """Write one ``<module>.csv`` next to this script for each module in ``options.qt_dir``."""
    qt_path = Path(options.qt_dir)
    HEADER = "date;sha;name;original_email;email;domain;files_changed;insertions;deletions\n"
    # Sorted so that modules are processed in a deterministic order.
    for i in sorted(qt_path.glob("*")):
        if not is_valid_module(i):
            continue
        log.info(f"Processing {i}...")
        output_csv = Path(__file__).parent / f"{i.name}.csv"
        with cd(i):
            out = git_log()
        if not out:
            log.error(f"Empty 'git log' for {i}")
            continue
        # The git output was decoded as UTF-8, so write it back the same way
        # instead of relying on the platform default encoding.
        with open(output_csv, "w", encoding="utf-8") as f:
            f.write(HEADER)
            f.write(out)


def main(argv=None):
    """Parse *argv* (``sys.argv[1:]`` when None) and export the data.

    Returns 0 on success and -1 when the given Qt directory is invalid.
    """
    parser = argparse.ArgumentParser(prog="get_data")
    parser.add_argument(
        "--qt",
        action="store",
        dest="qt_dir",
        required=True,
        help="Path to a directory containing Qt modules, like the 'qt5' meta repository",
    )
    options = parser.parse_args(argv)

    if not check_arguments(options):
        parser.print_help()
        return -1

    # main process
    process_qt_src(options)
    return 0


if __name__ == "__main__":
    sys.exit(main())