tests/scripts/release/make_notes.py

#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import pickle
from pathlib import Path
import csv
import sys
import re
from collections import defaultdict

REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.append(str(REPO_ROOT / "tests" / "scripts"))
sys.path.append(str(REPO_ROOT / "tests" / "scripts" / "github"))
sys.path.append(str(REPO_ROOT / "tests" / "scripts" / "jenkins"))

# Tag dictionary used to create a mapping relation to categorize PRs owning same tag.
TAG_DICT = {
    "metaschedule": "MetaSchedule",
    "cuda": "cuda & cutlass & tensorrt",
    "cutlass": "cuda & cutlass & tensorrt",
    "tensorrt": "cuda & cutlass & tensorrt",
    "hexagon": "Hexagon",
    "metal": "Metal",
    "vulkan": "Vulkan",
    "clml": "OpenCL & CLML",
    "opencl": "OpenCL & CLML",
    "openclml": "OpenCL & CLML",
    "adreno": "Adreno",
    "acl": "ArmComputeLibrary",
    "rocm": "ROCm",
    "crt": "CRT",
    "web": "web",
    "wasm": "web",
    "runtime": "Runtime",
    "aot": "AOT",
    "arith": "Arith",
    "byoc": "BYOC",
    "community": "Community",
    "tensorir": "TIR",
    "tir": "TIR",
    "tensorflow": "Frontend",
    "tflite": "Frontend",
    "pytorch": "Frontend",
    "torch": "Frontend",
    "keras": "Frontend",
    "frontend": "Frontend",
    "onnx": "Frontend",
    "roofline": "Misc",
    "rpc": "Misc",
    "transform": "Misc",
    "tophub": "Misc",
    "ux": "Misc",
    "APP": "Misc",
    "docker": "Docker",
    "doc": "Docs",
    "docs": "Docs",
    "llvm": "LLVM",
    "sve": "LLVM",
    "ci": "CI",
    "test": "CI",
    "tests": "CI",
    "testing": "CI",
    "unittest": "CI",
    "bugfix": "BugFix",
    "fix": "BugFix",
    "bug": "BugFix",
    "hotfix": "BugFix",
    "qnn": "Relay",
    "quantization": "Relay",
    "relax": "Relax",
    "unity": "Relax",
    "transform": "Relax",
    "kvcache": "Relax",
    "dlight": "Dlight",
    "disco": "Disco",
    "tvmscript": "TVMScript",
    "tvmscripts": "TVMScript",
    "tvmc": "TVMC",
    "topi": "TOPI",
}


def strip_header(title: str, header: str) -> str:
    pos = title.lower().find(header.lower())
    if pos == -1:
        return title

    return title[0:pos] + title[pos + len(header) :].strip()


def sprint(*args):
    print(*args, file=sys.stderr)


def create_pr_dict(cache: Path):
    with open(cache, "rb") as f:
        data = pickle.load(f)

    sprint(data[1])
    pr_dict = {}
    for item in data:
        prs = item["associatedPullRequests"]["nodes"]
        if len(prs) != 1:
            continue

        pr = prs[0]
        pr_dict[pr["number"]] = pr
    return pr_dict


def categorize_csv_file(csv_path: str):
    headings = defaultdict(lambda: defaultdict(list))
    sprint("Opening CSV")
    with open(csv_path) as f:
        input_file = csv.DictReader(f)

        i = 0
        blank_cate_set = {"Misc"}
        for row in input_file:
            # print(row)
            tags = row["pr_title_tags"].split("/")
            tags = ["misc"] if len(tags) == 0 else tags

            categories = map(lambda t: TAG_DICT.get(t.lower(), "Misc"), tags)
            categories = list(categories)
            categories = list(set(categories) - blank_cate_set)
            category = "Misc" if len(categories) == 0 else categories[0]

            subject = row["subject"].strip()
            pr_number = row["url"].split("/")[-1]

            if category == "" or subject == "":
                sprint(f"Skipping {i}th pr with number: {pr_number}, row: {row}")
                continue

            headings[category][subject].append(pr_number)
            i += 1
            # if i > 30:
            #     break
    return headings


if __name__ == "__main__":
    help = "List out commits with attached PRs since a certain commit"
    parser = argparse.ArgumentParser(description=help)
    parser.add_argument(
        "--notes", required=True, help="csv or markdown file of categorized PRs in order"
    )
    parser.add_argument(
        "--is-pr-with-link",
        required=False,
        help="exported pr number with hyper-link for forum format",
    )
    parser.add_argument(
        "--convert-with-link",
        required=False,
        help="make PR number in markdown file owning hyper-link",
    )
    args = parser.parse_args()
    user = "apache"
    repo = "tvm"

    if args.convert_with_link:
        with open(args.notes, "r") as f:
            lines = f.readlines()
        formated = []
        for line in lines:
            match = re.search(r"#\d+", line)
            if match:
                pr_num_str = match.group()
                pr_num_int = pr_num_str.replace("#", "")
                pr_number_str = f"[#{pr_num_int}](https://github.com/apache/tvm/pull/{pr_num_int})"
                line = line.replace(pr_num_str, pr_number_str)
            formated.append(line)
        result = "".join(formated)
        print(result)
        exit(0)

    # 1. Create PR dict from cache file
    cache = Path("out.pkl")
    if not cache.exists():
        sprint("run gather_prs.py first to generate out.pkl")
        exit(1)
    pr_dict = create_pr_dict(cache)

    # 2. Categorize csv file as dict by category and subject (sub-category)
    headings = categorize_csv_file(args.notes)

    # 3. Summarize and sort all categories
    def sorter(x):
        if x == "Misc":
            return 10
        return 0

    keys = list(headings.keys())
    keys = list(sorted(keys))
    keys = list(sorted(keys, key=sorter))

    # 4. Generate markdown by loop categorized csv file dict
    def pr_title(number, heading):
        # print(f"number:{number}, heading:{heading}, len(pr_dict):{len(pr_dict)}")
        try:
            title = pr_dict[int(number)]["title"]
            title = strip_header(title, heading)
        except:
            sprint("The out.pkl file is not match with csv file.")
            exit(1)
        return title

    output = ""
    for key in keys:
        value = headings[key]
        if key == "DO NOT INCLUDE":
            continue
        value = dict(value)
        output += f"### {key}\n"
        misc = []
        misc += value.get("n/a", [])
        misc += value.get("Misc", [])
        for pr_number in misc:
            if args.is_pr_with_link:
                pr_number_str = f"[#{pr_number}](https://github.com/apache/tvm/pull/{pr_number})"
            else:
                pr_number_str = f"#{pr_number}"
            pr_str = f" * {pr_number_str} - {pr_title(pr_number, '[' + key + ']')}\n"
            output += pr_str

        for subheading, pr_numbers in value.items():
            if subheading == "DO NOT INCLUDE":
                continue
            if subheading == "n/a" or subheading == "Misc":
                continue
            else:
                output += f" * {subheading} - " + ", ".join([f"#{n}" for n in pr_numbers]) + "\n"
            # print(value)

        output += "\n"

    # 5. Print markdown-format output
    print(output)