|
"""
hooks/committer.py — 使用源码文件 git 时间增量缓存，
支持 URL 编码字符（Two%20Sum → Two Sum）
"""
| 5 | + |
1 | 6 | import fnmatch
|
2 | 7 | import json
|
3 | 8 | import os
|
4 | 9 | import random
|
5 |
| -from datetime import datetime |
6 |
| -from typing import List |
| 10 | +import subprocess |
| 11 | +import urllib.parse |
| 12 | +from datetime import datetime, timezone |
| 13 | +from pathlib import Path |
| 14 | +from typing import Dict, List |
7 | 15 |
|
8 | 16 | import requests
|
9 | 17 |
|
10 | 18 |
|
11 |
| -def exclude(src_path: str, globs: List[str]) -> bool: |
12 |
| - """ |
13 |
| - Determine if a src_path should be excluded. |
14 |
| - Supports globs (e.g. folder/* or *.md). |
15 |
| - Credits: code adapted from |
16 |
| - https://github.com/timvink/mkdocs-git-authors-plugin/blob/master/mkdocs_git_authors_plugin/exclude.py |
17 |
| - Args: |
18 |
| - src_path (src): Path of file |
19 |
| - globs (list): list of globs |
20 |
| - Returns: |
21 |
| - (bool): whether src_path should be excluded |
22 |
| - """ |
23 |
| - assert isinstance(src_path, str) |
24 |
| - assert isinstance(globs, list) |
| 19 | +# โโโโโโโโโโโโโโโโโโโโโโโโโ ๆฅๅฟ โโโโโโโโโโโโโโโโโโโโโโโโโ # |
| 20 | +def _now() -> str: |
| 21 | + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") |
| 22 | + |
| 23 | + |
def _log(msg: str, level: str = "INFO") -> None:
    """Print ``msg`` to stdout as ``<timestamp> [<level>] <msg>``.

    Args:
        msg: Text to print.
        level: Label shown in square brackets; defaults to ``"INFO"``.
    """
    print(f"{_now()} [{level}] {msg}")
| 26 | + |
25 | 27 |
|
| 28 | +# โโโโโโโโโโโโโโโโโโโโโโโโโ ๅทฅๅ
ทๅฝๆฐ โโโโโโโโโโโโโโโโโโโโโโโโโ # |
| 29 | +def _exclude(src_path: str, globs: List[str]) -> bool: |
26 | 30 | for g in globs:
|
27 | 31 | if fnmatch.fnmatchcase(src_path, g):
|
28 | 32 | return True
|
29 |
| - |
30 |
| - # Windows reports filenames as eg. a\\b\\c instead of a/b/c. |
31 |
| - # To make the same globs/regexes match filenames on Windows and |
32 |
| - # other OSes, let's try matching against converted filenames. |
33 |
| - # On the other hand, Unix actually allows filenames to contain |
34 |
| - # literal \\ characters (although it is rare), so we won't |
35 |
| - # always convert them. We only convert if os.sep reports |
36 |
| - # something unusual. Conversely, some future mkdocs might |
37 |
| - # report Windows filenames using / separators regardless of |
38 |
| - # os.sep, so we *always* test with / above. |
39 | 33 | if os.sep != "/":
|
40 |
| - src_path_fix = src_path.replace(os.sep, "/") |
41 |
| - if fnmatch.fnmatchcase(src_path_fix, g): |
| 34 | + if fnmatch.fnmatchcase(src_path.replace(os.sep, "/"), g): |
42 | 35 | return True
|
43 | 36 | return False
|
44 | 37 |
|
45 | 38 |
|
46 |
| -def get_header() -> dict: |
| 39 | +def _get_header() -> Dict[str, str]: |
47 | 40 | if "MKDOCS_API_KEYS" in os.environ:
|
48 |
| - keys = os.environ["MKDOCS_API_KEYS"].strip().split(",") |
49 |
| - return {"Authorization": "token " + str(random.choice(keys)).strip()} |
| 41 | + keys = [k.strip() for k in os.environ["MKDOCS_API_KEYS"].split(",") if k.strip()] |
| 42 | + if keys: |
| 43 | + return {"Authorization": "token " + random.choice(keys)} |
50 | 44 | return {}
|
51 | 45 |
|
52 | 46 |
|
| 47 | +def _file_git_datetime(repo_path: str) -> datetime: |
| 48 | + try: |
| 49 | + ts = subprocess.check_output( |
| 50 | + ["git", "log", "-1", "--format=%ct", "--", repo_path], |
| 51 | + text=True, |
| 52 | + stderr=subprocess.DEVNULL, |
| 53 | + ).strip() |
| 54 | + if ts: |
| 55 | + return datetime.fromtimestamp(int(ts), tz=timezone.utc) |
| 56 | + except Exception: |
| 57 | + pass |
| 58 | + return datetime.now(tz=timezone.utc) |
| 59 | + |
| 60 | + |
| 61 | +# โโโโโโโโโโโโโโโโโโโโโโโโโ ไธปๆไปถ โโโโโโโโโโโโโโโโโโโโโโโโโ # |
class CommitterPlugin:
    """Attach the GitHub committers of each page to its template context.

    Author lists are fetched from the GitHub commits API and cached on disk
    in ``.cache/plugin/git-committers/page-authors.json``. A cached entry is
    reused as long as the file's last git commit time is not newer than the
    time the entry was retrieved.
    """

    def __init__(self):
        """Set up the cache location (creating its directory) and state."""
        self.cache_path = Path(".cache/plugin/git-committers/page-authors.json")
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)

        # repo path -> {"authors": [...], "retrieved": ISO-8601 UTC string}
        self.page_authors: Dict[str, Dict] = {}
        # Status code of the most recent GitHub response; once it is 401 or
        # 403 no further requests are sent.
        self.last_request_status = 0

    # ── MkDocs events ── #
    def on_pre_build(self, _cfg):
        """Load the on-disk cache, if present, into ``self.page_authors``.

        An unreadable or malformed cache is logged and ignored.
        """
        if not self.cache_path.exists():
            return
        try:
            # The cache is written as UTF-8; do not depend on the locale.
            data = json.loads(self.cache_path.read_text(encoding="utf-8"))
            page_authors = data["page_authors"]
            if not isinstance(page_authors, dict):
                raise ValueError("'page_authors' is not a JSON object")
        except (OSError, ValueError, KeyError, TypeError) as e:
            _log(f"Failed to read cache, ignore: {e}", "WARN")
            return
        self.page_authors = page_authors
        _log(f"Loaded committer cache from {self.cache_path}")

    def on_post_build(self, _cfg):
        """Write the cache to disk and log a per-page summary."""
        out = {
            "cache_date": datetime.now(tz=timezone.utc).isoformat(),
            "page_authors": self.page_authors,
        }
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        # ensure_ascii=False can emit non-ASCII text, so the encoding must be
        # explicit rather than the platform default.
        self.cache_path.write_text(
            json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8"
        )
        _log(f"Saved committer cache to {self.cache_path}")

        _log("========= Committer Summary =========")
        for k, v in sorted(self.page_authors.items()):
            _log(f"[SUMMARY] {k} | retrieved: {v.get('retrieved', 'N/A')}")
        _log("=====================================")

    def on_page_context(self, context, page, _cfg, _nav):
        """Add ``committers`` and ``committers_source`` to ``context``.

        Pages without an ``edit_url`` are returned unchanged.
        """
        # The exclusion list is currently empty, so _exclude never matches.
        if not page.edit_url or _exclude(page.file.src_path, []):
            return context

        repo_path = self._repo_path_from_edit_url(page.edit_url)
        api_url = self._api_url_from_repo_path(repo_path)
        authors = self._get_authors_with_cache(api_url, repo_path)

        context["committers"] = authors
        # NOTE(review): the label depends only on whether the list is empty,
        # not on where it came from — confirm templates expect this.
        context["committers_source"] = "github" if authors else "cache"
        return context

    # ── internal helpers ── #
    @staticmethod
    def _repo_path_from_edit_url(edit_url: str) -> str:
        """Return the URL-decoded text after the last ``/edit/main/``.

        Example:
            ``https://github.com/doocs/leetcode/edit/main/solution/0000-0099/0001.Two%20Sum/README.md``
            becomes ``solution/0000-0099/0001.Two Sum/README.md``.

        If the marker is absent the whole URL is decoded and returned.
        """
        raw_path = edit_url.split("/edit/main/")[-1]
        return urllib.parse.unquote(raw_path)

    @staticmethod
    def _api_url_from_repo_path(repo_path: str) -> str:
        """Build the GitHub "list commits" URL for ``repo_path``."""
        # Re-encode the path so spaces and similar characters are valid in
        # the query string ("/" is left as is).
        quoted = urllib.parse.quote(repo_path)
        return (
            "https://api.github.com/repos/doocs/leetcode/commits"
            f"?path={quoted}&sha=main&per_page=100"
        )

    @staticmethod
    def _cache_is_fresh(git_time: datetime, cached_time) -> bool:
        """Return True when ``cached_time`` is not older than ``git_time``.

        ``cached_time`` is the stored ISO-8601 string. A missing, unparsable
        or timezone-naive value counts as stale.
        """
        if not cached_time:
            return False
        try:
            return git_time <= datetime.fromisoformat(cached_time)
        except (TypeError, ValueError):
            return False

    @staticmethod
    def _parse_authors(commits) -> List[Dict]:
        """Collect unique authors, in first-seen order, from API commits.

        Commits whose ``author`` is missing/null or has no ``login`` are
        skipped.
        """
        authors: List[Dict] = []
        seen = set()
        for commit in commits:
            author = commit.get("author") or {}
            login = author.get("login")
            if login and login not in seen:
                seen.add(login)
                authors.append(
                    {
                        "login": login,
                        "name": login,
                        "url": author.get("html_url"),
                        "avatar": author.get("avatar_url"),
                    }
                )
        return authors

    def _get_authors_with_cache(self, api_url: str, repo_path: str) -> List[Dict]:
        """Return the authors of ``repo_path``, using the cache when fresh.

        On a cache miss the GitHub API is queried up to five times. The cache
        entry is replaced only after a successful response. On 401/403, or
        when every attempt fails, the previously cached authors (or an empty
        list) are returned and the cache is left untouched.
        """
        git_time = _file_git_datetime(repo_path)
        git_mtime = git_time.isoformat()

        cached = self.page_authors.get(repo_path)
        cached_time = cached.get("retrieved") if cached else None
        cached_authors = cached.get("authors", []) if cached else []

        if cached and self._cache_is_fresh(git_time, cached_time):
            _log(f"[CACHE HIT] {repo_path} git:{git_mtime} cache:{cached_time}")
            return cached_authors

        _log(f"[CACHE MISS] {repo_path} git:{git_mtime} cache:{cached_time}")

        if self.last_request_status in (401, 403):
            _log("Skip API request due to previous 401/403", "WARN")
            return cached_authors

        authors: List[Dict] = []
        for attempt in range(5):
            try:
                r = requests.get(api_url, headers=_get_header(), timeout=10)
            except Exception as e:
                _log(f"Request error ({attempt+1}/5): {e}", "ERROR")
                continue

            self.last_request_status = r.status_code
            if r.status_code == 200:
                try:
                    commits = r.json()
                except ValueError as e:
                    _log(f"Invalid JSON in response ({attempt+1}/5): {e}", "ERROR")
                    continue
                if not isinstance(commits, list):
                    _log(f"Unexpected response payload ({attempt+1}/5)", "ERROR")
                    continue
                authors = self._parse_authors(commits)
                break
            elif r.status_code in (401, 403):
                _log(f"GitHub API limit ({r.status_code}); stop further requests", "ERROR")
                return cached_authors
            else:
                _log(f"Unexpected status {r.status_code}; retrying…", "ERROR")
        else:
            # Every attempt failed: do not overwrite a good cache entry with
            # an empty list stamped as freshly retrieved.
            _log(f"All attempts failed for {repo_path}; cache left unchanged", "ERROR")
            return cached_authors

        self.page_authors[repo_path] = {
            "authors": authors,
            "retrieved": datetime.now(tz=timezone.utc).isoformat(),
        }
        _log(f"[CACHE UPDATE] {repo_path} new authors: {len(authors)}")
        return authors
|
130 | 177 |
|
131 |
| - def on_page_context(self, context, page, config, nav): |
132 |
| - if not page.edit_url: |
133 |
| - return context |
134 |
| - context["committers"] = [] |
135 |
| - if exclude(page.file.src_path, self.excluded_pages): |
136 |
| - return context |
137 |
| - path = self.get_request_url(page.edit_url) |
138 |
| - authors = self.list_contributors(path) |
139 |
| - if authors: |
140 |
| - context["committers"] = authors |
141 |
| - context["committers_source"] = "github" |
142 |
| - return context |
143 |
| - |
144 | 178 |
|
145 |
| -plugin = CommitterPlugin() |
# ───────────────────────── MkDocs 适配 ───────────────────────── #
# Single shared instance used by the module-level hook functions below.
_plugin = CommitterPlugin()
146 | 181 |
|
147 | 182 |
|
148 |
| -def on_pre_build(config): |
149 |
| - plugin.on_pre_build(config) |
def on_pre_build(cfg):
    """Module-level hook: forward ``cfg`` to ``_plugin.on_pre_build``."""
    _plugin.on_pre_build(cfg)
150 | 185 |
|
151 | 186 |
|
152 |
| -def on_post_build(config): |
153 |
| - plugin.on_post_build(config) |
def on_post_build(cfg):
    """Module-level hook: forward ``cfg`` to ``_plugin.on_post_build``."""
    _plugin.on_post_build(cfg)
154 | 189 |
|
155 | 190 |
|
156 |
| -def on_page_context(context, page, config, nav): |
157 |
| - return plugin.on_page_context(context, page, config, nav) |
def on_page_context(context, page, cfg, nav):
    """Module-level hook: forward to ``_plugin.on_page_context``.

    Returns:
        Whatever ``_plugin.on_page_context`` returns (the page context).
    """
    return _plugin.on_page_context(context, page, cfg, nav)
0 commit comments