Skip to content

Commit 71033d3

Browse files
committed
chore: update plugin
fix: update fix: update
1 parent 624d408 commit 71033d3

File tree

1 file changed

+149
-114
lines changed

1 file changed

+149
-114
lines changed

hooks/committer.py

+149-114
Original file line numberDiff line numberDiff line change
@@ -1,157 +1,192 @@
1+
"""
2+
hooks/committer.py – 使用源码文件 git 时间增量缓存，
3+
ๆ”ฏๆŒ URL ็ผ–็ ๅญ—็ฌฆ๏ผˆTwo%20Sum โ†’ Two Sum๏ผ‰
4+
"""
5+
16
import fnmatch
27
import json
38
import os
49
import random
5-
from datetime import datetime
6-
from typing import List
10+
import subprocess
11+
import urllib.parse
12+
from datetime import datetime, timezone
13+
from pathlib import Path
14+
from typing import Dict, List
715

816
import requests
917

1018

11-
def exclude(src_path: str, globs: List[str]) -> bool:
12-
"""
13-
Determine if a src_path should be excluded.
14-
Supports globs (e.g. folder/* or *.md).
15-
Credits: code adapted from
16-
https://github.com/timvink/mkdocs-git-authors-plugin/blob/master/mkdocs_git_authors_plugin/exclude.py
17-
Args:
18-
src_path (src): Path of file
19-
globs (list): list of globs
20-
Returns:
21-
(bool): whether src_path should be excluded
22-
"""
23-
assert isinstance(src_path, str)
24-
assert isinstance(globs, list)
19+
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๆ—ฅๅฟ— โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ #
20+
def _now() -> str:
21+
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
22+
23+
24+
def _log(msg: str, level: str = "INFO"):
25+
print(f"{_now()} [{level}] {msg}")
26+
2527

28+
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๅทฅๅ…ทๅ‡ฝๆ•ฐ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ #
29+
def _exclude(src_path: str, globs: List[str]) -> bool:
2630
for g in globs:
2731
if fnmatch.fnmatchcase(src_path, g):
2832
return True
29-
30-
# Windows reports filenames as eg. a\\b\\c instead of a/b/c.
31-
# To make the same globs/regexes match filenames on Windows and
32-
# other OSes, let's try matching against converted filenames.
33-
# On the other hand, Unix actually allows filenames to contain
34-
# literal \\ characters (although it is rare), so we won't
35-
# always convert them. We only convert if os.sep reports
36-
# something unusual. Conversely, some future mkdocs might
37-
# report Windows filenames using / separators regardless of
38-
# os.sep, so we *always* test with / above.
3933
if os.sep != "/":
40-
src_path_fix = src_path.replace(os.sep, "/")
41-
if fnmatch.fnmatchcase(src_path_fix, g):
34+
if fnmatch.fnmatchcase(src_path.replace(os.sep, "/"), g):
4235
return True
4336
return False
4437

4538

46-
def get_header() -> dict:
39+
def _get_header() -> Dict[str, str]:
4740
if "MKDOCS_API_KEYS" in os.environ:
48-
keys = os.environ["MKDOCS_API_KEYS"].strip().split(",")
49-
return {"Authorization": "token " + str(random.choice(keys)).strip()}
41+
keys = [k.strip() for k in os.environ["MKDOCS_API_KEYS"].split(",") if k.strip()]
42+
if keys:
43+
return {"Authorization": "token " + random.choice(keys)}
5044
return {}
5145

5246

47+
def _file_git_datetime(repo_path: str) -> datetime:
48+
try:
49+
ts = subprocess.check_output(
50+
["git", "log", "-1", "--format=%ct", "--", repo_path],
51+
text=True,
52+
stderr=subprocess.DEVNULL,
53+
).strip()
54+
if ts:
55+
return datetime.fromtimestamp(int(ts), tz=timezone.utc)
56+
except Exception:
57+
pass
58+
return datetime.now(tz=timezone.utc)
59+
60+
61+
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ไธปๆ’ไปถ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ #
5362
class CommitterPlugin:
5463
def __init__(self):
55-
self.cache_dir = ".cache/plugin/git-committers"
56-
self.cache_file = f"{self.cache_dir}/page-authors.json"
57-
self.cache_page_authors = {}
58-
self.cache_date = ""
59-
self.excluded_pages = []
60-
self.last_request_return_code = 0
64+
self.cache_path = Path(".cache/plugin/git-committers/page-authors.json")
65+
self.cache_path.parent.mkdir(parents=True, exist_ok=True)
66+
67+
self.page_authors: Dict[str, Dict] = {}
68+
self.last_request_status = 0
69+
70+
# ── MkDocs 事件 ── #
71+
def on_pre_build(self, _cfg):
72+
if self.cache_path.exists():
73+
try:
74+
self.page_authors = json.loads(self.cache_path.read_text())["page_authors"]
75+
_log(f"Loaded committer cache from {self.cache_path}")
76+
except Exception as e:
77+
_log(f"Failed to read cache, ignore: {e}", "WARN")
78+
79+
def on_post_build(self, _cfg):
80+
out = {
81+
"cache_date": datetime.now(tz=timezone.utc).isoformat(),
82+
"page_authors": self.page_authors,
83+
}
84+
self.cache_path.write_text(json.dumps(out, ensure_ascii=False, indent=2))
85+
_log(f"Saved committer cache to {self.cache_path}")
86+
87+
_log("========= Committer Summary =========")
88+
for k, v in sorted(self.page_authors.items()):
89+
_log(f"[SUMMARY] {k} | retrieved: {v.get('retrieved', 'N/A')}")
90+
_log("=====================================")
91+
92+
def on_page_context(self, context, page, _cfg, _nav):
93+
if not page.edit_url or _exclude(page.file.src_path, []):
94+
return context
95+
96+
repo_path = self._repo_path_from_edit_url(page.edit_url)
97+
api_url = self._api_url_from_repo_path(repo_path)
98+
authors = self._get_authors_with_cache(api_url, repo_path)
99+
100+
context["committers"] = authors
101+
context["committers_source"] = "github" if authors else "cache"
102+
return context
103+
104+
# ── 内部方法 ── #
105+
@staticmethod
106+
def _repo_path_from_edit_url(edit_url: str) -> str:
107+
"""
108+
例：
109+
edit_url =
110+
https://github.com/doocs/leetcode/edit/main/solution/0000-0099/0001.Two%20Sum/README.md
111+
返回：
112+
solution/0000-0099/0001.Two Sum/README.md
113+
"""
114+
raw_path = edit_url.split("/edit/main/")[-1]
115+
return urllib.parse.unquote(raw_path)
61116

62117
@staticmethod
63-
def get_request_url(edit_url: str) -> str:
64-
path = edit_url.replace("https://github.com/doocs/leetcode/edit/main", "")
65-
return f"https://api.github.com/repos/doocs/leetcode/commits?path={path}&sha=main&per_page=100"
66-
67-
def on_pre_build(self, config):
68-
if os.path.exists(self.cache_file):
69-
with open(self.cache_file, "r") as f:
70-
cache = json.loads(f.read())
71-
self.cache_date = cache["cache_date"]
72-
self.cache_page_authors = cache["page_authors"]
73-
74-
def on_post_build(self, config):
75-
json_data = json.dumps(
76-
{
77-
"cache_date": datetime.now().strftime("%Y-%m-%d"),
78-
"page_authors": self.cache_page_authors,
79-
}
118+
def _api_url_from_repo_path(repo_path: str) -> str:
119+
# 重新进行 URL 编码，确保空格等字符合法
120+
quoted = urllib.parse.quote(repo_path)
121+
return (
122+
"https://api.github.com/repos/doocs/leetcode/commits"
123+
f"?path={quoted}&sha=main&per_page=100"
80124
)
81-
os.makedirs(self.cache_dir, exist_ok=True)
82-
f = open(self.cache_file, "w")
83-
f.write(json_data)
84-
f.close()
85-
86-
def get_contributors_to_file(self, path: str) -> List[dict]:
87-
# We already got a 401 (unauthorized) or 403 (rate limit) error, so we don't try again
88-
if self.last_request_return_code in [401, 403]:
89-
print("Got a 401 or 403 error, not trying again")
90-
return []
91-
92-
authors = []
93-
print(f"Getting contributors to {path}")
94-
for _ in range(5):
125+
126+
def _get_authors_with_cache(self, api_url: str, repo_path: str) -> List[Dict]:
127+
git_mtime = _file_git_datetime(repo_path).isoformat()
128+
129+
cached = self.page_authors.get(repo_path)
130+
cached_time = cached.get("retrieved") if cached else None
131+
132+
if cached and cached_time and git_mtime <= cached_time:
133+
_log(f"[CACHE HIT] {repo_path} git:{git_mtime} cache:{cached_time}")
134+
return cached["authors"]
135+
136+
_log(f"[CACHE MISS] {repo_path} git:{git_mtime} cache:{cached_time}")
137+
138+
if self.last_request_status in (401, 403):
139+
_log("Skip API request due to previous 401/403", "WARN")
140+
return cached["authors"] if cached else []
141+
142+
authors: List[Dict] = []
143+
for attempt in range(5):
95144
try:
96-
r = requests.get(url=path, headers=get_header())
145+
r = requests.get(api_url, headers=_get_header(), timeout=10)
97146
except Exception as e:
98-
print(f"Got an exception: {e}")
147+
_log(f"Request error ({attempt+1}/5): {e}", "ERROR")
99148
continue
100-
self.last_request_return_code = r.status_code
149+
150+
self.last_request_status = r.status_code
101151
if r.status_code == 200:
102-
# Get login, url and avatar for each author. Ensure no duplicates.
103-
res = r.json()
104-
for commit in res:
105-
if (
106-
commit["author"]
107-
and commit["author"]["login"]
108-
and commit["author"]["login"]
109-
not in [author["login"] for author in authors]
110-
):
152+
for commit in r.json():
153+
author = commit.get("author") or {}
154+
login = author.get("login")
155+
if login and login not in {a["login"] for a in authors}:
111156
authors.append(
112157
{
113-
"login": commit["author"]["login"],
114-
"name": commit["author"]["login"],
115-
"url": commit["author"]["html_url"],
116-
"avatar": commit["author"]["avatar_url"],
158+
"login": login,
159+
"name": login,
160+
"url": author.get("html_url"),
161+
"avatar": author.get("avatar_url"),
117162
}
118163
)
119-
return authors
120-
elif r.status_code in [401, 403]:
121-
print("Got a 401 or 403 error, not trying again")
122-
return []
123-
return []
124-
125-
def list_contributors(self, path: str) -> List[dict]:
126-
path = path.replace("\\", "/")
127-
authors = self.get_contributors_to_file(path)
128-
self.cache_page_authors[path] = {"authors": authors}
164+
break
165+
elif r.status_code in (401, 403):
166+
_log(f"GitHub API limit ({r.status_code}); stop further requests", "ERROR")
167+
return cached["authors"] if cached else []
168+
else:
169+
_log(f"Unexpected status {r.status_code}; retrying…", "ERROR")
170+
171+
self.page_authors[repo_path] = {
172+
"authors": authors,
173+
"retrieved": datetime.now(tz=timezone.utc).isoformat(),
174+
}
175+
_log(f"[CACHE UPDATE] {repo_path} new authors: {len(authors)}")
129176
return authors
130177

131-
def on_page_context(self, context, page, config, nav):
132-
if not page.edit_url:
133-
return context
134-
context["committers"] = []
135-
if exclude(page.file.src_path, self.excluded_pages):
136-
return context
137-
path = self.get_request_url(page.edit_url)
138-
authors = self.list_contributors(path)
139-
if authors:
140-
context["committers"] = authors
141-
context["committers_source"] = "github"
142-
return context
143-
144178

145-
plugin = CommitterPlugin()
179+
# ───────────────────────── MkDocs 适配 ───────────────────────── #
180+
_plugin = CommitterPlugin()
146181

147182

148-
def on_pre_build(config):
149-
plugin.on_pre_build(config)
183+
def on_pre_build(cfg):
184+
_plugin.on_pre_build(cfg)
150185

151186

152-
def on_post_build(config):
153-
plugin.on_post_build(config)
187+
def on_post_build(cfg):
188+
_plugin.on_post_build(cfg)
154189

155190

156-
def on_page_context(context, page, config, nav):
157-
return plugin.on_page_context(context, page, config, nav)
191+
def on_page_context(context, page, cfg, nav):
192+
return _plugin.on_page_context(context, page, cfg, nav)

0 commit comments

Comments
(0)