
Commit 8b18bac

align with commit0 evaluate

1 parent f99c131 commit 8b18bac

File tree

1 file changed (+18, -10 lines changed)

docs/render_submissions.py

Lines changed: 18 additions & 10 deletions
@@ -13,6 +13,7 @@
 from transformers import AutoTokenizer
 
 from commit0.harness.constants import SPLIT
+from commit0.harness.get_pytest_ids import main as get_tests
 from commit0.harness.utils import clone_repo
 from commit0.cli import write_commit0_config_file
 
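The newly imported helper supplies the per-repo test totals used further down in this file. A minimal sketch of that usage, not code from the repository, assuming get_tests(repo_name, verbose=0) returns the list of pytest test IDs commit0 tracks for a repo (the diff below only shows that its length is taken):

# Sketch only. Assumption: get_tests(repo_name, verbose=0) returns the
# list of pytest test IDs commit0 tracks for `repo_name`.
from commit0.harness.get_pytest_ids import main as get_tests

def repo_test_count(repo_name: str) -> int:
    # len() of the returned IDs gives the repo's total test count,
    # which the diff uses as the denominator of the pass rate.
    return len(get_tests(repo_name, verbose=0))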

@@ -164,7 +165,7 @@ def get_blank_repo_metrics(
 
 
 leaderboard_header = """\n\n## Leaderboard ({split})
-| Name | Repos Resolved (/{num_repos}) | Total Tests Passed (/{total_num_tests}) | Test Duration (s) | Date | Analysis | Github |
+| Name | Repos Resolved (/{num_repos}) | Tests Passed (Total: {total_num_tests}) | Test Duration (s) | Date | Analysis | Github |
 |------|:-------------------------:|:--------------------:|:--------------------:|:----------:|----|----| """
 
 submission_table_header = """# Submission Name: **{display_name}** (split: {split})
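The header rename follows from the metric change in the later hunks: the cell under this column now holds a percentage rather than a raw count, so "/{total_num_tests}" would no longer read as "passed out of total" and the total moves into the column title. An illustration with invented numbers:

# Illustration only; total_num_tests is a hypothetical value.
total_num_tests = 1100
old_title = f"Total Tests Passed (/{total_num_tests})"  # cell was a count, e.g. "190"
new_title = f"Tests Passed (Total: {total_num_tests})"  # cell is a rate, e.g. "50.00%"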
@@ -203,10 +204,12 @@ def render_mds(overwrite_previous, subfolder="docs"):
         if org_name in {"blank", "repos", "submission_repos"}:
             continue
         for branch_path in glob.glob(os.path.join(org_path, "*.json")):
-            cum_tests_passed = 0
+            evaluate_numbers = []
+            lite_evaluate_numbers = []
+            # cum_tests_passed = 0
             repos_resolved = 0
             total_duration = 0.0
-            lite_cum_tests_passed = 0
+            # lite_cum_tests_passed = 0
             lite_repos_resolved = 0
             lite_total_duration = 0.0
             branch_metrics = json.load(open(branch_path))
@@ -299,11 +302,14 @@ def render_mds(overwrite_previous, subfolder="docs"):
                 f"### {shortened_testname}\n\n<details><summary> <pre>{shortened_testname}"
                 f"</pre></summary><pre>\n{failure['failure_string']}\n</pre>\n</details>\n"
             )
-            cum_tests_passed += pytest_info["summary"]["passed"]
+            # cum_tests_passed += pytest_info["summary"]["passed"]
+            num_tests = len(get_tests(repo_name, verbose=0))
+            evaluate_numbers.append(pytest_info["summary"]["passed"] / num_tests)
             total_duration += pytest_info["duration"]
             repos_resolved += int(resolved)
-            if split == "all":
-                lite_cum_tests_passed += pytest_info["summary"]["passed"]
+            if split == "all" and repo_name in SPLIT['lite']:
+                lite_evaluate_numbers.append(pytest_info["summary"]["passed"] / num_tests)
+                # lite_cum_tests_passed += pytest_info["summary"]["passed"]
                 lite_total_duration += pytest_info["duration"]
                 lite_repos_resolved += int(resolved)
 
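Two fixes land in this hunk: each repo now contributes its pass fraction (passed / num_tests) instead of a raw count, and the lite accumulators only see repos that actually belong to the lite split. A sketch of the tightened guard, assuming SPLIT (imported from commit0.harness.constants above) maps split names to collections of repo names:

# Sketch only; assumes SPLIT maps split names to repo-name collections.
from commit0.harness.constants import SPLIT

def counts_toward_lite(split: str, repo_name: str) -> bool:
    # Old guard: `split == "all"` let every repo feed the lite accumulators.
    # New guard: the repo must also be a member of the lite split.
    return split == "all" and repo_name in SPLIT["lite"]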

@@ -331,20 +337,22 @@ def render_mds(overwrite_previous, subfolder="docs"):
                 wf.write(back_button + "\n" + submission_page)
             analysis_link = f"[Analysis](/{f'analysis_{org_name}_{branch_name}'})"
             github_link = f"[Github]({project_page_link})"
-            leaderboard[split].append((cum_tests_passed,
+            avg_pass_rate = sum(evaluate_numbers) / len(evaluate_numbers)
+            leaderboard[split].append((avg_pass_rate * 100,
                 f"\n|{display_name}|"
                 f"{repos_resolved}|"
-                f"{cum_tests_passed}|"
+                f"{avg_pass_rate*100:.2f}%|"
                 f"{total_duration:.2f}|"
                 f"{submission_date}|"
                 f"{analysis_link}|"
                 f"{github_link}|"
             ))
             if ((split == "all") and ("Reference (Gold)" not in display_name)):
-                leaderboard["lite"].append((lite_cum_tests_passed,
+                avg_lite_pass_rate = sum(lite_evaluate_numbers) / len(lite_evaluate_numbers)
+                leaderboard["lite"].append((avg_lite_pass_rate * 100,
                     f"\n|{display_name} (subset of `all`)|"
                     f"{lite_repos_resolved}|"
-                    f"{lite_cum_tests_passed}|"
+                    f"{avg_lite_pass_rate*100:.2f}%|"
                     f"{lite_total_duration:.2f}|"
                     f"{submission_date}|"
                     f"{analysis_link}|"

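Taken together, the commit swaps the leaderboard's cumulative passed-test count (a micro average, dominated by repos with many tests) for the mean per-repo pass rate (a macro average), aligning the page with what commit0's evaluate command reports. A self-contained sketch with invented numbers:

# Invented report data: one small repo with a high pass rate, one large
# repo with a low one.
reports = [
    {"passed": 90, "num_tests": 100},    # 90% pass rate
    {"passed": 100, "num_tests": 1000},  # 10% pass rate
]

# Old metric: cumulative count, dominated by the 1000-test repo.
cum_tests_passed = sum(r["passed"] for r in reports)               # 190

# New metric: mean of per-repo pass rates, as in the diff above.
evaluate_numbers = [r["passed"] / r["num_tests"] for r in reports]
avg_pass_rate = sum(evaluate_numbers) / len(evaluate_numbers)      # 0.5

print(f"{avg_pass_rate*100:.2f}%")  # the new leaderboard cell: "50.00%"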