-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
GH-136895: Update JIT builds to use LLVM 20 #140329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d145747
d86b66e
9324b14
f3bb6b9
b6e7981
e867644
13e9f5b
84781b4
0034f14
cc98d30
76842eb
a732cec
e8395ce
d1e4363
01aed67
94f1a89
e6450de
1adf827
b9bfacf
57c44ee
081ee86
0b773f9
c78af6f
d715cf2
38e11b9
74eb9a4
ca95652
47153a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Update JIT compilation to use LLVM 20 at build time. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,62 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import contextlib | ||
import hashlib | ||
import io | ||
import json | ||
import os | ||
import pathlib | ||
import shutil | ||
import sys | ||
import time | ||
import urllib.error | ||
import urllib.request | ||
import zipfile | ||
|
||
|
||
def retrieve_with_retries(download_location, output_path, reporthook, | ||
max_retries=7): | ||
"""Download a file with exponential backoff retry and save to disk.""" | ||
# Mapping of binary dependency tag to GitHub release asset ID | ||
TAG_TO_ASSET_ID = { | ||
'llvm-20.1.8.0': 301710576, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking comment, because what's here should work. However, this hard-coded mapping seems like something that we don't want to have to maintain, and should be relatively easy to avoid by naming things consistently. When I created the 'release' for this I just used the original filename, but it's easy enough to rename it to something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I think I originally set it up this way so the archives could come directly from upstream distributors without any changes, but renaming is probably easier/better. |
||
|
||
|
||
def request_with_retry(request_func, *args, max_retries=7, | ||
err_msg='Request failed.', **kwargs): | ||
"""Make a request using request_func with exponential backoff""" | ||
for attempt in range(max_retries + 1): | ||
try: | ||
resp = urllib.request.urlretrieve( | ||
download_location, | ||
output_path, | ||
reporthook=reporthook, | ||
) | ||
resp = request_func(*args, **kwargs) | ||
except (urllib.error.URLError, ConnectionError) as ex: | ||
if attempt == max_retries: | ||
msg = f"Download from {download_location} failed." | ||
raise OSError(msg) from ex | ||
raise OSError(err_msg) from ex | ||
time.sleep(2.25**attempt) | ||
else: | ||
return resp | ||
|
||
|
||
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): | ||
repo = f'cpython-{"bin" if binary else "source"}-deps' | ||
def retrieve_with_retries(download_location, output_path, reporthook): | ||
"""Download a file with retries.""" | ||
return request_with_retry( | ||
urllib.request.urlretrieve, | ||
download_location, | ||
output_path, | ||
reporthook, | ||
err_msg=f'Download from {download_location} failed.', | ||
) | ||
|
||
|
||
def get_with_retries(url, headers): | ||
req = urllib.request.Request(url=url, headers=headers, method='GET') | ||
return request_with_retry( | ||
urllib.request.urlopen, | ||
req, | ||
err_msg=f'Request to {url} failed.' | ||
) | ||
Comment on lines
+38
to
+55
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we have two similar functions? I see they are different on the function they use. Why do we need them both? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is code I wrote, so I can try and answer this :) The idea is that we need to do two types of requests:
For 1, For 2, we need to pass |
||
|
||
|
||
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose=False): | ||
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps' | ||
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip' | ||
reporthook = None | ||
if verbose: | ||
|
@@ -44,6 +70,51 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): | |
return filename | ||
|
||
|
||
def fetch_release_asset(asset_id, output_path, org): | ||
"""Download a GitHub release asset. | ||
Release assets need the Content-Type header set to | ||
application/octet-stream to download the binary, so we can't use | ||
urlretrieve. Code here is based on urlretrieve. | ||
""" | ||
url = f'https://api.github.com/repos/{org}/cpython-bin-deps/releases/assets/{asset_id}' | ||
metadata_resp = get_with_retries(url, | ||
headers={'Accept': 'application/vnd.github+json'}) | ||
json_data = json.loads(metadata_resp.read()) | ||
hash_info = json_data.get('digest') | ||
if not hash_info: | ||
raise RuntimeError(f'Release asset {asset_id} missing digest field in metadata') | ||
algorithm, hashsum = hash_info.split(':') | ||
if algorithm != 'sha256': | ||
raise RuntimeError(f'Unknown hash algorithm {algorithm} for asset {asset_id}') | ||
with contextlib.closing( | ||
get_with_retries(url, headers={'Accept': 'application/octet-stream'}) | ||
) as resp: | ||
hasher = hashlib.sha256() | ||
with open(output_path, 'wb') as fp: | ||
while block := resp.read(io.DEFAULT_BUFFER_SIZE): | ||
hasher.update(block) | ||
fp.write(block) | ||
if hasher.hexdigest() != hashsum: | ||
raise RuntimeError('Downloaded content hash did not match!') | ||
|
||
|
||
def fetch_release(tag, tarball_dir, *, org='python', verbose=False): | ||
tarball_dir.mkdir(parents=True, exist_ok=True) | ||
asset_id = TAG_TO_ASSET_ID.get(tag) | ||
if asset_id is None: | ||
raise ValueError(f'Unknown tag for binary dependencies {tag}') | ||
output_path = tarball_dir / f'{tag}.tar.xz' | ||
fetch_release_asset(asset_id, output_path, org) | ||
return output_path | ||
|
||
|
||
def extract_tarball(externals_dir, tarball_path, tag): | ||
output_path = externals_dir / tag | ||
shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path)) | ||
return output_path | ||
|
||
|
||
def extract_zip(externals_dir, zip_path): | ||
with zipfile.ZipFile(os.fspath(zip_path)) as zf: | ||
zf.extractall(os.fspath(externals_dir)) | ||
|
@@ -67,15 +138,36 @@ def parse_args(): | |
|
||
def main(): | ||
args = parse_args() | ||
zip_path = fetch_zip( | ||
args.tag, | ||
args.externals_dir / 'zips', | ||
org=args.organization, | ||
binary=args.binary, | ||
verbose=args.verbose, | ||
) | ||
final_name = args.externals_dir / args.tag | ||
extracted = extract_zip(args.externals_dir, zip_path) | ||
|
||
# Check if the dependency already exists in externals/ directory | ||
# (either already downloaded/extracted, or checked into the git tree) | ||
if final_name.exists(): | ||
if args.verbose: | ||
print(f'{args.tag} already exists at {final_name}, skipping download.') | ||
return | ||
|
||
# Determine download method: release artifacts for large deps (like LLVM), | ||
# otherwise zip download from GitHub branches | ||
if args.tag in TAG_TO_ASSET_ID: | ||
tarball_path = fetch_release( | ||
args.tag, | ||
args.externals_dir / 'tarballs', | ||
org=args.organization, | ||
verbose=args.verbose, | ||
) | ||
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag) | ||
else: | ||
# Use zip download from GitHub branches | ||
# (cpython-bin-deps if --binary, cpython-source-deps otherwise) | ||
zip_path = fetch_zip( | ||
args.tag, | ||
args.externals_dir / 'zips', | ||
org=args.organization, | ||
binary=args.binary, | ||
verbose=args.verbose, | ||
) | ||
extracted = extract_zip(args.externals_dir, zip_path) | ||
for wait in [1, 2, 3, 5, 8, 0]: | ||
try: | ||
extracted.replace(final_name) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -444,12 +444,18 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value) | |
} | ||
|
||
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state); | ||
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state); | ||
|
||
#include "jit_stencils.h" | ||
|
||
#if defined(__aarch64__) || defined(_M_ARM64) | ||
#define TRAMPOLINE_SIZE 16 | ||
#define DATA_ALIGN 8 | ||
#elif defined(__x86_64__) && defined(__APPLE__) | ||
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative | ||
// range. | ||
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment | ||
#define DATA_ALIGN 16 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does the data need to be aligned to 16? |
||
#else | ||
#define TRAMPOLINE_SIZE 0 | ||
#define DATA_ALIGN 1 | ||
|
@@ -501,6 +507,48 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) | |
patch_aarch64_26r(location, (uintptr_t)p); | ||
} | ||
|
||
// Generate and patch x86_64 trampolines. | ||
void | ||
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state) | ||
{ | ||
uint64_t value = (uintptr_t)symbols_map[ordinal]; | ||
int64_t range = (int64_t)value - 4 - (int64_t)location; | ||
|
||
// If we are in range of 32 signed bits, we can patch directly | ||
if (range >= -(1LL << 31) && range < (1LL << 31)) { | ||
patch_32r(location, value - 4); | ||
return; | ||
} | ||
|
||
// Out of range - need a trampoline | ||
const uint32_t symbol_mask = 1 << (ordinal % 32); | ||
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32]; | ||
assert(symbol_mask & trampoline_mask); | ||
|
||
// Count the number of set bits in the trampoline mask lower than ordinal | ||
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1)); | ||
for (int i = 0; i < ordinal / 32; i++) { | ||
index += _Py_popcount32(state->trampolines.mask[i]); | ||
} | ||
|
||
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE; | ||
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size); | ||
|
||
/* Generate the trampoline (14 bytes, padded to 16): | ||
0: ff 25 00 00 00 00 jmp *(%rip) | ||
6: XX XX XX XX XX XX XX XX (64-bit target address) | ||
|
||
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64) | ||
*/ | ||
trampoline[0] = 0xFF; | ||
trampoline[1] = 0x25; | ||
*(uint32_t *)(trampoline + 2) = 0; | ||
*(uint64_t *)(trampoline + 6) = value; | ||
|
||
// Patch the call site to call the trampoline instead | ||
patch_32r(location, (uintptr_t)trampoline - 4); | ||
} | ||
|
||
static void | ||
combine_symbol_mask(const symbol_mask src, symbol_mask dest) | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Really not major.. can we stick the version in a variable?