From 0a91a934198fe992df1e473ebfbea9f08cf9ffeb Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 21 Mar 2026 14:58:29 -0700 Subject: [PATCH 1/2] Remove the buggy AArch64 "33rx" relocation --- Python/jit.c | 73 ------------------------------------------ Tools/jit/_stencils.py | 40 ++++------------------- Tools/jit/_writer.py | 10 +----- 3 files changed, 7 insertions(+), 116 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 4990c743224d3c..d56ff6ad156c03 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -355,18 +355,6 @@ patch_aarch64_12(unsigned char *location, uint64_t value) set_bits(loc32, 10, value, shift, 12); } -// Relaxable 12-bit low part of an absolute address. Pairs nicely with -// patch_aarch64_21rx (below). -void -patch_aarch64_12x(unsigned char *location, uint64_t value) -{ - // This can *only* be relaxed if it occurs immediately before a matching - // patch_aarch64_21rx. If that happens, the JIT build step will replace both - // calls with a single call to patch_aarch64_33rx. Otherwise, we end up - // here, and the instruction is patched normally: - patch_aarch64_12(location, value); -} - // 16-bit low part of an absolute address. void patch_aarch64_16a(unsigned char *location, uint64_t value) @@ -427,18 +415,6 @@ patch_aarch64_21r(unsigned char *location, uint64_t value) set_bits(loc32, 5, value, 2, 19); } -// Relaxable 21-bit count of pages between this page and an absolute address's -// page. Pairs nicely with patch_aarch64_12x (above). -void -patch_aarch64_21rx(unsigned char *location, uint64_t value) -{ - // This can *only* be relaxed if it occurs immediately before a matching - // patch_aarch64_12x. If that happens, the JIT build step will replace both - // calls with a single call to patch_aarch64_33rx. Otherwise, we end up - // here, and the instruction is patched normally: - patch_aarch64_21r(location, value); -} - // 21-bit relative branch. void patch_aarch64_19r(unsigned char *location, uint64_t value) @@ -469,55 +445,6 @@ patch_aarch64_26r(unsigned char *location, uint64_t value) set_bits(loc32, 0, value, 2, 26); } -// A pair of patch_aarch64_21rx and patch_aarch64_12x. -void -patch_aarch64_33rx(unsigned char *location, uint64_t value) -{ - uint32_t *loc32 = (uint32_t *)location; - // Try to relax the pair of GOT loads into an immediate value: - assert(IS_AARCH64_ADRP(*loc32)); - unsigned char reg = get_bits(loc32[0], 0, 5); - assert(IS_AARCH64_LDR_OR_STR(loc32[1])); - // There should be only one register involved: - assert(reg == get_bits(loc32[1], 0, 5)); // ldr's output register. - assert(reg == get_bits(loc32[1], 5, 5)); // ldr's input register. - uint64_t relaxed = *(uint64_t *)value; - if (relaxed < (1UL << 16)) { - // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop - loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; - loc32[1] = 0xD503201F; - return; - } - if (relaxed < (1ULL << 32)) { - // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY - loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; - loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg; - return; - } - int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location >> 12); - if (page_delta >= -(1L << 20) && - page_delta < (1L << 20)) - { - // adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB - patch_aarch64_21rx(location, relaxed); - loc32[1] = 0x91000000 | get_bits(relaxed, 0, 12) << 10 | reg << 5 | reg; - return; - } - relaxed = value - (uintptr_t)location; - if ((relaxed & 0x3) == 0 && - (int64_t)relaxed >= -(1L << 19) && - (int64_t)relaxed < (1L << 19)) - { - // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop - loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg; - loc32[1] = 0xD503201F; - return; - } - // Couldn't do it. Just patch the two instructions normally: - patch_aarch64_21rx(location, value); - patch_aarch64_12x(location + 4, value); -} - // Relaxable 32-bit relative address. void patch_x86_64_32rx(unsigned char *location, uint64_t value) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 258de8ab3136a4..55a4aece5427c2 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -57,8 +57,8 @@ class HoleValue(enum.Enum): _PATCH_FUNCS = { # aarch64-apple-darwin: "ARM64_RELOC_BRANCH26": "patch_aarch64_26r", - "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx", - "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x", + "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21r", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12", "ARM64_RELOC_PAGE21": "patch_aarch64_21r", "ARM64_RELOC_PAGEOFF12": "patch_aarch64_12", "ARM64_RELOC_UNSIGNED": "patch_64", @@ -70,21 +70,21 @@ class HoleValue(enum.Enum): # aarch64-pc-windows-msvc: "IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r", "IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r", - "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx", + "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21r", "IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12", - "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x", + "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12", # i686-pc-windows-msvc: "IMAGE_REL_I386_DIR32": "patch_32", "IMAGE_REL_I386_REL32": "patch_x86_64_32rx", # aarch64-unknown-linux-gnu: "R_AARCH64_ABS64": "patch_64", "R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12", - "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx", + "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21r", "R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r", "R_AARCH64_CALL26": "patch_aarch64_26r", "R_AARCH64_CONDBR19": "patch_aarch64_19r", "R_AARCH64_JUMP26": "patch_aarch64_26r", - "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x", + "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12", "R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a", "R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b", "R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c", @@ -171,34 +171,6 @@ class Hole: def __post_init__(self) -> None: self.func = _PATCH_FUNCS[self.kind] - def fold(self, other: typing.Self, body: bytearray) -> typing.Self | None: - """Combine two holes into a single hole, if possible.""" - instruction_a = int.from_bytes( - body[self.offset : self.offset + 4], byteorder=sys.byteorder - ) - instruction_b = int.from_bytes( - body[other.offset : other.offset + 4], byteorder=sys.byteorder - ) - reg_a = instruction_a & 0b11111 - reg_b1 = instruction_b & 0b11111 - reg_b2 = (instruction_b >> 5) & 0b11111 - - if ( - self.offset + 4 == other.offset - and self.value == other.value - and self.symbol == other.symbol - and self.addend == other.addend - and self.func == "patch_aarch64_21rx" - and other.func == "patch_aarch64_12x" - and reg_a == reg_b1 == reg_b2 - ): - # These can *only* be properly relaxed when they appear together and - # patch the same value: - folded = self.replace() - folded.func = "patch_aarch64_33rx" - return folded - return None - def as_c(self, where: str) -> str: """Dump this hole as a call to a patch_* function.""" if self.custom_location: diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index 5fd9a2ee2d6e58..20209450d0d8d0 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -1,6 +1,5 @@ """Utilities for writing StencilGroups out to a C header file.""" -import itertools import typing import math @@ -61,15 +60,8 @@ def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator for part, stencil in [("data", group.data), ("code", group.code)]: if stencil.body.rstrip(b"\x00"): yield f" memcpy({part}, {part}_body, sizeof({part}_body));" - skip = False stencil.holes.sort(key=lambda hole: hole.offset) - for hole, pair in itertools.zip_longest(stencil.holes, stencil.holes[1:]): - if skip: - skip = False - continue - if pair and (folded := hole.fold(pair, stencil.body)): - skip = True - hole = folded + for hole in stencil.holes: yield f" {hole.as_c(part)}" yield "}" yield "" From b2d9f7623bcf6e6ca8d6f2fe2e1a840ee501f9a8 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 21 Mar 2026 15:05:21 -0700 Subject: [PATCH 2/2] blurb add --- .../2026-03-21-15-05-14.gh-issue-146128.DG1Hfa.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-21-15-05-14.gh-issue-146128.DG1Hfa.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-21-15-05-14.gh-issue-146128.DG1Hfa.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-21-15-05-14.gh-issue-146128.DG1Hfa.rst new file mode 100644 index 00000000000000..931e1ac92ce793 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-21-15-05-14.gh-issue-146128.DG1Hfa.rst @@ -0,0 +1,3 @@ +Fix a bug which could cause constant values to be partially corrupted in +AArch64 JIT code. This issue is theoretical, and hasn't actually been +observed in unmodified Python interpreters.