Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a bug which could cause constant values to be partially corrupted in
AArch64 JIT code. This issue is theoretical, and hasn't actually been
observed in unmodified Python interpreters.
73 changes: 0 additions & 73 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,18 +355,6 @@ patch_aarch64_12(unsigned char *location, uint64_t value)
set_bits(loc32, 10, value, shift, 12);
}

// Relaxable 12-bit low part of an absolute address. Pairs nicely with
// patch_aarch64_21rx (below).
//
// location: the instruction to patch.
// value: the absolute address whose low 12 bits are encoded.
void
patch_aarch64_12x(unsigned char *location, uint64_t value)
{
// This can *only* be relaxed if it occurs immediately after a matching
// patch_aarch64_21rx (that is, the 21rx hole patches the instruction 4 bytes
// earlier, for the same value). If that happens, the JIT build step will
// replace both calls with a single call to patch_aarch64_33rx. Otherwise, we
// end up here, and the instruction is patched normally:
patch_aarch64_12(location, value);
}

// 16-bit low part of an absolute address.
void
patch_aarch64_16a(unsigned char *location, uint64_t value)
Expand Down Expand Up @@ -427,18 +415,6 @@ patch_aarch64_21r(unsigned char *location, uint64_t value)
set_bits(loc32, 5, value, 2, 19);
}

// Relaxable 21-bit count of pages between this page and an absolute address's
// page. Pairs nicely with patch_aarch64_12x (above).
//
// location: the instruction to patch.
// value: the absolute address whose page is being referenced.
void
patch_aarch64_21rx(unsigned char *location, uint64_t value)
{
// This can *only* be relaxed if it occurs immediately before a matching
// patch_aarch64_12x (that is, the 12x hole patches the instruction 4 bytes
// later, for the same value, and both instructions use one register). If that
// happens, the JIT build step will replace both calls with a single call to
// patch_aarch64_33rx. Otherwise, we end up here, and the instruction is
// patched normally:
patch_aarch64_21r(location, value);
}

// 21-bit relative branch.
void
patch_aarch64_19r(unsigned char *location, uint64_t value)
Expand Down Expand Up @@ -469,55 +445,6 @@ patch_aarch64_26r(unsigned char *location, uint64_t value)
set_bits(loc32, 0, value, 2, 26);
}

// A pair of patch_aarch64_21rx and patch_aarch64_12x.
//
// location: the first of two consecutive 32-bit instructions (an adrp followed
//           by a load through the same register, per the asserts below).
// value:    dereferenced as the address of a 64-bit slot holding the constant
//           that the instruction pair would load.
//
// The relaxations below are attempted in order; the first one whose range
// check passes rewrites the instructions and returns. If none apply, the two
// instructions are patched exactly as the unfolded 21rx/12x calls would.
void
patch_aarch64_33rx(unsigned char *location, uint64_t value)
{
uint32_t *loc32 = (uint32_t *)location;
// Try to relax the pair of GOT loads into an immediate value:
// NOTE(review): these sanity checks are asserts, so they are compiled out
// when NDEBUG is defined; nothing else below re-validates the instructions.
assert(IS_AARCH64_ADRP(*loc32));
// The low 5 bits of the adrp hold its destination register:
unsigned char reg = get_bits(loc32[0], 0, 5);
// NOTE(review): the macro name suggests a store would also pass this check,
// while every rewrite below assumes a load - confirm against its definition.
assert(IS_AARCH64_LDR_OR_STR(loc32[1]));
// There should be only one register involved:
assert(reg == get_bits(loc32[1], 0, 5)); // ldr's output register.
assert(reg == get_bits(loc32[1], 5, 5)); // ldr's input register.
// Read the constant itself out of the slot that the ldr would have loaded:
uint64_t relaxed = *(uint64_t *)value;
// Case 1: the constant fits in 16 bits, so a single move-immediate suffices.
if (relaxed < (1UL << 16)) {
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
loc32[1] = 0xD503201F;
return;
}
// Case 2: the constant fits in 32 bits, so build it from two 16-bit halves.
if (relaxed < (1ULL << 32)) {
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
return;
}
// Case 3: treat the constant as an address and compute it directly, if its
// page (address >> 12) is within a signed 21-bit page count of this
// instruction's page.
int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location >> 12);
if (page_delta >= -(1L << 20) &&
page_delta < (1L << 20))
{
// adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB
patch_aarch64_21rx(location, relaxed);
loc32[1] = 0x91000000 | get_bits(relaxed, 0, 12) << 10 | reg << 5 | reg;
return;
}
// Case 4: keep loading from the slot, but with a single PC-relative load.
// From here on, "relaxed" is the byte offset from this instruction to the
// slot at "value" (not the constant read above).
relaxed = value - (uintptr_t)location;
// The offset must be a multiple of 4, since its low 2 bits are dropped by the
// encoding below.
// NOTE(review): the range is checked in bytes against +/-2^19, while the
// encoded field holds 19 bits of the offset starting at bit 2; the check
// looks stricter than the encoding requires - confirm this is intentional.
if ((relaxed & 0x3) == 0 &&
(int64_t)relaxed >= -(1L << 19) &&
(int64_t)relaxed < (1L << 19))
{
// adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
loc32[1] = 0xD503201F;
return;
}
// Couldn't do it. Just patch the two instructions normally:
patch_aarch64_21rx(location, value);
patch_aarch64_12x(location + 4, value);
}

// Relaxable 32-bit relative address.
void
patch_x86_64_32rx(unsigned char *location, uint64_t value)
Expand Down
40 changes: 6 additions & 34 deletions Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class HoleValue(enum.Enum):
_PATCH_FUNCS = {
# aarch64-apple-darwin:
"ARM64_RELOC_BRANCH26": "patch_aarch64_26r",
"ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx",
"ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x",
"ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21r",
"ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12",
"ARM64_RELOC_PAGE21": "patch_aarch64_21r",
"ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
"ARM64_RELOC_UNSIGNED": "patch_64",
Expand All @@ -70,21 +70,21 @@ class HoleValue(enum.Enum):
# aarch64-pc-windows-msvc:
"IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r",
"IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r",
"IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx",
"IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21r",
"IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12",
"IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x",
"IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12",
# i686-pc-windows-msvc:
"IMAGE_REL_I386_DIR32": "patch_32",
"IMAGE_REL_I386_REL32": "patch_x86_64_32rx",
# aarch64-unknown-linux-gnu:
"R_AARCH64_ABS64": "patch_64",
"R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12",
"R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
"R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21r",
"R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
"R_AARCH64_CALL26": "patch_aarch64_26r",
"R_AARCH64_CONDBR19": "patch_aarch64_19r",
"R_AARCH64_JUMP26": "patch_aarch64_26r",
"R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
"R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12",
"R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
"R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b",
"R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c",
Expand Down Expand Up @@ -171,34 +171,6 @@ class Hole:
def __post_init__(self) -> None:
# Resolve the name of the C patch_* function for this hole's relocation
# kind. Indexing _PATCH_FUNCS raises KeyError for a kind it does not list.
self.func = _PATCH_FUNCS[self.kind]

def fold(self, other: typing.Self, body: bytearray) -> typing.Self | None:
"""Combine two holes into a single hole, if possible."""
instruction_a = int.from_bytes(
body[self.offset : self.offset + 4], byteorder=sys.byteorder
)
instruction_b = int.from_bytes(
body[other.offset : other.offset + 4], byteorder=sys.byteorder
)
reg_a = instruction_a & 0b11111
reg_b1 = instruction_b & 0b11111
reg_b2 = (instruction_b >> 5) & 0b11111

if (
self.offset + 4 == other.offset
and self.value == other.value
and self.symbol == other.symbol
and self.addend == other.addend
and self.func == "patch_aarch64_21rx"
and other.func == "patch_aarch64_12x"
and reg_a == reg_b1 == reg_b2
):
# These can *only* be properly relaxed when they appear together and
# patch the same value:
folded = self.replace()
folded.func = "patch_aarch64_33rx"
return folded
return None

def as_c(self, where: str) -> str:
"""Dump this hole as a call to a patch_* function."""
if self.custom_location:
Expand Down
10 changes: 1 addition & 9 deletions Tools/jit/_writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Utilities for writing StencilGroups out to a C header file."""

import itertools
import typing
import math

Expand Down Expand Up @@ -61,15 +60,8 @@ def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator
for part, stencil in [("data", group.data), ("code", group.code)]:
if stencil.body.rstrip(b"\x00"):
yield f" memcpy({part}, {part}_body, sizeof({part}_body));"
skip = False
stencil.holes.sort(key=lambda hole: hole.offset)
for hole, pair in itertools.zip_longest(stencil.holes, stencil.holes[1:]):
if skip:
skip = False
continue
if pair and (folded := hole.fold(pair, stencil.body)):
skip = True
hole = folded
for hole in stencil.holes:
yield f" {hole.as_c(part)}"
yield "}"
yield ""
Expand Down
Loading