Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 48 additions & 20 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,15 +596,27 @@ impl ABIMachineSpec for AArch64MachineDeps {
}

if setup_frame {
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
rt2: link_reg(),
mem: PairAMode::SPPreIndexed {
simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
},
flags: MemFlags::trusted(),
});
let setup_area_size = frame_layout.setup_area_size as i64;
if setup_area_size == 8 {
// str fp, [sp, #-8]!
insts.push(Inst::Store64 {
rd: fp_reg(),
mem: AMode::SPPreIndexed {
simm9: SImm9::maybe_from_i64(-setup_area_size).unwrap(),
},
flags: MemFlags::trusted(),
});
} else {
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
rt2: link_reg(),
mem: PairAMode::SPPreIndexed {
simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
},
flags: MemFlags::trusted(),
});
}

if flags.unwind_info() {
insts.push(Inst::Unwind {
Expand Down Expand Up @@ -645,15 +657,27 @@ impl ABIMachineSpec for AArch64MachineDeps {
// clobber-restore code (which also frees the fixed frame). Hence, there
// is no need for the usual `mov sp, fp` here.

// `ldp fp, lr, [sp], #16`
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairAMode::SPPostIndexed {
simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
},
flags: MemFlags::trusted(),
});
let setup_area_size = frame_layout.setup_area_size as i64;
if setup_area_size == 8 {
// `ldr fp, [sp], #8`
insts.push(Inst::ULoad64 {
rd: writable_fp_reg(),
mem: AMode::SPPostIndexed {
simm9: SImm9::maybe_from_i64(setup_area_size).unwrap(),
},
flags: MemFlags::trusted(),
});
} else {
// `ldp fp, lr, [sp], #16`
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairAMode::SPPostIndexed {
simm7: SImm7Scaled::maybe_from_i64(setup_area_size, types::I64).unwrap(),
},
flags: MemFlags::trusted(),
});
}
}

if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
Expand Down Expand Up @@ -1144,16 +1168,20 @@ impl ABIMachineSpec for AArch64MachineDeps {

// Compute linkage frame size.
let setup_area_size = if flags.preserve_frame_pointers()
|| function_calls != FunctionCalls::None
// The function arguments that are passed on the stack are addressed
// relative to the Frame Pointer.
|| flags.unwind_info()
|| incoming_args_size > 0
|| clobber_size > 0
|| fixed_frame_storage_size > 0
{
16 // FP, LR
} else {
0
match function_calls {
FunctionCalls::Regular => 16,
FunctionCalls::None => 0,
FunctionCalls::TailOnly => 8,
}
};

// Return FrameLayout structure.
Expand Down
37 changes: 25 additions & 12 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3635,19 +3635,32 @@ fn emit_return_call_common_sequence<T>(
// clobber-restore code (which also frees the fixed frame). Hence, there
// is no need for the usual `mov sp, fp` here.

// `ldp fp, lr, [sp], #16`
Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairAMode::SPPostIndexed {
// TODO: we could fold the increment for incoming_args_diff here, as long as that
// value is less than 502*8, by adding it to `setup_area_size`.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
},
flags: MemFlags::trusted(),
if setup_area_size == 8 {
// `ldr fp, [sp], #8`
Inst::ULoad64 {
rd: writable_fp_reg(),
mem: AMode::SPPostIndexed {
simm9: SImm9::maybe_from_i64(i64::from(setup_area_size)).unwrap(),
},
flags: MemFlags::trusted(),
}
.emit(sink, emit_info, state);
} else {
// `ldp fp, lr, [sp], #16`
Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairAMode::SPPostIndexed {
// TODO: we could fold the increment for incoming_args_diff here, as long as that
// value is less than 502*8, by adding it to `setup_area_size`.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64)
.unwrap(),
},
flags: MemFlags::trusted(),
}
.emit(sink, emit_info, state);
}
.emit(sink, emit_info, state);
}

// Adjust SP to account for the possible over-allocation in the prologue.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
test compile precise-output
set unwind_info=false
set preserve_frame_pointers=false
target aarch64

;; Test 1: Tail calling convention with tail-only calls
;; FunctionCalls::TailOnly → setup_area_size = 8 → optimized frame
function %tail_only_function() -> i64 tail {
;; Callee also uses the `tail` calling convention (required for return_call).
fn0 = colocated %target_func() -> i64 tail

block0:
;; The only call in this function is a tail call, so the prologue saves FP
;; alone (`str fp, [sp, #-8]!`) — see the expected output below.
return_call fn0()
}

; VCode:
; str fp, [sp, #-8]!
; mov fp, sp
; block0:
; return_call TestCase(%target_func) new_stack_arg_size:0
;
; Disassembled:
; block0: ; offset 0x0
; str x29, [sp, #-8]!
; mov x29, sp
; block1: ; offset 0x8
; ldr x29, [sp], #8
; b #0xc ; reloc_external Call %target_func 0

;; Test 2: SystemV calling convention with regular calls
;; FunctionCalls::Regular → setup_area_size = 16 → standard frame
function %systemv_regular_function() -> i64 system_v {
fn0 = colocated %target_func() -> i64 system_v

block0:
;; A regular (non-tail) call clobbers LR, so the standard 16-byte
;; `stp fp, lr` / `ldp fp, lr` frame is expected — see output below.
v0 = call fn0()
return v0
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; bl 0
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; block1: ; offset 0x8
; bl #8 ; reloc_external Call %target_func 0
; ldp x29, x30, [sp], #0x10
; ret

;; Test 3: Tail calling convention with conditional tail calls
;; Multiple return_call instructions still → FunctionCalls::TailOnly → optimized frame
function %tail_only_conditional(i64) -> i64 tail {
fn0 = colocated %target_func() -> i64 tail
fn1 = colocated %other_func() -> i64 tail

;; Branch on v0 > 0 and tail-call a different target on each path.
;; Both exits are return_call, so the optimized 8-byte frame is still
;; expected (each epilogue restores only FP — see output below).
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sgt v0, v1
brif v2, block1, block2

block1:
return_call fn0()

block2:
return_call fn1()
}

; VCode:
; str fp, [sp, #-8]!
; mov fp, sp
; block0:
; subs xzr, x2, #0
; b.gt label2 ; b label1
; block1:
; return_call TestCase(%other_func) new_stack_arg_size:0
; block2:
; return_call TestCase(%target_func) new_stack_arg_size:0
;
; Disassembled:
; block0: ; offset 0x0
; str x29, [sp, #-8]!
; mov x29, sp
; block1: ; offset 0x8
; cmp x2, #0
; b.gt #0x18
; block2: ; offset 0x10
; ldr x29, [sp], #8
; b #0x14 ; reloc_external Call %other_func 0
; block3: ; offset 0x18
; ldr x29, [sp], #8
; b #0x1c ; reloc_external Call %target_func 0

;; Target functions for testing
;; Trivial leaf returning a constant; used as the tail/regular call target above.
function %target_func() -> i64 {
block0:
v0 = iconst.i64 42
return v0
}

; VCode:
; block0:
; movz x0, #42
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov x0, #0x2a
; ret

;; Second trivial leaf; distinct target for the conditional tail-call test.
function %other_func() -> i64 {
block0:
v0 = iconst.i64 24
return v0
}

; VCode:
; block0:
; movz x0, #24
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov x0, #0x18
; ret
Loading