summaryrefslogtreecommitdiff
path: root/lib/Target/AArch64/AArch64CallingConvention.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64CallingConvention.td')
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td88
1 files changed, 80 insertions, 8 deletions
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index d969a9e1ab3a..bccbbd4591ed 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,10 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
+class CCIfILP32<CCAction A> :
+ CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
+
+
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -70,6 +74,18 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCPassIndirect<i64>>,
+
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCAssignToReg<[P0, P1, P2, P3]>>,
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCPassIndirect<i64>>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -111,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
@@ -135,7 +152,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
- CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCAssignToReg<[P0, P1, P2, P3]>>
]>;
// Vararg functions on windows pass floats in integer registers
@@ -202,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Re-demote pointers to 32 bits so we don't end up storing 64-bit
+ // values and clobbering neighbouring stack locations. Not very pretty.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
+
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
@@ -229,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCAssignToStack<16, 16>>
]>;
+// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise this
+// is the same as the normal Darwin VarArgs handling.
+let Entry = 1 in
+def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+ // Handle all scalar types as either i32 or f32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f16], CCPromoteToType<f32>>,
+
+ // Everything is on the stack.
+ // i128 is split into two i64s, and its stack alignment is 16 bytes.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
+
// The WebKit_JS calling convention only passes the first argument (the callee)
// in register and the remaining arguments on stack. We allow 32bit stack slots,
// so that WebKit can write partial values in the stack and define the other
@@ -298,6 +351,12 @@ def CC_AArch64_GHC : CallingConv<[
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;
+// The order of the callee-saves in this file is important, because the
+// FrameLowering code will use this order to determine the layout of the
+// callee-save area in the stack frame. As can be observed below, Darwin
+// requires the frame-record (LR, FP) to be at the top of the callee-save
+// area, whereas for other platforms they are at the bottom.
+
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
@@ -306,7 +365,13 @@ def CC_AArch64_GHC : CallingConv<[
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
-def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// Darwin puts the frame-record at the top of the callee-save area.
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
@@ -314,17 +379,24 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
// and not (LR,FP) pairs.
-def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, FP, LR,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
// AArch64 PCS for vector functions (VPCS)
// must (additionally) preserve full Q8-Q23 registers
-def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
(sequence "Q%u", 8, 23))>;
+// Functions taking SVE arguments or returning an SVE type
+// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
+ (sequence "Z%u", 8, 23),
+ (sequence "P%u", 4, 15))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -336,7 +408,7 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
+ : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
@@ -352,7 +424,7 @@ def CSR_AArch64_TLS_Darwin
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
def CSR_AArch64_CXX_TLS_Darwin
- : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
(sequence "D%u", 0, 31))>;