Diffstat (limited to 'lib/Target/ARM')
 lib/Target/ARM/ARMCallLowering.cpp      | 55
 lib/Target/ARM/ARMInstrVFP.td           | 18
 lib/Target/ARM/ARMLegalizerInfo.cpp     | 45
 lib/Target/ARM/ARMTargetTransformInfo.h |  2
 4 files changed, 82 insertions(+), 38 deletions(-)
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a7ac9a1dca6e..e498f70b820d 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -35,9 +35,19 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- if (T->isArrayTy() || T->isStructTy())
+ if (T->isArrayTy())
return true;
+ if (T->isStructTy()) {
+ // For now we only allow homogeneous structs that we can manipulate with
+ // G_MERGE_VALUES and G_UNMERGE_VALUES.
+ auto StructT = cast<StructType>(T);
+ for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
+ if (StructT->getElementType(i) != StructT->getElementType(0))
+ return false;
+ return true;
+ }
+
EVT VT = TLI.getValueType(DL, T, true);
if (!VT.isSimple() || VT.isVector() ||
!(VT.isInteger() || VT.isFloatingPoint()))
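Note: the struct check added above accepts only homogeneous element types. A minimal standalone sketch of that test (hypothetical helper name, assuming the LLVM IR headers):

    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // Returns true iff every element has the same type as element 0,
    // e.g. {i32, i32} passes while {i32, float} is rejected.
    static bool isHomogeneousStruct(StructType *ST) {
      for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i)
        if (ST->getElementType(i) != ST->getElementType(0))
          return false;
      return true;
    }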
@@ -220,12 +230,16 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
return false;
SmallVector<ArgInfo, 4> SplitVTs;
+ SmallVector<unsigned, 4> Regs;
ArgInfo RetInfo(VReg, Val->getType());
setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
+ Regs.push_back(Reg);
});
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VReg);
+
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
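Note: the G_UNMERGE_VALUES built here splits one wide vreg into equally sized pieces in a single instruction. A hedged sketch of the pattern, assuming a live MachineIRBuilder B and MachineRegisterInfo MRI (names illustrative; a packed {i32, i32} return maps to s64):

    unsigned Whole = MRI.createGenericVirtualRegister(LLT::scalar(64));
    unsigned Lo = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned Hi = MRI.createGenericVirtualRegister(LLT::scalar(32));
    // One unmerge replaces the per-piece G_EXTRACTs used before this patch.
    B.buildUnmerge({Lo, Hi}, Whole);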
@@ -344,26 +358,6 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
return 1;
}
- /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets.
- /// Note that the source registers are not required to have homogeneous types,
- /// so we use G_INSERT rather than G_MERGE_VALUES.
- // FIXME: Use G_MERGE_VALUES if the types are homogeneous.
- void mergeRegisters(unsigned DstReg, ArrayRef<unsigned> SrcRegs,
- ArrayRef<uint64_t> SrcOffsets) {
- LLT Ty = MRI.getType(DstReg);
-
- unsigned Dst = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildUndef(Dst);
-
- for (unsigned i = 0; i < SrcRegs.size(); ++i) {
- unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]);
- Dst = Tmp;
- }
-
- MIRBuilder.buildCopy(DstReg, Dst);
- }
-
/// Marking a physical register as used is different between formal
/// parameters, where it's a basic block live-in, and call returns, where it's
/// an implicit-def of the call instruction.
@@ -413,22 +407,19 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> ArgInfos;
SmallVector<unsigned, 4> SplitRegs;
- SmallVector<uint64_t, 4> RegOffsets;
unsigned Idx = 0;
for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
SplitRegs.clear();
- RegOffsets.clear();
splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
SplitRegs.push_back(Reg);
- RegOffsets.push_back(Offset);
});
if (!SplitRegs.empty())
- ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets);
+ MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
Idx++;
}
@@ -490,9 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!Arg.IsFixed)
return false;
+ SmallVector<unsigned, 8> Regs;
splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, Arg.Reg, Offset);
+ Regs.push_back(Reg);
});
+
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, Arg.Reg);
}
auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
@@ -508,11 +503,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
ArgInfos.clear();
- SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
splitToValueTypes(OrigRet, ArgInfos, MF,
[&](unsigned Reg, uint64_t Offset) {
- RegOffsets.push_back(Offset);
SplitRegs.push_back(Reg);
});
@@ -521,10 +514,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
- if (!RegOffsets.empty()) {
+ if (!SplitRegs.empty()) {
// We have split the value and allocated each individual piece, now build
// it up again.
- RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets);
+ MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
}
}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 817b567db767..5d887c4fcbf2 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -2010,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -2018,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -2028,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -2059,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -2076,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 2d490b7c303e..a706079d9866 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
+#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -63,6 +65,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
+ // FIXME: Support s8 and s16 as well
+ for (unsigned Op : {G_SREM, G_UREM})
+ if (ST.hasDivideInARMMode())
+ setAction({Op, s32}, Lower);
+ else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
+ ST.isTargetMuslAEABI())
+ setAction({Op, s32}, Custom);
+ else
+ setAction({Op, s32}, Libcall);
+
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
for (auto Ty : {s1, s8, s16})
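Note: the three actions chosen for G_SREM/G_UREM above differ in who does the work: Lower expands the operation in terms of other generic instructions, Custom runs the target hook shown below, and Libcall emits a generic runtime call. A standalone restatement of the policy (hypothetical helper, illustration only):

    enum class RemAction { Lower, Custom, Libcall };

    static RemAction remActionForS32(bool HasHWDivide, bool IsAEABITarget) {
      if (HasHWDivide)
        return RemAction::Lower;   // expand via divide, multiply, subtract
      if (IsAEABITarget)
        return RemAction::Custom;  // __aeabi_idivmod/__aeabi_uidivmod below
      return RemAction::Libcall;   // plain RTLIB remainder call
    }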
@@ -134,5 +146,38 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
}
return true;
}
+ case G_SREM:
+ case G_UREM: {
+ unsigned OriginalResult = MI.getOperand(0).getReg();
+ auto Size = MRI.getType(OriginalResult).getSizeInBits();
+ if (Size != 32)
+ return false;
+
+ auto Libcall =
+ MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+
+ // Our divmod libcalls return a struct containing the quotient and the
+ // remainder. We need to create a virtual register for it.
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ Type *ArgTy = Type::getInt32Ty(Ctx);
+ StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
+ auto RetVal = MRI.createGenericVirtualRegister(
+ getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
+
+ auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
+ {{MI.getOperand(1).getReg(), ArgTy},
+ {MI.getOperand(2).getReg(), ArgTy}});
+ if (Status != LegalizerHelper::Legalized)
+ return false;
+
+ // The remainder is the second result of divmod. Split the return value into
+ // a new, unused register for the quotient and the destination of the
+ // original instruction for the remainder.
+ MIRBuilder.buildUnmerge(
+ {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
+ RetVal);
+
+ return LegalizerHelper::Legalized;
+ }
}
}
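Note: as the comments in the hunk above state, the AEABI divmod helpers return quotient and remainder together (in r0/r1), and only the remainder survives the G_UNMERGE_VALUES. A hedged C++ model of that contract (illustrative types and names, not the runtime's actual signature):

    #include <cstdint>

    struct IDivMod { int32_t Quotient; int32_t Remainder; };

    // Models __aeabi_idivmod: quotient first, remainder second
    // (nonzero denominator assumed). G_SREM keeps only Remainder;
    // the Quotient lands in a dead vreg.
    static IDivMod idivmod(int32_t Num, int32_t Den) {
      return {Num / Den, Num % Den};
    }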
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 7de0543dfa5e..8a1a37863877 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -78,7 +78,7 @@ public:
return 13;
}
- unsigned getRegisterBitWidth(bool Vector) {
+ unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasNEON())
return 128;