Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp      | 55
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td           | 18
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp     | 45
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.h |  2
4 files changed, 82 insertions, 38 deletions
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a7ac9a1dca6e..e498f70b820d 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -35,9 +35,19 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
 
 static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
                             Type *T) {
-  if (T->isArrayTy() || T->isStructTy())
+  if (T->isArrayTy())
     return true;
 
+  if (T->isStructTy()) {
+    // For now we only allow homogeneous structs that we can manipulate with
+    // G_MERGE_VALUES and G_UNMERGE_VALUES
+    auto StructT = cast<StructType>(T);
+    for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
+      if (StructT->getElementType(i) != StructT->getElementType(0))
+        return false;
+    return true;
+  }
+
   EVT VT = TLI.getValueType(DL, T, true);
   if (!VT.isSimple() || VT.isVector() ||
       !(VT.isInteger() || VT.isFloatingPoint()))
@@ -220,12 +230,16 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
     return false;
 
   SmallVector<ArgInfo, 4> SplitVTs;
+  SmallVector<unsigned, 4> Regs;
   ArgInfo RetInfo(VReg, Val->getType());
   setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
   splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
-    MIRBuilder.buildExtract(Reg, VReg, Offset);
+    Regs.push_back(Reg);
   });
 
+  if (Regs.size() > 1)
+    MIRBuilder.buildUnmerge(Regs, VReg);
+
   CCAssignFn *AssignFn =
       TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
 
@@ -344,26 +358,6 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     return 1;
   }
 
-  /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets.
-  /// Note that the source registers are not required to have homogeneous types,
-  /// so we use G_INSERT rather than G_MERGE_VALUES.
-  // FIXME: Use G_MERGE_VALUES if the types are homogeneous.
-  void mergeRegisters(unsigned DstReg, ArrayRef<unsigned> SrcRegs,
-                      ArrayRef<uint64_t> SrcOffsets) {
-    LLT Ty = MRI.getType(DstReg);
-
-    unsigned Dst = MRI.createGenericVirtualRegister(Ty);
-    MIRBuilder.buildUndef(Dst);
-
-    for (unsigned i = 0; i < SrcRegs.size(); ++i) {
-      unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
-      MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]);
-      Dst = Tmp;
-    }
-
-    MIRBuilder.buildCopy(DstReg, Dst);
-  }
-
   /// Marking a physical register as used is different between formal
   /// parameters, where it's a basic block live-in, and call returns, where it's
   /// an implicit-def of the call instruction.
@@ -413,22 +407,19 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> ArgInfos;
   SmallVector<unsigned, 4> SplitRegs;
-  SmallVector<uint64_t, 4> RegOffsets;
   unsigned Idx = 0;
   for (auto &Arg : F.args()) {
     ArgInfo AInfo(VRegs[Idx], Arg.getType());
     setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
 
     SplitRegs.clear();
-    RegOffsets.clear();
     splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
       SplitRegs.push_back(Reg);
-      RegOffsets.push_back(Offset);
     });
 
     if (!SplitRegs.empty())
-      ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets);
+      MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
 
     Idx++;
   }
 
@@ -490,9 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (!Arg.IsFixed)
       return false;
 
+    SmallVector<unsigned, 8> Regs;
     splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
-      MIRBuilder.buildExtract(Reg, Arg.Reg, Offset);
+      Regs.push_back(Reg);
     });
+
+    if (Regs.size() > 1)
+      MIRBuilder.buildUnmerge(Regs, Arg.Reg);
   }
 
   auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
@@ -508,11 +503,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
 
     ArgInfos.clear();
-    SmallVector<uint64_t, 8> RegOffsets;
     SmallVector<unsigned, 8> SplitRegs;
     splitToValueTypes(OrigRet, ArgInfos, MF,
                       [&](unsigned Reg, uint64_t Offset) {
-                        RegOffsets.push_back(Offset);
                         SplitRegs.push_back(Reg);
                       });
 
@@ -521,10 +514,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
       return false;
 
-    if (!RegOffsets.empty()) {
+    if (!SplitRegs.empty()) {
       // We have split the value and allocated each individual piece, now build
       // it up again.
-      RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets);
+      MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
     }
   }
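The new isSupportedType check above restricts aggregate lowering to homogeneous structs, i.e. structs whose elements all share one type, since those are the only ones G_MERGE_VALUES/G_UNMERGE_VALUES can split and reassemble directly. A minimal standalone sketch of that predicate, for illustration only (the isHomogeneous helper and the example types are ours, not part of the patch):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// True if every element of ST has the same type as the first one -- the
// same loop the patch adds to isSupportedType.
static bool isHomogeneous(StructType *ST) {
  for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i)
    if (ST->getElementType(i) != ST->getElementType(0))
      return false;
  return true;
}

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *F32 = Type::getFloatTy(Ctx);
  StructType *SameTypes = StructType::get(Ctx, {I32, I32});  // now supported
  StructType *MixedTypes = StructType::get(Ctx, {I32, F32}); // still rejected
  return (isHomogeneous(SameTypes) && !isHomogeneous(MixedTypes)) ? 0 : 1;
}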
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 817b567db767..5d887c4fcbf2 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -2010,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                   [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
                                           (f64 DPR:$Ddin)))]>,
               RegConstraint<"$Ddin = $Dd">,
-              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -2018,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
                   [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
   // Some single precision VFP instructions may be executed on both NEON and
   // VFP pipelines.
 }
@@ -2028,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
                   IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
                   []>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasFullFP16,UseFusedMAC]>;
+              Requires<[HasFullFP16,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
           (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -2059,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
                   [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
                                            (f64 DPR:$Ddin)))]>,
               RegConstraint<"$Ddin = $Dd">,
-              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                   IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
   // Some single precision VFP instructions may be executed on both NEON and
   // VFP pipelines.
 }
@@ -2076,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
                   IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
                   []>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasFullFP16,UseFusedMAC]>;
+              Requires<[HasFullFP16,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
           (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 2d490b7c303e..a706079d9866 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,8 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMLegalizerInfo.h"
+#include "ARMCallLowering.h"
 #include "ARMSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -63,6 +65,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
       setAction({Op, s32}, Libcall);
   }
 
+  // FIXME: Support s8 and s16 as well
+  for (unsigned Op : {G_SREM, G_UREM})
+    if (ST.hasDivideInARMMode())
+      setAction({Op, s32}, Lower);
+    else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
+             ST.isTargetMuslAEABI())
+      setAction({Op, s32}, Custom);
+    else
+      setAction({Op, s32}, Libcall);
+
   for (unsigned Op : {G_SEXT, G_ZEXT}) {
     setAction({Op, s32}, Legal);
     for (auto Ty : {s1, s8, s16})
@@ -134,5 +146,38 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
     }
     return true;
   }
+  case G_SREM:
+  case G_UREM: {
+    unsigned OriginalResult = MI.getOperand(0).getReg();
+    auto Size = MRI.getType(OriginalResult).getSizeInBits();
+    if (Size != 32)
+      return false;
+
+    auto Libcall =
+        MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+
+    // Our divmod libcalls return a struct containing the quotient and the
+    // remainder. We need to create a virtual register for it.
+    auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+    Type *ArgTy = Type::getInt32Ty(Ctx);
+    StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
+    auto RetVal = MRI.createGenericVirtualRegister(
+        getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
+
+    auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
+                                     {{MI.getOperand(1).getReg(), ArgTy},
+                                      {MI.getOperand(2).getReg(), ArgTy}});
+    if (Status != LegalizerHelper::Legalized)
+      return false;
+
+    // The remainder is the second result of divmod. Split the return value into
+    // a new, unused register for the quotient and the destination of the
+    // original instruction for the remainder.
+    MIRBuilder.buildUnmerge(
+        {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
+        RetVal);
+
+    return LegalizerHelper::Legalized;
+  }
   }
 }
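Some context for the G_SREM/G_UREM handling above: on AEABI targets the RTLIB::SDIVREM_I32/UDIVREM_I32 libcalls land on the runtime's divmod helpers (__aeabi_idivmod and __aeabi_uidivmod), which return quotient and remainder together, which is why the legalizer models the return value as a packed {i32, i32} struct and then unmerges it, discarding the quotient half. A rough C++ sketch of the semantics being modeled, assuming nothing beyond the standard library (idivmod and the struct name are illustrative stand-ins, not the runtime's actual interface):

#include <cstdint>
#include <cstdio>

// Quotient/remainder pair, analogous to the packed {i32, i32} struct the
// legalizer builds for the divmod libcall's return value.
struct IDivModResult {
  int32_t Quot; // first slot: quotient (sent to a scratch register above)
  int32_t Rem;  // second slot: remainder (the result G_SREM actually wants)
};

// Stand-in for the divmod semantics: quotient first, remainder second.
static IDivModResult idivmod(int32_t Num, int32_t Denom) {
  return {Num / Denom, Num % Denom};
}

int main() {
  IDivModResult R = idivmod(23, 5);
  std::printf("quot=%d rem=%d\n", R.Quot, R.Rem); // prints: quot=4 rem=3
  return 0;
}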
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 7de0543dfa5e..8a1a37863877 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -78,7 +78,7 @@ public:
     return 13;
   }
 
-  unsigned getRegisterBitWidth(bool Vector) {
+  unsigned getRegisterBitWidth(bool Vector) const {
     if (Vector) {
       if (ST->hasNEON())
         return 128;