diff options
Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 169 |
1 files changed, 56 insertions, 113 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 427ec06a2fff..8d00e055306d 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -951,8 +951,7 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, - const ABIArgInfo& current) const; + /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -1536,27 +1535,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } -ABIArgInfo -X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, - const ABIArgInfo &current) const { - // Assumes vectorCall calling convention. - const Type *Base = nullptr; - uint64_t NumElts = 0; - - if (!Ty->isBuiltinType() && !Ty->isVectorType() && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - // HVA types get passed directly in registers if there is room. - State.FreeSSERegs -= NumElts; - return getDirectX86Hva(); - } - // If there's no room, the HVA gets passed as normal indirect - // structure. - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - return current; -} - ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1575,35 +1553,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } } - // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets, regcall uses some of the HVA rules. + // Regcall uses the concept of a homogenous vector aggregate, similar + // to other targets. 
const Type *Base = nullptr; uint64_t NumElts = 0; - if ((State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) && + if (State.CC == llvm::CallingConv::X86_RegCall && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.CC == llvm::CallingConv::X86_RegCall) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { - if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { - // Actual floating-point types get registers first time through if - // there is registers available - State.FreeSSERegs -= NumElts; + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); - } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { - // HVA Types only get registers after everything else has been - // set, so it gets set as indirect for now. - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); - } + return ABIArgInfo::getExpand(); } + return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1684,31 +1647,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, bool &UsedInAlloca) const { - // Vectorcall only allows the first 6 parameters to be passed in registers, - // and homogeneous vector aggregates are only put into registers as a second - // priority. - unsigned Count = 0; - CCState ZeroState = State; - ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; - // HVAs must be done as a second priority for registers, so the deferred - // items are dealt with by going through the pattern a second time. 
+ // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // After that, integers AND HVAs are assigned Left to Right in the same pass. + // Integers are passed as ECX/EDX if one is available (in order). HVAs will + // first take up the remaining YMM/XMM registers. If insufficient registers + // remain but an integer register (ECX/EDX) is available, it will be passed + // in that, else, on the stack. for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = classifyArgumentType(I.type, State); - else - // Parameters after the 6th cannot be passed in registers, - // so pretend there are no registers left for them. - I.info = classifyArgumentType(I.type, ZeroState); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - ++Count; + // First pass do all the vector types. + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = ABIArgInfo::getDirect(); + } else { + I.info = classifyArgumentType(Ty, State); + } + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } - Count = 0; - // Go through the arguments a second time to get HVAs registers if there - // are still some available. + for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, State, I.info); - ++Count; + // Second pass, do the rest! 
+ const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); + + if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { + // Assign true HVAs (non vector/native FP types). + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = getDirectX86Hva(); + } else { + I.info = getIndirectResult(Ty, /*ByVal=*/false, State); + } + } else if (!IsHva) { + // Assign all Non-HVAs, so this will exclude Vector/FP args. + I.info = classifyArgumentType(Ty, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } } @@ -3901,6 +3886,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, bool IsRegCall) const { unsigned Count = 0; for (auto &I : FI.arguments()) { + // Vectorcall in x64 only permits the first 6 arguments to be passed + // as XMM/YMM registers. if (Count < VectorcallMaxParamNumAsReg) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); else { @@ -3913,11 +3900,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, ++Count; } - Count = 0; for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); - ++Count; + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); } } @@ -7344,8 +7328,6 @@ public: }; } -static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); - void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, @@ -7402,8 +7384,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } - - appendOpenCLVersionMD(M); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -8074,8 +8054,6 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} - void emitTargetMD(const Decl 
*D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; @@ -8090,41 +8068,6 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { } } -/// Emit SPIR specific metadata: OpenCL and SPIR version. -void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the - // opencl.spir.version named metadata. - llvm::Metadata *SPIRVerElts[] = { - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))}; - llvm::NamedMDNode *SPIRVerMD = - M.getOrInsertNamedMetadata("opencl.spir.version"); - SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); - appendOpenCLVersionMD(CGM); -} - -static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the - // opencl.ocl.version named metadata node. - llvm::Metadata *OCLVerElts[] = { - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; - llvm::NamedMDNode *OCLVerMD = - M.getOrInsertNamedMetadata("opencl.ocl.version"); - OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); -} - unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } |