diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-07-05 14:21:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-07-05 14:21:36 +0000 |
commit | 1a82d4c088707c791c792f6822f611b47a12bdfe (patch) | |
tree | 7c411f9b5d807f7f204fdd16965d8925a82b6d18 /lib/Target | |
parent | 3a0822f094b578157263e04114075ad7df81db41 (diff) | |
download | src-test2-1a82d4c088707c791c792f6822f611b47a12bdfe.tar.gz src-test2-1a82d4c088707c791c792f6822f611b47a12bdfe.zip |
Notes
Diffstat (limited to 'lib/Target')
420 files changed, 6777 insertions, 2259 deletions
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 6c5a083b393d..bffd9e6e8c76 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -148,7 +148,7 @@ private: Color getColor(unsigned Register); Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L); }; -} // namespace +} char AArch64A57FPLoadBalancing::ID = 0; diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index 176403ce124a..d973234dd86a 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -102,7 +102,7 @@ public: } }; char AArch64BranchRelaxation::ID = 0; -} // namespace +} /// verify - check BBOffsets, BBSizes, alignment of islands void AArch64BranchRelaxation::verify() { diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index efc328a37e5f..1e2d1c3b93bd 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -136,6 +136,6 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); } -} // namespace +} #endif diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 11eefc4ff63d..06ff9af37fd7 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -135,7 +135,7 @@ struct LDTLSCleanup : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} char LDTLSCleanup::ID = 0; FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() { diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index acb35251fc6d..c2470f747a38 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -43,7 +43,7 @@ private: unsigned BitSize); }; char AArch64ExpandPseudo::ID = 0; -} // namespace +} /// \brief Transfer implicit operands on the pseudo instruction to the /// instructions created from the expansion. diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index d1523e8548e2..c19fcdc4bb18 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -1678,7 +1679,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, bool WantZExt, MachineMemOperand *MMO) { - if(!TLI.allowsMisalignedMemoryAccesses(VT)) + if (!TLI.allowsMisalignedMemoryAccesses(VT)) return 0; // Simplify this down to something we can handle. @@ -1965,7 +1966,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO) { - if(!TLI.allowsMisalignedMemoryAccesses(VT)) + if (!TLI.allowsMisalignedMemoryAccesses(VT)) return false; // Simplify this down to something we can handle. @@ -3070,9 +3071,9 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; - if (!Callee && !SymName) + if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. @@ -3134,8 +3135,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (CM == CodeModel::Small) { const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); - if (SymName) - MIB.addExternalSymbol(SymName, 0); + if (Symbol) + MIB.addSym(Symbol, 0); else if (Addr.getGlobalValue()) MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); else if (Addr.getReg()) { @@ -3145,18 +3146,18 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { return false; } else { unsigned CallReg = 0; - if (SymName) { + if (Symbol) { unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE); + .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); CallReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), - CallReg) - .addReg(ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::LDRXui), CallReg) + .addReg(ADRPReg) + .addSym(Symbol, + AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else if (Addr.getGlobalValue()) CallReg = materializeGV(Addr.getGlobalValue()); else if (Addr.getReg()) @@ -3460,7 +3461,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; @@ -4734,7 +4736,8 @@ bool AArch64FastISel::selectFRem(const Instruction *I) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 11227eeaf3d7..b496fccba349 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -63,6 +63,6 @@ public: RegScavenger *RS) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0165ef9c49c0..f3242cdd971d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1777,8 +1777,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *Elt = N->getOperand(i).getNode(); + for (const SDValue &Elt : N->op_values()) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); unsigned HalfSize = EltSize / 2; @@ -6689,6 +6688,160 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, return NumBits == 32 || NumBits == 64; } +/// \brief Lower an interleaved load into a ldN intrinsic. +/// +/// E.g. Lower an interleaved load (Factor = 2): +/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr +/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements +/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements +/// +/// Into: +/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr) +/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 +/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 +bool AArch64TargetLowering::lowerInterleavedLoad( + LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + assert(!Shuffles.empty() && "Empty shufflevector input"); + assert(Shuffles.size() == Indices.size() && + "Unmatched number of shufflevectors and indices"); + + const DataLayout *DL = getDataLayout(); + + VectorType *VecTy = Shuffles[0]->getType(); + unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); + + // Skip illegal vector types. + if (VecSize != 64 && VecSize != 128) + return false; + + // A pointer vector can not be the return type of the ldN intrinsics. Need to + // load integer vectors first and then convert to pointer vectors. + Type *EltTy = VecTy->getVectorElementType(); + if (EltTy->isPointerTy()) + VecTy = VectorType::get(DL->getIntPtrType(EltTy), + VecTy->getVectorNumElements()); + + Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace()); + Type *Tys[2] = {VecTy, PtrTy}; + static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2, + Intrinsic::aarch64_neon_ld3, + Intrinsic::aarch64_neon_ld4}; + Function *LdNFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); + + IRBuilder<> Builder(LI); + Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy); + + CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN"); + + // Replace uses of each shufflevector with the corresponding vector loaded + // by ldN. + for (unsigned i = 0; i < Shuffles.size(); i++) { + ShuffleVectorInst *SVI = Shuffles[i]; + unsigned Index = Indices[i]; + + Value *SubVec = Builder.CreateExtractValue(LdN, Index); + + // Convert the integer vector to pointer vector if the element is pointer. + if (EltTy->isPointerTy()) + SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); + + SVI->replaceAllUsesWith(SubVec); + } + + return true; +} + +/// \brief Get a mask consisting of sequential integers starting from \p Start. +/// +/// I.e. <Start, Start + 1, ..., Start + NumElts - 1> +static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumElts) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < NumElts; i++) + Mask.push_back(Builder.getInt32(Start + i)); + + return ConstantVector::get(Mask); +} + +/// \brief Lower an interleaved store into a stN intrinsic. +/// +/// E.g. Lower an interleaved store (Factor = 3): +/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, +/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> +/// store <12 x i32> %i.vec, <12 x i32>* %ptr +/// +/// Into: +/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> +/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> +/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> +/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr) +/// +/// Note that the new shufflevectors will be removed and we'll only generate one +/// st3 instruction in CodeGen. +bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, + ShuffleVectorInst *SVI, + unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + + VectorType *VecTy = SVI->getType(); + assert(VecTy->getVectorNumElements() % Factor == 0 && + "Invalid interleaved store"); + + unsigned NumSubElts = VecTy->getVectorNumElements() / Factor; + Type *EltTy = VecTy->getVectorElementType(); + VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); + + const DataLayout *DL = getDataLayout(); + unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); + + // Skip illegal vector types. + if (SubVecSize != 64 && SubVecSize != 128) + return false; + + Value *Op0 = SVI->getOperand(0); + Value *Op1 = SVI->getOperand(1); + IRBuilder<> Builder(SI); + + // StN intrinsics don't support pointer vectors as arguments. Convert pointer + // vectors to integer vectors. + if (EltTy->isPointerTy()) { + Type *IntTy = DL->getIntPtrType(EltTy); + unsigned NumOpElts = + dyn_cast<VectorType>(Op0->getType())->getVectorNumElements(); + + // Convert to the corresponding integer vector. + Type *IntVecTy = VectorType::get(IntTy, NumOpElts); + Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); + Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); + + SubVecTy = VectorType::get(IntTy, NumSubElts); + } + + Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace()); + Type *Tys[2] = {SubVecTy, PtrTy}; + static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2, + Intrinsic::aarch64_neon_st3, + Intrinsic::aarch64_neon_st4}; + Function *StNFunc = + Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys); + + SmallVector<Value *, 5> Ops; + + // Split the shufflevector operands into sub vectors for the new stN call. + for (unsigned i = 0; i < Factor; i++) + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts))); + + Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy)); + Builder.CreateCall(StNFunc, Ops); + return true; +} + static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) { return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index da42376ac250..46298c0e7de1 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -305,6 +305,15 @@ public: unsigned &RequiredAligment) const override; bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override; + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } + + bool lowerInterleavedLoad(LoadInst *LI, + ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, + unsigned Factor) const override; + bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const override; + bool isLegalAddImmediate(int64_t) const override; bool isLegalICmpImmediate(int64_t) const override; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 2c52f340d6d1..3f2e772a90c4 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -614,10 +614,15 @@ def move_vec_shift : Operand<i32> { let ParserMatchClass = MoveVecShifterOperand; } -def AddSubImmOperand : AsmOperandClass { - let Name = "AddSubImm"; - let ParserMethod = "tryParseAddSubImm"; - let DiagnosticType = "AddSubSecondSource"; +let DiagnosticType = "AddSubSecondSource" in { + def AddSubImmOperand : AsmOperandClass { + let Name = "AddSubImm"; + let ParserMethod = "tryParseAddSubImm"; + } + def AddSubImmNegOperand : AsmOperandClass { + let Name = "AddSubImmNeg"; + let ParserMethod = "tryParseAddSubImm"; + } } // An ADD/SUB immediate shifter operand: // second operand: @@ -631,8 +636,17 @@ class addsub_shifted_imm<ValueType Ty> let MIOperandInfo = (ops i32imm, i32imm); } +class addsub_shifted_imm_neg<ValueType Ty> + : Operand<Ty> { + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmNegOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + def addsub_shifted_imm32 : addsub_shifted_imm<i32>; def addsub_shifted_imm64 : addsub_shifted_imm<i64>; +def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>; +def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>; class neg_addsub_shifted_imm<ValueType Ty> : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> { @@ -1633,7 +1647,7 @@ class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype, (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2, shiftExt)>; -multiclass AddSub<bit isSub, string mnemonic, +multiclass AddSub<bit isSub, string mnemonic, string alias, SDPatternOperator OpNode = null_frag> { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate @@ -1686,6 +1700,14 @@ multiclass AddSub<bit isSub, string mnemonic, let Inst{31} = 1; } + // add Rd, Rb, -imm -> sub Rd, Rn, imm + def : InstAlias<alias#" $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias<alias#" $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + // Register/register aliases with no shift when SP is not used. def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), GPR32, GPR32, GPR32, 0>; @@ -1706,7 +1728,8 @@ multiclass AddSub<bit isSub, string mnemonic, GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0 } -multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> { +multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp, + string alias, string cmpAlias> { let isCompare = 1, Defs = [NZCV] in { // Add/Subtract immediate def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32, @@ -1752,6 +1775,14 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> { } } // Defs = [NZCV] + // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm + def : InstAlias<alias#" $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias<alias#" $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + // Compare aliases def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; @@ -1768,6 +1799,12 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> { def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; + // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm + def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; + // Compare shorthands def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, 0), 5>; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8d8864cfe65f..c0b3f2c60916 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -96,15 +96,10 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } + if (!isUnpredicatedTerminator(I)) return false; @@ -224,15 +219,10 @@ bool AArch64InstrInfo::ReverseBranchCondition( } unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + if (!isUncondBranchOpcode(I->getOpcode()) && !isCondBranchOpcode(I->getOpcode())) return 0; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 653f80286b25..b73e0958df90 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -567,8 +567,8 @@ def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; // Add/subtract -defm ADD : AddSub<0, "add", add>; -defm SUB : AddSub<1, "sub">; +defm ADD : AddSub<0, "add", "sub", add>; +defm SUB : AddSub<1, "sub", "add">; def : InstAlias<"mov $dst, $src", (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; @@ -579,8 +579,8 @@ def : InstAlias<"mov $dst, $src", def : InstAlias<"mov $dst, $src", (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; -defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">; -defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">; +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; // Use SUBS instead of SUB to enable CSE between SUBS and SUB. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e55ae991b635..580427ab3cc1 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -187,6 +187,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, case MachineOperand::MO_ExternalSymbol: MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); break; + case MachineOperand::MO_MCSymbol: + MCOp = LowerSymbolOperand(MO, MO.getMCSymbol()); + break; case MachineOperand::MO_JumpTableIndex: MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex())); break; diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h index 908f66f8e296..1e29b80c2d62 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.h +++ b/lib/Target/AArch64/AArch64MCInstLower.h @@ -47,6 +47,6 @@ public: MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 2a0f0a47b05c..536a8d0f97a0 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -158,6 +158,6 @@ private: MILOHContainer LOHContainerSet; SetOfInstructions LOHRelated; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index bab84631f2b1..5394875a6bc1 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -154,7 +154,7 @@ bool haveSameParity(unsigned reg1, unsigned reg2) { return isOdd(reg1) == isOdd(reg2); } -} // namespace +} bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra) { diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h index c83aea452513..4f656f94ea12 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h @@ -33,6 +33,6 @@ private: // Add constraints between existing chains void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra); }; -} // namespace llvm +} #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index a993b6059131..11932d2b1c22 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -28,6 +28,6 @@ public: unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index e8165a8e4085..1c6b15790ea9 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -57,7 +57,7 @@ private: } }; char AArch64StorePairSuppress::ID = 0; -} // namespace +} // anonymous FunctionPass *llvm::createAArch64StorePairSuppressPass() { return new AArch64StorePairSuppress(); diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index c9b54cc3819c..6bb069423060 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -151,6 +151,6 @@ public: std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 5496a50f6b6e..db6e244337a7 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -225,6 +225,10 @@ void AArch64PassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); + // Match interleaved memory accesses to ldN/stN intrinsics. + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createInterleavedAccessPass(TM)); + if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ed27cf84bbba..fc91c94351cc 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -407,6 +407,26 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } +unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) { + assert(Factor >= 2 && "Invalid interleave factor"); + assert(isa<VectorType>(VecTy) && "Expect a vector type"); + + if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { + unsigned NumElts = VecTy->getVectorNumElements(); + Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); + unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + + // ldN/stN only support legal vector types of size 64 or 128 in bits. + if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) + return Factor; + } + + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { unsigned Cost = 0; for (auto *I : Tys) { diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 25c22bcd58ec..4dabdadd8eeb 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -139,6 +139,11 @@ public: bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 063c053ffe8a..38e8b4d9a938 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -699,6 +699,25 @@ public: const MCConstantExpr *CE = cast<MCConstantExpr>(Expr); return CE->getValue() >= 0 && CE->getValue() <= 0xfff; } + bool isAddSubImmNeg() const { + if (!isShiftedImm() && !isImm()) + return false; + + const MCExpr *Expr; + + // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'. + if (isShiftedImm()) { + unsigned Shift = ShiftedImm.ShiftAmount; + Expr = ShiftedImm.Val; + if (Shift != 0 && Shift != 12) + return false; + } else + Expr = getImm(); + + // Otherwise it should be a real negative immediate in range: + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff; + } bool isCondCode() const { return Kind == k_CondCode; } bool isSIMDImmType10() const { if (!isImm()) @@ -1219,6 +1238,18 @@ public: } } + void addAddSubImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + const MCExpr *MCE = isShiftedImm() ? getShiftedImmVal() : getImm(); + const MCConstantExpr *CE = cast<MCConstantExpr>(MCE); + int64_t Val = -CE->getValue(); + unsigned ShiftAmt = isShiftedImm() ? ShiftedImm.ShiftAmount : 0; + + Inst.addOperand(MCOperand::createImm(Val)); + Inst.addOperand(MCOperand::createImm(ShiftAmt)); + } + void addCondCodeOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(getCondCode())); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 19544ac600d6..15dee978e229 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -181,6 +181,6 @@ public: static const char *getRegisterName(unsigned RegNo, unsigned AltIdx = AArch64::NoRegAltName); }; -} // namespace llvm +} #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 3e982ee03986..7624c7240d68 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -293,7 +293,7 @@ enum CompactUnwindEncodings { UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800 }; -} // namespace CU +} // end CU namespace // FIXME: This should be in a separate file. class DarwinAArch64AsmBackend : public AArch64AsmBackend { @@ -517,7 +517,7 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); } -} // namespace +} MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 807679fb1a21..1f516d1db896 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -34,7 +34,7 @@ protected: private: }; -} // namespace +} AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index bbcbf514069c..b5b1d1f9e19c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -156,22 +156,12 @@ private: } void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().createTempSymbol(); - EmitLabel(Start); - auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( Name + "." + Twine(MappingSymbolCounter++))); - - getAssembler().registerSymbol(*Symbol); + EmitLabel(Symbol); Symbol->setType(ELF::STT_NOTYPE); Symbol->setBinding(ELF::STB_LOCAL); Symbol->setExternal(false); - auto Sec = getCurrentSection().first; - assert(Sec && "need a section"); - Symbol->setSection(*Sec); - - const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); - Symbol->setVariableValue(Value); } int64_t MappingSymbolCounter; @@ -213,4 +203,4 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new AArch64TargetELFStreamer(S); return nullptr; } -} // namespace llvm +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index ca56f6393c41..342384437c6a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -65,7 +65,7 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI); -} // namespace llvm +} // End llvm namespace // Defines symbolic names for AArch64 registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index b2f5bf3cf4b5..741b273073e4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -38,7 +38,7 @@ public: const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; }; -} // namespace +} bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 40071f6b6bb7..7e42f8e3601e 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -346,7 +346,7 @@ namespace AArch64AT { ATMapper(); }; -} // namespace AArch64AT +} namespace AArch64DB { enum DBValues { Invalid = -1, @@ -369,7 +369,7 @@ namespace AArch64DB { DBarrierMapper(); }; -} // namespace AArch64DB +} namespace AArch64DC { enum DCValues { @@ -390,7 +390,7 @@ namespace AArch64DC { DCMapper(); }; -} // namespace AArch64DC +} namespace AArch64IC { enum ICValues { @@ -410,7 +410,7 @@ namespace AArch64IC { static inline bool NeedsRegister(ICValues Val) { return Val == IVAU; } -} // namespace AArch64IC +} namespace AArch64ISB { enum ISBValues { @@ -422,7 +422,7 @@ namespace AArch64ISB { ISBMapper(); }; -} // namespace AArch64ISB +} namespace AArch64PRFM { enum PRFMValues { @@ -452,7 +452,7 @@ namespace AArch64PRFM { PRFMMapper(); }; -} // namespace AArch64PRFM +} namespace AArch64PState { enum PStateValues { @@ -471,7 +471,7 @@ namespace AArch64PState { PStateMapper(); }; -} // namespace AArch64PState +} namespace AArch64SE { enum ShiftExtSpecifiers { @@ -492,7 +492,7 @@ namespace AArch64SE { SXTW, SXTX }; -} // namespace AArch64SE +} namespace AArch64Layout { enum VectorLayout { @@ -514,7 +514,7 @@ namespace AArch64Layout { VL_S, VL_D }; -} // namespace AArch64Layout +} inline static const char * AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) { @@ -1221,7 +1221,7 @@ namespace AArch64SysReg { }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); -} // namespace AArch64SysReg +} namespace AArch64TLBI { enum TLBIValues { @@ -1283,7 +1283,7 @@ namespace AArch64TLBI { return true; } } -} // namespace AArch64TLBI +} namespace AArch64II { /// Target Operand Flag enum. diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 2e7e39a54d33..569ad3844b25 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -141,6 +141,19 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature < def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; +class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping> + : SubtargetFeature < + "isaver"#Major#"."#Minor#"."#Stepping, + "IsaVersion", + "ISAVersion"#Major#"_"#Minor#"_"#Stepping, + "Instruction set version number" +>; + +def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>; +def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>; +def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>; +def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>; + class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< "localmemorysize"#Value, "LocalMemorySize", diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index afc6bcb52bb8..709d7531d38b 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,7 +17,9 @@ // #include "AMDGPUAsmPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" +#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" @@ -89,6 +91,15 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) {} +void AMDGPUAsmPrinter::EmitFunctionBodyStart() { + const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); + SIProgramInfo KernelInfo; + if (STM.isAmdHsaOS()) { + getSIProgramInfo(KernelInfo, *MF); + EmitAmdKernelCodeT(*MF, KernelInfo); + } +} + void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { // This label is used to mark the end of the .text section. @@ -113,13 +124,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; - if (STM.isAmdHsaOS()) { - getSIProgramInfo(KernelInfo, MF); - EmitAmdKernelCodeT(MF, KernelInfo); - OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1)); - } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - getSIProgramInfo(KernelInfo, MF); - EmitProgramInfoSI(MF, KernelInfo); + if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + if (!STM.isAmdHsaOS()) { + getSIProgramInfo(KernelInfo, MF); + EmitProgramInfoSI(MF, KernelInfo); + } + // Emit directives + AMDGPUTargetStreamer *TS = + static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + TS->EmitDirectiveHSACodeObjectVersion(1, 0); + AMDGPU::IsaVersion ISA = STM.getIsaVersion(); + TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, + "AMD", "AMDGPU"); } else { EmitProgramInfoR600(MF); } @@ -459,125 +475,28 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, } void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) const { + const SIProgramInfo &KernelInfo) const { const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); amd_kernel_code_t header; - memset(&header, 0, sizeof(header)); - - header.amd_code_version_major = AMD_CODE_VERSION_MAJOR; - header.amd_code_version_minor = AMD_CODE_VERSION_MINOR; - - header.struct_byte_size = sizeof(amd_kernel_code_t); - - header.target_chip = STM.getAmdKernelCodeChipID(); - - header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment()); + AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits()); header.compute_pgm_resource_registers = KernelInfo.ComputePGMRSrc1 | (KernelInfo.ComputePGMRSrc2 << 32); + header.code_properties = + AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | + AMD_CODE_PROPERTY_IS_PTR64; - // Code Properties: - header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | - AMD_CODE_PROPERTY_IS_PTR64; - - if (KernelInfo.FlatUsed) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; - - if (KernelInfo.ScratchBlocks) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE; - - header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; - header.workgroup_group_segment_byte_size = KernelInfo.LDSSize; - - // MFI->ABIArgOffset is the number of bytes for the kernel arguments - // plus 36. 36 is the number of bytes reserved at the begining of the - // input buffer to store work-group size information. - // FIXME: We should be adding the size of the implicit arguments - // to this value. header.kernarg_segment_byte_size = MFI->ABIArgOffset; - header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; - // FIXME: What values do I put for these alignments - header.kernarg_segment_alignment = 0; - header.group_segment_alignment = 0; - header.private_segment_alignment = 0; - - header.code_type = 1; // HSA_EXT_CODE_KERNEL - - header.wavefront_size = STM.getWavefrontSize(); - - MCSectionELF *VersionSection = - OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(VersionSection); - OutStreamer->EmitBytes(Twine("HSA Code Unit:" + - Twine(header.hsail_version_major) + "." + - Twine(header.hsail_version_minor) + ":" + - "AMD:" + - Twine(header.amd_code_version_major) + "." + - Twine(header.amd_code_version_minor) + ":" + - "GFX8.1:0").str()); - - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - - if (isVerbose()) { - OutStreamer->emitRawComment("amd_code_version_major = " + - Twine(header.amd_code_version_major), false); - OutStreamer->emitRawComment("amd_code_version_minor = " + - Twine(header.amd_code_version_minor), false); - OutStreamer->emitRawComment("struct_byte_size = " + - Twine(header.struct_byte_size), false); - OutStreamer->emitRawComment("target_chip = " + - Twine(header.target_chip), false); - OutStreamer->emitRawComment(" compute_pgm_rsrc1: " + - Twine::utohexstr(KernelInfo.ComputePGMRSrc1), - false); - OutStreamer->emitRawComment(" compute_pgm_rsrc2: " + - Twine::utohexstr(KernelInfo.ComputePGMRSrc2), - false); - OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " + - Twine((bool)(header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false); - OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " + - Twine((bool)(header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false); - OutStreamer->emitRawComment("private_element_size = 2 ", false); - OutStreamer->emitRawComment("is_ptr64 = " + - Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false); - OutStreamer->emitRawComment("workitem_private_segment_byte_size = " + - Twine(header.workitem_private_segment_byte_size), - false); - OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " + - Twine(header.workgroup_group_segment_byte_size), - false); - OutStreamer->emitRawComment("gds_segment_byte_size = " + - Twine(header.gds_segment_byte_size), false); - OutStreamer->emitRawComment("kernarg_segment_byte_size = " + - Twine(header.kernarg_segment_byte_size), false); - OutStreamer->emitRawComment("wavefront_sgpr_count = " + - Twine(header.wavefront_sgpr_count), false); - OutStreamer->emitRawComment("workitem_vgpr_count = " + - Twine(header.workitem_vgpr_count), false); - OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false); - OutStreamer->emitRawComment("wavefront_size = " + - Twine((int)header.wavefront_size), false); - OutStreamer->emitRawComment("optimization_level = " + - Twine(header.optimization_level), false); - OutStreamer->emitRawComment("hsail_profile = " + - Twine(header.hsail_profile), false); - OutStreamer->emitRawComment("hsail_machine_model = " + - Twine(header.hsail_machine_model), false); - OutStreamer->emitRawComment("hsail_version_major = " + - Twine(header.hsail_version_major), false); - OutStreamer->emitRawComment("hsail_version_minor = " + - Twine(header.hsail_version_minor), false); - } - OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header))); + AMDGPUTargetStreamer *TS = + static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + TS->EmitAMDKernelCodeT(header); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 92072512e6b5..345af9b85e15 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -97,6 +97,8 @@ public: /// Implemented in AMDGPUMCInstLower.cpp void EmitInstruction(const MachineInstr *MI) override; + void EmitFunctionBodyStart() override; + void EmitEndOfAsmFile(Module &M) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -108,6 +110,6 @@ protected: size_t DisasmLineMaxLen; }; -} // namespace llvm +} // End anonymous llvm #endif diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 570473d85585..d56838ec2019 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -68,7 +68,7 @@ public: }; int DiagnosticInfoUnsupported::KindID = 0; -} // namespace +} static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 31ae9a3c7760..86d3962b3856 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -198,7 +198,7 @@ namespace AMDGPU { int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); } // End namespace AMDGPU -} // namespace llvm +} // End llvm namespace #define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63) #define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62) diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index e17b41ad5f21..f5e4694e76f6 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -41,5 +41,5 @@ public: bool IsKernel; }; -} // namespace llvm +} #endif diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 605ccd0e1361..0779d1d786b2 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -72,6 +72,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), + IsaVersion(ISAVersion0_0_0), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), @@ -109,6 +110,10 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { } } +AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { + return AMDGPU::getIsaVersion(getFeatureBits()); +} + bool AMDGPUSubtarget::isVGPRSpillingEnabled( const SIMachineFunctionInfo *MFI) const { return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; @@ -131,3 +136,4 @@ void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyBottomUp = false; } } + diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 0d40d14f8203..30f50eb1d2f3 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -20,6 +20,8 @@ #include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "R600ISelLowering.h" +#include "AMDKernelCodeT.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -48,6 +50,14 @@ public: FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 }; + enum { + ISAVersion0_0_0, + ISAVersion7_0_0, + ISAVersion7_0_1, + ISAVersion8_0_0, + ISAVersion8_0_1 + }; + private: std::string DevName; bool Is64bit; @@ -77,6 +87,7 @@ private: bool CIInsts; bool FeatureDisable; int LDSBankCount; + unsigned IsaVersion; AMDGPUFrameLowering FrameLowering; std::unique_ptr<AMDGPUTargetLowering> TLInfo; @@ -236,6 +247,8 @@ public: unsigned getAmdKernelCodeChipID() const; + AMDGPU::IsaVersion getIsaVersion() const; + bool enableMachineScheduler() const override { return true; } @@ -275,6 +288,13 @@ public: bool enableSubRegLiveness() const override { return true; } + + /// \brief Returns the offset in bytes from the start of the input buffer + /// of the first explicit kernel argument. + unsigned getExplicitKernelArgOffset() const { + return isAmdHsaOS() ? 0 : 36; + } + }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h index eaffb854793c..a9ba60c8cbad 100644 --- a/lib/Target/AMDGPU/AMDKernelCodeT.h +++ b/lib/Target/AMDGPU/AMDKernelCodeT.h @@ -12,9 +12,12 @@ #ifndef AMDKERNELCODET_H #define AMDKERNELCODET_H +#include "llvm/MC/SubtargetFeature.h" + #include <cstddef> #include <cstdint> +#include "llvm/Support/Debug.h" //---------------------------------------------------------------------------// // AMD Kernel Code, and its dependencies // //---------------------------------------------------------------------------// @@ -142,7 +145,7 @@ enum amd_code_property_mask_t { /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This /// is generally DWORD. /// - /// Use values from the amd_element_byte_size_t enum. + /// uSE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM. AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT, @@ -171,7 +174,11 @@ enum amd_code_property_mask_t { /// Indicate if code generated has support for debugging. AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15, AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1, - AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT + AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT, + + AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15, + AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1, + AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT }; /// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL @@ -369,7 +376,7 @@ typedef struct hsa_ext_control_directives_s { /// Scratch Wave Offset must be added by the kernel code and moved to /// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions. /// -/// The second SGPR is 32 bit byte size of a single work-item’s scratch +/// The second SGPR is 32 bit byte size of a single work-item's scratch /// memory usage. This is directly loaded from the dispatch packet Private /// Segment Byte Size and rounded up to a multiple of DWORD. /// @@ -385,7 +392,7 @@ typedef struct hsa_ext_control_directives_s { /// /// Private Segment Size (enable_sgpr_private_segment_size): /// Number of User SGPR registers: 1. The 32 bit byte size of a single -/// work-item’s scratch memory allocation. This is the value from the dispatch +/// work-item's scratch memory allocation. This is the value from the dispatch /// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD. /// /// \todo [Does CP need to round this to >4 byte alignment?] @@ -433,7 +440,7 @@ typedef struct hsa_ext_control_directives_s { /// present /// /// Work-Group Info (enable_sgpr_workgroup_info): -/// Number of System SGPR registers: 1. {first_wave, 14’b0000, +/// Number of System SGPR registers: 1. {first_wave, 14'b0000, /// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]} /// /// Private Segment Wave Byte Offset @@ -499,25 +506,14 @@ typedef struct hsa_ext_control_directives_s { /// Alternatively scalar loads can be used if the kernarg offset is uniform, as /// the kernarg segment is constant for the duration of the kernel execution. /// -typedef struct amd_kernel_code_s { - /// The AMD major version of the Code Object. Must be the value - /// AMD_CODE_VERSION_MAJOR. - amd_code_version32_t amd_code_version_major; - /// The AMD minor version of the Code Object. Minor versions must be - /// backward compatible. Must be the value - /// AMD_CODE_VERSION_MINOR. - amd_code_version32_t amd_code_version_minor; - - /// The byte size of this struct. Must be set to - /// sizeof(amd_kernel_code_t). Used for backward - /// compatibility. - uint32_t struct_byte_size; - - /// The target chip instruction set for which code has been - /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration - /// in sc/Interface/SCCommon.h. - uint32_t target_chip; +typedef struct amd_kernel_code_s { + uint32_t amd_kernel_code_version_major; + uint32_t amd_kernel_code_version_minor; + uint16_t amd_machine_kind; + uint16_t amd_machine_version_major; + uint16_t amd_machine_version_minor; + uint16_t amd_machine_version_stepping; /// Byte offset (possibly negative) from start of amd_kernel_code_t /// object to kernel's entry point instruction. The actual code for @@ -535,10 +531,6 @@ typedef struct amd_kernel_code_s { /// and size. The offset is from the start (possibly negative) of /// amd_kernel_code_t object. Set both to 0 if no prefetch /// information is available. - /// - /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did - /// not make the size a uint64_t as prefetching more than 4GiB seems - /// excessive. int64_t kernel_code_prefetch_byte_offset; uint64_t kernel_code_prefetch_byte_size; @@ -553,11 +545,11 @@ typedef struct amd_kernel_code_s { /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and /// COMPUTE_PGM_RSRC2 registers. - amd_compute_pgm_resource_register64_t compute_pgm_resource_registers; + uint64_t compute_pgm_resource_registers; /// Code properties. See amd_code_property_mask_t for a full list of /// properties. - amd_code_property32_t code_properties; + uint32_t code_properties; /// The amount of memory required for the combined private, spill /// and arg segments for a work-item in bytes. If @@ -629,76 +621,21 @@ typedef struct amd_kernel_code_s { /// The maximum byte alignment of variables used by the kernel in /// the specified memory segment. Expressed as a power of two. Must /// be at least HSA_POWERTWO_16. - hsa_powertwo8_t kernarg_segment_alignment; - hsa_powertwo8_t group_segment_alignment; - hsa_powertwo8_t private_segment_alignment; - - uint8_t reserved3; - - /// Type of code object. - hsa_ext_code_kind32_t code_type; - - /// Reserved for code properties if any are defined in the future. - /// There are currently no code properties so this field must be 0. - uint32_t reserved4; + uint8_t kernarg_segment_alignment; + uint8_t group_segment_alignment; + uint8_t private_segment_alignment; /// Wavefront size expressed as a power of two. Must be a power of 2 /// in range 1..64 inclusive. Used to support runtime query that /// obtains wavefront size, which may be used by application to /// allocated dynamic group memory and set the dispatch work-group /// size. - hsa_powertwo8_t wavefront_size; - - /// The optimization level specified when the kernel was - /// finalized. - uint8_t optimization_level; - - /// The HSAIL profile defines which features are used. This - /// information is from the HSAIL version directive. If this - /// amd_kernel_code_t is not generated from an HSAIL compilation - /// unit then must be 0. - hsa_ext_brig_profile8_t hsail_profile; - - /// The HSAIL machine model gives the address sizes used by the - /// code. This information is from the HSAIL version directive. If - /// not generated from an HSAIL compilation unit then must still - /// indicate for what machine mode the code is generated. - hsa_ext_brig_machine_model8_t hsail_machine_model; - - /// The HSAIL major version. This information is from the HSAIL - /// version directive. If this amd_kernel_code_t is not - /// generated from an HSAIL compilation unit then must be 0. - uint32_t hsail_version_major; - - /// The HSAIL minor version. This information is from the HSAIL - /// version directive. If this amd_kernel_code_t is not - /// generated from an HSAIL compilation unit then must be 0. - uint32_t hsail_version_minor; - - /// Reserved for HSAIL target options if any are defined in the - /// future. There are currently no target options so this field - /// must be 0. - uint16_t reserved5; - - /// Reserved. Must be 0. - uint16_t reserved6; - - /// The values should be the actually values used by the finalizer - /// in generating the code. This may be the union of values - /// specified as finalizer arguments and explicit HSAIL control - /// directives. If the finalizer chooses to ignore a control - /// directive, and not generate constrained code, then the control - /// directive should not be marked as enabled even though it was - /// present in the HSAIL or finalizer argument. The values are - /// intended to reflect the constraints that the code actually - /// requires to correctly execute, not the values that were - /// actually specified at finalize time. - hsa_ext_control_directives_t control_directive; - - /// The code can immediately follow the amd_kernel_code_t, or can - /// come after subsequent amd_kernel_code_t structs when there are - /// multiple kernels in the compilation unit. + uint8_t wavefront_size; + int32_t call_convention; + uint8_t reserved3[12]; + uint64_t runtime_loader_kernel_symbol; + uint64_t control_directives[16]; } amd_kernel_code_t; #endif // AMDKERNELCODET_H diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 80081d40d089..2018983bc306 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" @@ -314,6 +317,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { /// } +private: + bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); + bool ParseDirectiveHSACodeObjectVersion(); + bool ParseDirectiveHSACodeObjectISA(); + bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); + bool ParseDirectiveAMDKernelCodeT(); + public: AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser, const MCInstrInfo &MII, @@ -329,6 +339,11 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } + AMDGPUTargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<AMDGPUTargetStreamer &>(TS); + } + unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } @@ -403,7 +418,7 @@ struct OptionalOperand { bool (*ConvertResult)(int64_t&); }; -} // namespace +} static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) { if (IsVgpr) { @@ -581,7 +596,304 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, llvm_unreachable("Implement any new match types added!"); } +bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, + uint32_t &Minor) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid major version"); + + Major = getLexer().getTok().getIntVal(); + Lex(); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("minor version number required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid minor version"); + + Minor = getLexer().getTok().getIntVal(); + Lex(); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { + + uint32_t Major; + uint32_t Minor; + + if (ParseDirectiveMajorMinor(Major, Minor)) + return true; + + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { + + uint32_t Major; + uint32_t Minor; + uint32_t Stepping; + StringRef VendorName; + StringRef ArchName; + + // If this directive has no arguments, then use the ISA version for the + // targeted GPU. + if (getLexer().is(AsmToken::EndOfStatement)) { + AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits()); + getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, + Isa.Stepping, + "AMD", "AMDGPU"); + return false; + } + + + if (ParseDirectiveMajorMinor(Major, Minor)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("stepping version number required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid stepping version"); + + Stepping = getLexer().getTok().getIntVal(); + Lex(); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("vendor name required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("invalid vendor name"); + + VendorName = getLexer().getTok().getStringContents(); + Lex(); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("arch name required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("invalid arch name"); + + ArchName = getLexer().getTok().getStringContents(); + Lex(); + + getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, + VendorName, ArchName); + return false; +} + +bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, + amd_kernel_code_t &Header) { + + if (getLexer().isNot(AsmToken::Equal)) + return TokError("expected '='"); + Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return TokError("amd_kernel_code_t values must be integers"); + + uint64_t Value = getLexer().getTok().getIntVal(); + Lex(); + + if (ID == "kernel_code_version_major") + Header.amd_kernel_code_version_major = Value; + else if (ID == "kernel_code_version_minor") + Header.amd_kernel_code_version_minor = Value; + else if (ID == "machine_kind") + Header.amd_machine_kind = Value; + else if (ID == "machine_version_major") + Header.amd_machine_version_major = Value; + else if (ID == "machine_version_minor") + Header.amd_machine_version_minor = Value; + else if (ID == "machine_version_stepping") + Header.amd_machine_version_stepping = Value; + else if (ID == "kernel_code_entry_byte_offset") + Header.kernel_code_entry_byte_offset = Value; + else if (ID == "kernel_code_prefetch_byte_size") + Header.kernel_code_prefetch_byte_size = Value; + else if (ID == "max_scratch_backing_memory_byte_size") + Header.max_scratch_backing_memory_byte_size = Value; + else if (ID == "compute_pgm_rsrc1_vgprs") + Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value); + else if (ID == "compute_pgm_rsrc1_sgprs") + Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value); + else if (ID == "compute_pgm_rsrc1_priority") + Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value); + else if (ID == "compute_pgm_rsrc1_float_mode") + Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value); + else if (ID == "compute_pgm_rsrc1_priv") + Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value); + else if (ID == "compute_pgm_rsrc1_dx10_clamp") + Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value); + else if (ID == "compute_pgm_rsrc1_debug_mode") + Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value); + else if (ID == "compute_pgm_rsrc1_ieee_mode") + Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value); + else if (ID == "compute_pgm_rsrc2_scratch_en") + Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32); + else if (ID == "compute_pgm_rsrc2_user_sgpr") + Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32); + else if (ID == "compute_pgm_rsrc2_tgid_x_en") + Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32); + else if (ID == "compute_pgm_rsrc2_tgid_y_en") + Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32); + else if (ID == "compute_pgm_rsrc2_tgid_z_en") + Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32); + else if (ID == "compute_pgm_rsrc2_tg_size_en") + Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32); + else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt") + Header.compute_pgm_resource_registers |= + (S_00B84C_TIDIG_COMP_CNT(Value) << 32); + else if (ID == "compute_pgm_rsrc2_excp_en_msb") + Header.compute_pgm_resource_registers |= + (S_00B84C_EXCP_EN_MSB(Value) << 32); + else if (ID == "compute_pgm_rsrc2_lds_size") + Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32); + else if (ID == "compute_pgm_rsrc2_excp_en") + Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32); + else if (ID == "compute_pgm_resource_registers") + Header.compute_pgm_resource_registers = Value; + else if (ID == "enable_sgpr_private_segment_buffer") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT); + else if (ID == "enable_sgpr_dispatch_ptr") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT); + else if (ID == "enable_sgpr_queue_ptr") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT); + else if (ID == "enable_sgpr_kernarg_segment_ptr") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT); + else if (ID == "enable_sgpr_dispatch_id") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT); + else if (ID == "enable_sgpr_flat_scratch_init") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT); + else if (ID == "enable_sgpr_private_segment_size") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT); + else if (ID == "enable_sgpr_grid_workgroup_count_x") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT); + else if (ID == "enable_sgpr_grid_workgroup_count_y") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT); + else if (ID == "enable_sgpr_grid_workgroup_count_z") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT); + else if (ID == "enable_ordered_append_gds") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT); + else if (ID == "private_element_size") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT); + else if (ID == "is_ptr64") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT); + else if (ID == "is_dynamic_callstack") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT); + else if (ID == "is_debug_enabled") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT); + else if (ID == "is_xnack_enabled") + Header.code_properties |= + (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT); + else if (ID == "workitem_private_segment_byte_size") + Header.workitem_private_segment_byte_size = Value; + else if (ID == "workgroup_group_segment_byte_size") + Header.workgroup_group_segment_byte_size = Value; + else if (ID == "gds_segment_byte_size") + Header.gds_segment_byte_size = Value; + else if (ID == "kernarg_segment_byte_size") + Header.kernarg_segment_byte_size = Value; + else if (ID == "workgroup_fbarrier_count") + Header.workgroup_fbarrier_count = Value; + else if (ID == "wavefront_sgpr_count") + Header.wavefront_sgpr_count = Value; + else if (ID == "workitem_vgpr_count") + Header.workitem_vgpr_count = Value; + else if (ID == "reserved_vgpr_first") + Header.reserved_vgpr_first = Value; + else if (ID == "reserved_vgpr_count") + Header.reserved_vgpr_count = Value; + else if (ID == "reserved_sgpr_first") + Header.reserved_sgpr_first = Value; + else if (ID == "reserved_sgpr_count") + Header.reserved_sgpr_count = Value; + else if (ID == "debug_wavefront_private_segment_offset_sgpr") + Header.debug_wavefront_private_segment_offset_sgpr = Value; + else if (ID == "debug_private_segment_buffer_sgpr") + Header.debug_private_segment_buffer_sgpr = Value; + else if (ID == "kernarg_segment_alignment") + Header.kernarg_segment_alignment = Value; + else if (ID == "group_segment_alignment") + Header.group_segment_alignment = Value; + else if (ID == "private_segment_alignment") + Header.private_segment_alignment = Value; + else if (ID == "wavefront_size") + Header.wavefront_size = Value; + else if (ID == "call_convention") + Header.call_convention = Value; + else if (ID == "runtime_loader_kernel_symbol") + Header.runtime_loader_kernel_symbol = Value; + else + return TokError("amd_kernel_code_t value not recognized."); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { + + amd_kernel_code_t Header; + AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits()); + + while (true) { + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("amd_kernel_code_t values must begin on a new line"); + + // Lex EndOfStatement. This is in a while loop, because lexing a comment + // will set the current token to EndOfStatement. + while(getLexer().is(AsmToken::EndOfStatement)) + Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected value identifier or .end_amd_kernel_code_t"); + + StringRef ID = getLexer().getTok().getIdentifier(); + Lex(); + + if (ID == ".end_amd_kernel_code_t") + break; + + if (ParseAMDKernelCodeTValue(ID, Header)) + return true; + } + + getTargetStreamer().EmitAMDKernelCodeT(Header); + + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".hsa_code_object_version") + return ParseDirectiveHSACodeObjectVersion(); + + if (IDVal == ".hsa_code_object_isa") + return ParseDirectiveHSACodeObjectISA(); + + if (IDVal == ".amd_kernel_code_t") + return ParseDirectiveAMDKernelCodeT(); + return true; } diff --git a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt index 63d44d1e06f1..dab0c6f585af 100644 --- a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt +++ b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AMDGPUAsmParser parent = AMDGPU -required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo Support +required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo AMDGPUUtils Support add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 3e5ff1f3c6d4..9460bf6b9338 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -62,3 +62,4 @@ add_subdirectory(AsmParser) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) +add_subdirectory(Utils) diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt index c6861df91ed6..38c5489586f1 100644 --- a/lib/Target/AMDGPU/LLVMBuild.txt +++ b/lib/Target/AMDGPU/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo Utils [component_0] type = TargetGroup @@ -29,5 +29,5 @@ has_asmprinter = 1 type = Library name = AMDGPUCodeGen parent = AMDGPU -required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo Scalar SelectionDAG Support Target TransformUtils +required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 8bed2deef4cd..468563c44982 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -127,11 +127,14 @@ bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { namespace { class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { + bool Is64Bit; + public: - ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } + ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) : + AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { } MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - return createAMDGPUELFObjectWriter(OS); + return createAMDGPUELFObjectWriter(Is64Bit, OS); } }; @@ -140,5 +143,8 @@ public: MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU) { - return new ELFAMDGPUAsmBackend(T); + Triple TargetTriple(TT); + + // Use 64-bit ELF for amdgcn + return new ELFAMDGPUAsmBackend(T, TargetTriple.getArch() == Triple::amdgcn); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 59f45ff02d88..820f17df8960 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -18,7 +18,7 @@ namespace { class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { public: - AMDGPUELFObjectWriter(); + AMDGPUELFObjectWriter(bool Is64Bit); protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override { @@ -30,10 +30,11 @@ protected: } // End anonymous namespace -AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() - : MCELFObjectTargetWriter(false, 0, 0, false) { } +AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit) + : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA, + ELF::EM_AMDGPU, false) { } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) { - MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) { + MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit); return createELFObjectWriter(MOTW, OS, true); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h index fa3b3c3d9489..01021d67ffd9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -28,7 +28,7 @@ enum Fixups { LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; -} // namespace AMDGPU -} // namespace llvm +} +} #endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index a7d3dd1345f9..7172e4bb9335 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -14,6 +14,7 @@ #include "AMDGPUMCTargetDesc.h" #include "AMDGPUMCAsmInfo.h" +#include "AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "SIDefines.h" #include "llvm/MC/MCCodeGenInfo.h" @@ -72,6 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, return new AMDGPUInstPrinter(MAI, MII, MRI); } +static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new AMDGPUTargetAsmStreamer(S, OS); +} + +static MCTargetStreamer * createAMDGPUObjectTargetStreamer( + MCStreamer &S, + const MCSubtargetInfo &STI) { + return new AMDGPUTargetELFStreamer(S); +} + extern "C" void LLVMInitializeAMDGPUTargetMC() { for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) { RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T); @@ -84,7 +98,15 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() { TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend); } + // R600 specific registration TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createR600MCCodeEmitter); + + // GCN specific registration TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter); + + TargetRegistry::RegisterAsmTargetStreamer(TheGCNTarget, + createAMDGPUAsmTargetStreamer); + TargetRegistry::RegisterObjectTargetStreamer(TheGCNTarget, + createAMDGPUObjectTargetStreamer); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index ac611b862a1a..5d1b86b8c0c2 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -46,8 +46,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); -MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS); -} // namespace llvm +MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit, + raw_pwrite_stream &OS); +} // End llvm namespace #define GET_REGINFO_ENUM #include "AMDGPUGenRegisterInfo.inc" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp new file mode 100644 index 000000000000..09e6cb1f1ffc --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -0,0 +1,297 @@ +//===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AMDGPU specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetStreamer.h" +#include "SIDefines.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) { } + +//===----------------------------------------------------------------------===// +// AMDGPUTargetAsmStreamer +//===----------------------------------------------------------------------===// + +AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : AMDGPUTargetStreamer(S), OS(OS) { } + +void +AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, + uint32_t Minor) { + OS << "\t.hsa_code_object_version " << + Twine(Major) << "," << Twine(Minor) << '\n'; +} + +void +AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, + uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) { + OS << "\t.hsa_code_object_isa " << + Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << + ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; + +} + +void +AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { + uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32); + bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + bool EnableSGPRDispatchPtr = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + bool EnableSGPRQueuePtr = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + bool EnableSGPRKernargSegmentPtr = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + bool EnableSGPRDispatchID = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + bool EnableSGPRFlatScratchInit = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + bool EnableSGPRPrivateSegmentSize = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + bool EnableSGPRGridWorkgroupCountX = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X); + bool EnableSGPRGridWorkgroupCountY = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y); + bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z); + bool EnableOrderedAppendGDS = (Header.code_properties & + AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS); + uint32_t PrivateElementSize = (Header.code_properties & + AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >> + AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT; + bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64); + bool IsDynamicCallstack = (Header.code_properties & + AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); + bool IsDebugEnabled = (Header.code_properties & + AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED); + bool IsXNackEnabled = (Header.code_properties & + AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED); + + OS << "\t.amd_kernel_code_t\n" << + "\t\tkernel_code_version_major = " << + Header.amd_kernel_code_version_major << '\n' << + "\t\tkernel_code_version_minor = " << + Header.amd_kernel_code_version_minor << '\n' << + "\t\tmachine_kind = " << + Header.amd_machine_kind << '\n' << + "\t\tmachine_version_major = " << + Header.amd_machine_version_major << '\n' << + "\t\tmachine_version_minor = " << + Header.amd_machine_version_minor << '\n' << + "\t\tmachine_version_stepping = " << + Header.amd_machine_version_stepping << '\n' << + "\t\tkernel_code_entry_byte_offset = " << + Header.kernel_code_entry_byte_offset << '\n' << + "\t\tkernel_code_prefetch_byte_size = " << + Header.kernel_code_prefetch_byte_size << '\n' << + "\t\tmax_scratch_backing_memory_byte_size = " << + Header.max_scratch_backing_memory_byte_size << '\n' << + "\t\tcompute_pgm_rsrc1_vgprs = " << + G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_sgprs = " << + G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_priority = " << + G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_float_mode = " << + G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_priv = " << + G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_dx10_clamp = " << + G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_debug_mode = " << + G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc1_ieee_mode = " << + G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' << + "\t\tcompute_pgm_rsrc2_scratch_en = " << + G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_user_sgpr = " << + G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_tgid_x_en = " << + G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_tgid_y_en = " << + G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_tgid_z_en = " << + G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_tg_size_en = " << + G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " << + G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_excp_en_msb = " << + G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_lds_size = " << + G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' << + "\t\tcompute_pgm_rsrc2_excp_en = " << + G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' << + + "\t\tenable_sgpr_private_segment_buffer = " << + EnableSGPRPrivateSegmentBuffer << '\n' << + "\t\tenable_sgpr_dispatch_ptr = " << + EnableSGPRDispatchPtr << '\n' << + "\t\tenable_sgpr_queue_ptr = " << + EnableSGPRQueuePtr << '\n' << + "\t\tenable_sgpr_kernarg_segment_ptr = " << + EnableSGPRKernargSegmentPtr << '\n' << + "\t\tenable_sgpr_dispatch_id = " << + EnableSGPRDispatchID << '\n' << + "\t\tenable_sgpr_flat_scratch_init = " << + EnableSGPRFlatScratchInit << '\n' << + "\t\tenable_sgpr_private_segment_size = " << + EnableSGPRPrivateSegmentSize << '\n' << + "\t\tenable_sgpr_grid_workgroup_count_x = " << + EnableSGPRGridWorkgroupCountX << '\n' << + "\t\tenable_sgpr_grid_workgroup_count_y = " << + EnableSGPRGridWorkgroupCountY << '\n' << + "\t\tenable_sgpr_grid_workgroup_count_z = " << + EnableSGPRGridWorkgroupCountZ << '\n' << + "\t\tenable_ordered_append_gds = " << + EnableOrderedAppendGDS << '\n' << + "\t\tprivate_element_size = " << + PrivateElementSize << '\n' << + "\t\tis_ptr64 = " << + IsPtr64 << '\n' << + "\t\tis_dynamic_callstack = " << + IsDynamicCallstack << '\n' << + "\t\tis_debug_enabled = " << + IsDebugEnabled << '\n' << + "\t\tis_xnack_enabled = " << + IsXNackEnabled << '\n' << + "\t\tworkitem_private_segment_byte_size = " << + Header.workitem_private_segment_byte_size << '\n' << + "\t\tworkgroup_group_segment_byte_size = " << + Header.workgroup_group_segment_byte_size << '\n' << + "\t\tgds_segment_byte_size = " << + Header.gds_segment_byte_size << '\n' << + "\t\tkernarg_segment_byte_size = " << + Header.kernarg_segment_byte_size << '\n' << + "\t\tworkgroup_fbarrier_count = " << + Header.workgroup_fbarrier_count << '\n' << + "\t\twavefront_sgpr_count = " << + Header.wavefront_sgpr_count << '\n' << + "\t\tworkitem_vgpr_count = " << + Header.workitem_vgpr_count << '\n' << + "\t\treserved_vgpr_first = " << + Header.reserved_vgpr_first << '\n' << + "\t\treserved_vgpr_count = " << + Header.reserved_vgpr_count << '\n' << + "\t\treserved_sgpr_first = " << + Header.reserved_sgpr_first << '\n' << + "\t\treserved_sgpr_count = " << + Header.reserved_sgpr_count << '\n' << + "\t\tdebug_wavefront_private_segment_offset_sgpr = " << + Header.debug_wavefront_private_segment_offset_sgpr << '\n' << + "\t\tdebug_private_segment_buffer_sgpr = " << + Header.debug_private_segment_buffer_sgpr << '\n' << + "\t\tkernarg_segment_alignment = " << + (uint32_t)Header.kernarg_segment_alignment << '\n' << + "\t\tgroup_segment_alignment = " << + (uint32_t)Header.group_segment_alignment << '\n' << + "\t\tprivate_segment_alignment = " << + (uint32_t)Header.private_segment_alignment << '\n' << + "\t\twavefront_size = " << + (uint32_t)Header.wavefront_size << '\n' << + "\t\tcall_convention = " << + Header.call_convention << '\n' << + "\t\truntime_loader_kernel_symbol = " << + Header.runtime_loader_kernel_symbol << '\n' << + // TODO: control_directives + "\t.end_amd_kernel_code_t\n"; + +} + +//===----------------------------------------------------------------------===// +// AMDGPUTargetELFStreamer +//===----------------------------------------------------------------------===// + +AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S) + : AMDGPUTargetStreamer(S), Streamer(S) { } + +MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); +} + +void +AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, + uint32_t Minor) { + MCStreamer &OS = getStreamer(); + MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); + + unsigned NameSZ = 4; + + OS.PushSection(); + OS.SwitchSection(Note); + OS.EmitIntValue(NameSZ, 4); // namesz + OS.EmitIntValue(8, 4); // descz + OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type + OS.EmitBytes(StringRef("AMD", NameSZ)); // name + OS.EmitIntValue(Major, 4); // desc + OS.EmitIntValue(Minor, 4); + OS.EmitValueToAlignment(4); + OS.PopSection(); +} + +void +AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, + uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) { + MCStreamer &OS = getStreamer(); + MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); + + unsigned NameSZ = 4; + uint16_t VendorNameSize = VendorName.size() + 1; + uint16_t ArchNameSize = ArchName.size() + 1; + unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + + sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + + VendorNameSize + ArchNameSize; + + OS.PushSection(); + OS.SwitchSection(Note); + OS.EmitIntValue(NameSZ, 4); // namesz + OS.EmitIntValue(DescSZ, 4); // descsz + OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type + OS.EmitBytes(StringRef("AMD", 4)); // name + OS.EmitIntValue(VendorNameSize, 2); // desc + OS.EmitIntValue(ArchNameSize, 2); + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + OS.EmitIntValue(Stepping, 4); + OS.EmitBytes(VendorName); + OS.EmitIntValue(0, 1); // NULL terminate VendorName + OS.EmitBytes(ArchName); + OS.EmitIntValue(0, 1); // NULL terminte ArchName + OS.EmitValueToAlignment(4); + OS.PopSection(); +} + +void +AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { + + MCStreamer &OS = getStreamer(); + OS.PushSection(); + OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection()); + OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); + OS.PopSection(); +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h new file mode 100644 index 000000000000..d37677c6b863 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -0,0 +1,77 @@ +//===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AMDKernelCodeT.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +namespace llvm { + +class MCELFStreamer; + +class AMDGPUTargetStreamer : public MCTargetStreamer { +public: + AMDGPUTargetStreamer(MCStreamer &S); + virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, + uint32_t Minor) = 0; + + virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) = 0; + + virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0; +}; + +class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { + formatted_raw_ostream &OS; +public: + AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + void EmitDirectiveHSACodeObjectVersion(uint32_t Major, + uint32_t Minor) override; + + void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, + uint32_t Stepping, StringRef VendorName, + StringRef ArchName) override; + + void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; +}; + +class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { + + enum NoteType { + NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, + NT_AMDGPU_HSA_HSAIL = 2, + NT_AMDGPU_HSA_ISA = 3, + NT_AMDGPU_HSA_PRODUCER = 4, + NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, + NT_AMDGPU_HSA_EXTENSION = 6, + NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, + NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 + }; + + MCStreamer &Streamer; + +public: + AMDGPUTargetELFStreamer(MCStreamer &S); + + MCELFStreamer &getStreamer(); + + void EmitDirectiveHSACodeObjectVersion(uint32_t Major, + uint32_t Minor) override; + + void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, + uint32_t Stepping, StringRef VendorName, + StringRef ArchName) override; + + void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; + +}; + +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index 151d0d5f83de..8306a051ff98 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMAMDGPUDesc AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp AMDGPUMCAsmInfo.cpp + AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp ) diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile index 2e2de5020867..219f34daa24f 100644 --- a/lib/Target/AMDGPU/Makefile +++ b/lib/Target/AMDGPU/Makefile @@ -18,6 +18,6 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \ AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc -DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc +DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index c0ffede51999..69efb8b8bc43 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -104,7 +104,7 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; //===----------------------------------------------------------------------===// def : ProcessorModel<"bonaire", SIQuarterSpeedModel, - [FeatureSeaIslands, FeatureLDSBankCount32] + [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0] >; def : ProcessorModel<"kabini", SIQuarterSpeedModel, @@ -112,11 +112,12 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel, >; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, - [FeatureSeaIslands, FeatureLDSBankCount32] + [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0] >; def : ProcessorModel<"hawaii", SIFullSpeedModel, - [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32] + [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32, + FeatureISAVersion7_0_1] >; def : ProcessorModel<"mullins", SIQuarterSpeedModel, @@ -127,11 +128,13 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, //===----------------------------------------------------------------------===// def : ProcessorModel<"tonga", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureSGPRInitBug] + [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0] >; def : ProcessorModel<"iceland", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureSGPRInitBug] + [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0] >; -def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"carrizo", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureISAVersion8_0_1] +>; diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h index 6ff0a2204cfa..51d87eda31d1 100644 --- a/lib/Target/AMDGPU/R600Defines.h +++ b/lib/Target/AMDGPU/R600Defines.h @@ -48,7 +48,7 @@ namespace R600_InstFlag { IS_EXPORT = (1 << 17), LDS_1A2D = (1 << 18) }; -} // namespace R600_InstFlag +} #define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS) @@ -138,7 +138,7 @@ namespace OpName { VEC_COUNT }; -} // namespace OpName +} //===----------------------------------------------------------------------===// // Config register definitions diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index c25287806988..c06d3c4fd309 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -75,6 +75,6 @@ private: SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; }; -} // namespace llvm +} // End namespace llvm; #endif diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 5ef883cbcadd..855fa9fe45b2 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -697,15 +697,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Most of the following comes from the ARM implementation of AnalyzeBranch // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } + // AMDGPU::BRANCH* instructions are only available after isel and are not // handled if (isBranch(I->getOpcode())) diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index 9c5f76c882f1..dee4c2b9ae31 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -298,6 +298,6 @@ int getLDSNoRetOp(uint16_t Opcode); } //End namespace AMDGPU -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h index f5556c1e81fc..263561edd30d 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -29,6 +29,6 @@ public: unsigned StackSize; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index a1a1b4043429..0c06ccc736d0 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -375,7 +375,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { return false; } -} // namespace +} llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { return new R600VectorRegMerger(tm); diff --git a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp index 93bcf680a022..2fc7b02f673f 100644 --- a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp @@ -296,7 +296,7 @@ public: char R600TextureIntrinsicsReplacer::ID = 0; -} // namespace +} FunctionPass *llvm::createR600TextureIntrinsicsReplacer() { return new R600TextureIntrinsicsReplacer(); diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index f1b4ba1ac07d..4c3263911c40 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -39,7 +39,7 @@ enum { WQM = 1 << 20, VGPRSpill = 1 << 21 }; -} // namespace SIInstrFlags +} namespace llvm { namespace AMDGPU { @@ -74,7 +74,7 @@ namespace SIInstrFlags { P_NORMAL = 1 << 8, // Positive normal P_INFINITY = 1 << 9 // Positive infinity }; -} // namespace SIInstrFlags +} namespace SISrcMods { enum { @@ -100,16 +100,41 @@ namespace SIOutMods { #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) + #define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C #define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0) +#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1) +#define C_00B84C_SCRATCH_EN 0xFFFFFFFE #define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1) +#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F) +#define C_00B84C_USER_SGPR 0xFFFFFFC1 #define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7) +#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1) +#define C_00B84C_TGID_X_EN 0xFFFFFF7F #define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8) +#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1) +#define C_00B84C_TGID_Y_EN 0xFFFFFEFF #define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9) +#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1) +#define C_00B84C_TGID_Z_EN 0xFFFFFDFF #define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10) +#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1) +#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF #define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11) - +#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03) +#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF +/* CIK */ +#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13) +#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03) +#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF +/* */ #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15) +#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF) +#define C_00B84C_LDS_SIZE 0xFF007FFF +#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24) +#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F) +#define C_00B84C_EXCP_EN + #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 12d08cf4c7f5..ead1a3743473 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -583,7 +583,8 @@ SDValue SITargetLowering::LowerFormalArguments( if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = Splits[i].VT; - const unsigned Offset = 36 + VA.getLocMemOffset(); + const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), @@ -2211,8 +2212,9 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, std::pair<unsigned, const TargetRegisterClass *> SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + const std::string &Constraint_, MVT VT) const { + StringRef Constraint(Constraint_); if (Constraint == "r") { switch(VT.SimpleTy) { default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); @@ -2232,8 +2234,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } if (RC) { - unsigned Idx = std::atoi(Constraint.substr(2).c_str()); - if (Idx < RC->getNumRegs()) + uint32_t Idx; + bool Failed = Constraint.substr(2).getAsInteger(10, Idx); + if (!Failed && Idx < RC->getNumRegs()) return std::make_pair(RC->getRegister(Idx), RC); } } diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 47bc17823b3f..eb96bd0227b2 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } -unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { +int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { const unsigned Opcode = MI.getOpcode(); int NewOpc; // Try to map original to commuted opcode NewOpc = AMDGPU::getCommuteRev(Opcode); - // Check if the commuted (REV) opcode exists on the target. - if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) - return NewOpc; + if (NewOpc != -1) + // Check if the commuted (REV) opcode exists on the target. + return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; // Try to map commuted to original opcode NewOpc = AMDGPU::getCommuteOrig(Opcode); - // Check if the original (non-REV) opcode exists on the target. - if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) - return NewOpc; + if (NewOpc != -1) + // Check if the original (non-REV) opcode exists on the target. + return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; return Opcode; } @@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, if (MI->getNumOperands() < 3) return nullptr; + int CommutedOpcode = commuteOpcode(*MI); + if (CommutedOpcode == -1) + return nullptr; + int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src0); assert(Src0Idx != -1 && "Should always have src0 operand"); @@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, } if (MI) - MI->setDesc(get(commuteOpcode(*MI))); + MI->setDesc(get(CommutedOpcode)); return MI; } @@ -2716,8 +2720,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; - if (ST.isAmdHsaOS()) + if (ST.isAmdHsaOS()) { RsrcDataFormat |= (1ULL << 56); + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + // Set MTYPE = 2 + RsrcDataFormat |= (2ULL << 59); + } + return RsrcDataFormat; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 6fafb945c993..0382272068d2 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -117,7 +117,7 @@ public: // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned. unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; - unsigned commuteOpcode(const MachineInstr &MI) const; + int commuteOpcode(const MachineInstr &MI) const; MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const override; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 93e4ca74ec38..fcb58d5da3b0 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1740,7 +1740,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName, InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2, ClampMod:$clamp, omod:$omod), - " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", + "$dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp new file mode 100644 index 000000000000..b76b4007003f --- /dev/null +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -0,0 +1,60 @@ +//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "AMDGPUBaseInfo.h" +#include "llvm/MC/SubtargetFeature.h" + +#define GET_SUBTARGETINFO_ENUM +#include "AMDGPUGenSubtargetInfo.inc" +#undef GET_SUBTARGETINFO_ENUM + +namespace llvm { +namespace AMDGPU { + +IsaVersion getIsaVersion(const FeatureBitset &Features) { + + if (Features.test(FeatureISAVersion7_0_0)) + return {7, 0, 0}; + + if (Features.test(FeatureISAVersion7_0_1)) + return {7, 0, 1}; + + if (Features.test(FeatureISAVersion8_0_0)) + return {8, 0, 0}; + + if (Features.test(FeatureISAVersion8_0_1)) + return {8, 0, 1}; + + return {0, 0, 0}; +} + +void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, + const FeatureBitset &Features) { + + IsaVersion ISA = getIsaVersion(Features); + + memset(&Header, 0, sizeof(Header)); + + Header.amd_kernel_code_version_major = 1; + Header.amd_kernel_code_version_minor = 0; + Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU + Header.amd_machine_version_major = ISA.Major; + Header.amd_machine_version_minor = ISA.Minor; + Header.amd_machine_version_stepping = ISA.Stepping; + Header.kernel_code_entry_byte_offset = sizeof(Header); + // wavefront_size is specified as a power of 2: 2^6 = 64 threads. + Header.wavefront_size = 6; + // These alignment values are specified in powers of two, so alignment = + // 2^n. The minimum alignment is 2^4 = 16. + Header.kernarg_segment_alignment = 4; + Header.group_segment_alignment = 4; + Header.private_segment_alignment = 4; +} + +} // End namespace AMDGPU +} // End namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h new file mode 100644 index 000000000000..f57028cc5bfd --- /dev/null +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -0,0 +1,34 @@ +//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H +#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H + +#include "AMDKernelCodeT.h" + +namespace llvm { + +class FeatureBitset; + +namespace AMDGPU { + +struct IsaVersion { + unsigned Major; + unsigned Minor; + unsigned Stepping; +}; + +IsaVersion getIsaVersion(const FeatureBitset &Features); +void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, + const FeatureBitset &Features); + +} // end namespace AMDGPU +} // end namespace llvm + +#endif diff --git a/lib/Target/AMDGPU/Utils/CMakeLists.txt b/lib/Target/AMDGPU/Utils/CMakeLists.txt new file mode 100644 index 000000000000..2c07aeab7dd3 --- /dev/null +++ b/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMAMDGPUUtils + AMDGPUBaseInfo.cpp + ) diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt new file mode 100644 index 000000000000..dec5360e3bc7 --- /dev/null +++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AMDGPU/Utils/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AMDGPUUtils +parent = AMDGPU +required_libraries = Support +add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/Utils/Makefile b/lib/Target/AMDGPU/Utils/Makefile new file mode 100644 index 000000000000..1019e726d50e --- /dev/null +++ b/lib/Target/AMDGPU/Utils/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AMDGPU/Utils/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAMDGPUUtils + +# Hack: we need to include 'main' AMDGPU target directory to grab private +# headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index d554fe5d4465..9550a3a3cad1 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -46,6 +46,6 @@ FunctionPass *createThumb2SizeReductionPass( void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index c7ea18a17fef..96b4742da2bb 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -410,13 +410,13 @@ def : ProcessorModel<"cortex-r4", CortexA8Model, def : ProcessorModel<"cortex-r4f", CortexA8Model, [ProcR4, FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>; + FeatureVFP3, FeatureD16]>; // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS, FeatureVFPOnlySP, + FeatureHasRAS, FeatureD16, FeatureRClass]>; // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 4530e4155ae2..738ddedccdac 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() { } else if (STI.hasVFP4()) ATS.emitFPU(ARM::FK_NEON_VFPV4); else - ATS.emitFPU(ARM::FK_NEON); + ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, @@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() { ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) : ARM::FK_VFPV4); else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3); + ATS.emitFPU(STI.hasD16() + // +d16 + ? (STI.isFPOnlySP() + ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) + : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); else if (STI.hasVFP2()) ATS.emitFPU(ARM::FK_VFPV2); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f2b7a6419be3..b1a11d626bda 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -367,14 +367,10 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + if (!isUncondBranchOpcode(I->getOpcode()) && !isCondBranchOpcode(I->getOpcode())) return 0; @@ -594,7 +590,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { // all definitions of CPSR are dead return true; } -} // namespace llvm +} /// GetInstSize - Return the size of the specified MachineInstr. /// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 6fc0edd101b9..b4706e348933 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -493,6 +493,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII); -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 2edb96adba42..d687568d7eb9 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -281,6 +281,6 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, return true; } -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index cb4eeb5fc43d..f4ec8c67c977 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -335,7 +335,7 @@ namespace { } }; char ARMConstantIslands::ID = 0; -} // namespace +} /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index b429bed9ff25..36f63e239a9e 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -44,7 +44,7 @@ namespace ARMCP { GOTTPOFF, TPOFF }; -} // namespace ARMCP +} /// ARMConstantPoolValue - ARM specific constantpool value. This is used to /// represent PC-relative displacement between the address of the load @@ -254,6 +254,6 @@ public: } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 963b46c98e00..4438f50758dc 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -69,7 +69,7 @@ namespace { MachineBasicBlock::iterator &MBBI); }; char ARMExpandPseudo::ID = 0; -} // namespace +} /// TransferImpOps - Transfer implicit operands on the pseudo instruction to /// the instructions created from the expansion. @@ -129,7 +129,7 @@ namespace { return PseudoOpc < TE.PseudoOpc; } }; -} // namespace +} static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index cead18f97d74..4175b4af86e6 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2898,7 +2898,7 @@ const struct FoldableLoadExtendsStruct { { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 }, { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } }; -} // namespace +} /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index 5b4a44c72030..0c910ab6130f 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -92,6 +92,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) { } } -} // namespace llvm +} #endif diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 091086d3c429..a52e49780e27 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -221,7 +221,7 @@ struct StackAdjustingInsts { } } }; -} // namespace +} /// Emit an instruction sequence that will align the address in /// register Reg by zero-ing out the lower bits. For versions of the diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 98313e60e234..d763d17a506f 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -78,6 +78,6 @@ public: MachineBasicBlock::iterator MI) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 575a9d930675..50afb192b331 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -279,7 +279,7 @@ private: SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs, bool is64BitVector); }; -} // namespace +} /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 94a026bf2cc8..4b2105b7442f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -83,7 +83,7 @@ namespace { CallOrPrologue = PC; } }; -} // namespace +} // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { @@ -11404,6 +11404,167 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Addr}); } +/// \brief Lower an interleaved load into a vldN intrinsic. +/// +/// E.g. Lower an interleaved load (Factor = 2): +/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4 +/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements +/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements +/// +/// Into: +/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4) +/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0 +/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1 +bool ARMTargetLowering::lowerInterleavedLoad( + LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + assert(!Shuffles.empty() && "Empty shufflevector input"); + assert(Shuffles.size() == Indices.size() && + "Unmatched number of shufflevectors and indices"); + + VectorType *VecTy = Shuffles[0]->getType(); + Type *EltTy = VecTy->getVectorElementType(); + + const DataLayout *DL = getDataLayout(); + unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); + bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + + // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't + // support i64/f64 element). + if ((VecSize != 64 && VecSize != 128) || EltIs64Bits) + return false; + + // A pointer vector can not be the return type of the ldN intrinsics. Need to + // load integer vectors first and then convert to pointer vectors. + if (EltTy->isPointerTy()) + VecTy = VectorType::get(DL->getIntPtrType(EltTy), + VecTy->getVectorNumElements()); + + static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, + Intrinsic::arm_neon_vld3, + Intrinsic::arm_neon_vld4}; + + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy); + + IRBuilder<> Builder(LI); + SmallVector<Value *, 2> Ops; + + Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); + Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); + Ops.push_back(Builder.getInt32(LI->getAlignment())); + + CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); + + // Replace uses of each shufflevector with the corresponding vector loaded + // by ldN. + for (unsigned i = 0; i < Shuffles.size(); i++) { + ShuffleVectorInst *SV = Shuffles[i]; + unsigned Index = Indices[i]; + + Value *SubVec = Builder.CreateExtractValue(VldN, Index); + + // Convert the integer vector to pointer vector if the element is pointer. + if (EltTy->isPointerTy()) + SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + + SV->replaceAllUsesWith(SubVec); + } + + return true; +} + +/// \brief Get a mask consisting of sequential integers starting from \p Start. +/// +/// I.e. <Start, Start + 1, ..., Start + NumElts - 1> +static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumElts) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < NumElts; i++) + Mask.push_back(Builder.getInt32(Start + i)); + + return ConstantVector::get(Mask); +} + +/// \brief Lower an interleaved store into a vstN intrinsic. +/// +/// E.g. Lower an interleaved store (Factor = 3): +/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, +/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> +/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4 +/// +/// Into: +/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> +/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> +/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> +/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) +/// +/// Note that the new shufflevectors will be removed and we'll only generate one +/// vst3 instruction in CodeGen. +bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, + ShuffleVectorInst *SVI, + unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + + VectorType *VecTy = SVI->getType(); + assert(VecTy->getVectorNumElements() % Factor == 0 && + "Invalid interleaved store"); + + unsigned NumSubElts = VecTy->getVectorNumElements() / Factor; + Type *EltTy = VecTy->getVectorElementType(); + VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); + + const DataLayout *DL = getDataLayout(); + unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); + bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + + // Skip illegal sub vector types and vector types of i64/f64 element (vstN + // doesn't support i64/f64 element). + if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) + return false; + + Value *Op0 = SVI->getOperand(0); + Value *Op1 = SVI->getOperand(1); + IRBuilder<> Builder(SI); + + // StN intrinsics don't support pointer vectors as arguments. Convert pointer + // vectors to integer vectors. + if (EltTy->isPointerTy()) { + Type *IntTy = DL->getIntPtrType(EltTy); + + // Convert to the corresponding integer vector. + Type *IntVecTy = + VectorType::get(IntTy, Op0->getType()->getVectorNumElements()); + Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); + Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); + + SubVecTy = VectorType::get(IntTy, NumSubElts); + } + + static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, + Intrinsic::arm_neon_vst3, + Intrinsic::arm_neon_vst4}; + Function *VstNFunc = Intrinsic::getDeclaration( + SI->getModule(), StoreInts[Factor - 2], SubVecTy); + + SmallVector<Value *, 6> Ops; + + Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); + Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); + + // Split the shufflevector operands into sub vectors for the new vstN call. + for (unsigned i = 0; i < Factor; i++) + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts))); + + Ops.push_back(Builder.getInt32(SI->getAlignment())); + Builder.CreateCall(VstNFunc, Ops); + return true; +} + enum HABaseType { HA_UNKNOWN = 0, HA_FLOAT, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 71a47a2cb81b..74396392f8e3 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -215,7 +215,7 @@ namespace llvm { VST3LN_UPD, VST4LN_UPD }; - } // namespace ARMISD + } /// Define some predicates that are used for node matching. namespace ARM { @@ -433,6 +433,15 @@ namespace llvm { Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } + + bool lowerInterleavedLoad(LoadInst *LI, + ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, + unsigned Factor) const override; + bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const override; + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicRMWExpansionKind @@ -638,6 +647,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } -} // namespace llvm +} #endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 59e1535a6fe6..84f95be30991 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -198,7 +198,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} char ARMCGBR::ID = 0; FunctionPass* diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 9e5700a256bd..90f34ea08401 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -43,6 +43,6 @@ private: Reloc::Model RM) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 50e2292b8b6e..245c9e869bf6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -142,7 +142,7 @@ namespace { bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; char ARMLoadStoreOpt::ID = 0; -} // namespace +} static bool definesCPSR(const MachineInstr *MI) { for (const auto &MO : MI->operands()) { @@ -444,7 +444,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, return; } - if (MBBI->killsRegister(Base)) + if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) // Register got killed. Stop updating. return; } @@ -743,6 +743,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } } + for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { + MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0); + if (TransferOp.isUse() && TransferOp.getReg() == Base) + BaseKill = false; + } + SmallVector<std::pair<unsigned, bool>, 8> Regs; SmallVector<unsigned, 8> ImpDefs; SmallVector<MachineOperand *, 8> UsesOfImpDefs; @@ -1464,119 +1470,124 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { - const MachineOperand &BaseOp = MI->getOperand(2); - unsigned BaseReg = BaseOp.getReg(); - unsigned EvenReg = MI->getOperand(0).getReg(); - unsigned OddReg = MI->getOperand(1).getReg(); - unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); - unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); - // ARM errata 602117: LDRD with base in list may result in incorrect base - // register when interrupted or faulted. - bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); - if (!Errata602117 && - ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) - return false; + if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) + return false; - MachineBasicBlock::iterator NewBBI = MBBI; - bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; - bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; - bool EvenDeadKill = isLd ? - MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); - bool EvenUndef = MI->getOperand(0).isUndef(); - bool OddDeadKill = isLd ? - MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); - bool OddUndef = MI->getOperand(1).isUndef(); - bool BaseKill = BaseOp.isKill(); - bool BaseUndef = BaseOp.isUndef(); - bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); - bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); - int OffImm = getMemoryOpOffset(MI); - unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - - if (OddRegNum > EvenRegNum && OffImm == 0) { - // Ascending register numbers and no offset. It's safe to change it to a - // ldm or stm. - unsigned NewOpc = (isLd) - ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) - : (isT2 ? ARM::t2STMIA : ARM::STMIA); - if (isLd) { - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) - .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg) - .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) - .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); - ++NumLDRD2LDM; - } else { - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) - .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg) - .addReg(EvenReg, - getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) - .addReg(OddReg, - getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); - ++NumSTRD2STM; - } + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); + unsigned EvenReg = MI->getOperand(0).getReg(); + unsigned OddReg = MI->getOperand(1).getReg(); + unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); + unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); + + // ARM errata 602117: LDRD with base in list may result in incorrect base + // register when interrupted or faulted. + bool Errata602117 = EvenReg == BaseReg && + (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); + // ARM LDRD/STRD needs consecutive registers. + bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && + (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); + + if (!Errata602117 && !NonConsecutiveRegs) + return false; + + MachineBasicBlock::iterator NewBBI = MBBI; + bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; + bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; + bool EvenDeadKill = isLd ? + MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); + bool EvenUndef = MI->getOperand(0).isUndef(); + bool OddDeadKill = isLd ? + MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); + bool OddUndef = MI->getOperand(1).isUndef(); + bool BaseKill = BaseOp.isKill(); + bool BaseUndef = BaseOp.isUndef(); + bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); + bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); + int OffImm = getMemoryOpOffset(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + + if (OddRegNum > EvenRegNum && OffImm == 0) { + // Ascending register numbers and no offset. It's safe to change it to a + // ldm or stm. + unsigned NewOpc = (isLd) + ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) + : (isT2 ? ARM::t2STMIA : ARM::STMIA); + if (isLd) { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + ++NumLDRD2LDM; + } else { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, + getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) + .addReg(OddReg, + getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); + ++NumSTRD2STM; + } + NewBBI = std::prev(MBBI); + } else { + // Split into two instructions. + unsigned NewOpc = (isLd) + ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + DebugLoc dl = MBBI->getDebugLoc(); + // If this is a load and base register is killed, it may have been + // re-defed by the load, make sure the first load does not clobber it. + if (isLd && + (BaseKill || OffKill) && + (TRI->regsOverlap(EvenReg, BaseReg))) { + assert(!TRI->regsOverlap(OddReg, BaseReg)); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, + OddReg, OddDeadKill, false, + BaseReg, false, BaseUndef, false, OffUndef, + Pred, PredReg, TII, isT2); NewBBI = std::prev(MBBI); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, false, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, + Pred, PredReg, TII, isT2); } else { - // Split into two instructions. - unsigned NewOpc = (isLd) - ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) - : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); - // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, - // so adjust and use t2LDRi12 here for that. - unsigned NewOpc2 = (isLd) - ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) - : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); - DebugLoc dl = MBBI->getDebugLoc(); - // If this is a load and base register is killed, it may have been - // re-defed by the load, make sure the first load does not clobber it. - if (isLd && - (BaseKill || OffKill) && - (TRI->regsOverlap(EvenReg, BaseReg))) { - assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, - OddReg, OddDeadKill, false, - BaseReg, false, BaseUndef, false, OffUndef, - Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, - EvenReg, EvenDeadKill, false, - BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, - Pred, PredReg, TII, isT2); - } else { - if (OddReg == EvenReg && EvenDeadKill) { - // If the two source operands are the same, the kill marker is - // probably on the first one. e.g. - // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 - EvenDeadKill = false; - OddDeadKill = true; - } - // Never kill the base register in the first instruction. - if (EvenReg == BaseReg) - EvenDeadKill = false; - InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, - EvenReg, EvenDeadKill, EvenUndef, - BaseReg, false, BaseUndef, false, OffUndef, - Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, - OddReg, OddDeadKill, OddUndef, - BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, - Pred, PredReg, TII, isT2); + if (OddReg == EvenReg && EvenDeadKill) { + // If the two source operands are the same, the kill marker is + // probably on the first one. e.g. + // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 + EvenDeadKill = false; + OddDeadKill = true; } - if (isLd) - ++NumLDRD2LDR; - else - ++NumSTRD2STR; + // Never kill the base register in the first instruction. + if (EvenReg == BaseReg) + EvenDeadKill = false; + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, EvenUndef, + BaseReg, false, BaseUndef, false, OffUndef, + Pred, PredReg, TII, isT2); + NewBBI = std::prev(MBBI); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, + OddReg, OddDeadKill, OddUndef, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, + Pred, PredReg, TII, isT2); } - - MBB.erase(MI); - MBBI = NewBBI; - return true; + if (isLd) + ++NumLDRD2LDR; + else + ++NumSTRD2STR; } - return false; + + MBB.erase(MI); + MBBI = NewBBI; + return true; } /// An optimization pass to turn multiple LDR / STR ops of the same base and @@ -1859,7 +1870,7 @@ namespace { bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); }; char ARMPreAllocLoadStoreOpt::ID = 0; -} // namespace +} bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { TD = Fn.getTarget().getDataLayout(); diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 8b1210268eb2..14dd9ef333af 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -229,6 +229,6 @@ public: return It; } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 1c8e1f8b1412..30baf4263c11 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -32,7 +32,7 @@ public: } }; char ARMOptimizeBarriersPass::ID = 0; -} // namespace +} // Returns whether the instruction can safely move past a DMB instruction // The current implementation allows this iif MI does not have any possible diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 4563caae9ffe..1db190f41e1a 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -70,6 +70,6 @@ public: RTLIB::Libcall LC) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f00594f82012..9909a6a6d198 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -453,6 +453,6 @@ public: /// True if fast-isel is used. bool useFastISel() const; }; -} // namespace llvm +} // End llvm namespace #endif // ARMSUBTARGET_H diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 104a34f97e5e..6e81bd2d349d 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -332,6 +332,10 @@ void ARMPassConfig::addIRPasses() { })); TargetPassConfig::addIRPasses(); + + // Match interleaved memory accesses to ldN/stN intrinsics. + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createInterleavedAccessPass(TM)); } bool ARMPassConfig::addPreISel() { diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 4e1b371640bc..f4901fc24e44 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -478,3 +478,28 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } return LT.first; } + +unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, + unsigned AddressSpace) { + assert(Factor >= 2 && "Invalid interleave factor"); + assert(isa<VectorType>(VecTy) && "Expect a vector type"); + + // vldN/vstN doesn't support vector types of i64/f64 element. + bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + + if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { + unsigned NumElts = VecTy->getVectorNumElements(); + Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); + unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + + // vldN/vstN only support legal vector types of size 64 or 128 in bits. + if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) + return Factor; + } + + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 9479d7693ebf..f2e5db655ccf 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -126,6 +126,11 @@ public: unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace); + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 35387d3e6cf1..c2db74619871 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" @@ -9887,22 +9888,13 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { } Lex(); + MCSymbol *Sym; const MCExpr *Value; - if (Parser.parseExpression(Value)) { - TokError("missing expression"); - Parser.eatToEndOfStatement(); - return false; - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - TokError("unexpected token"); - Parser.eatToEndOfStatement(); - return false; - } - Lex(); + if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true, + Parser, Sym, Value)) + return true; - MCSymbol *Alias = getContext().getOrCreateSymbol(Name); - getTargetStreamer().emitThumbSet(Alias, Value); + getTargetStreamer().emitThumbSet(Sym, Value); return false; } diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index f973a8de8bcf..097ec04e7052 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -81,7 +81,7 @@ namespace { private: std::vector<unsigned char> ITStates; }; -} // namespace +} namespace { /// ARM disassembler for all ARM platforms. @@ -118,7 +118,7 @@ private: DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(MCInst&) const; }; -} // namespace +} static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index e28f6e097421..a6206e3d9585 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -29,6 +29,6 @@ public: Subtype); } }; -} // namespace +} #endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 412feb8873ca..68b12edd089e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -23,6 +23,6 @@ public: return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; -} // namespace +} #endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 1975bcaa234e..4289a73e9d6b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -114,7 +114,7 @@ namespace ARM_PROC { case ID: return "id"; } } -} // namespace ARM_PROC +} namespace ARM_MB { // The Memory Barrier Option constants map directly to the 4-bit encoding of @@ -459,6 +459,6 @@ namespace ARMII { } // end namespace ARMII -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 9fe27fbcff4a..804d3534096a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -40,7 +40,7 @@ namespace { bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; -} // namespace +} ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index bbc0b37175df..4d12bfb5d60f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -563,20 +563,13 @@ private: } void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().createTempSymbol(); - EmitLabel(Start); - auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( Name + "." + Twine(MappingSymbolCounter++))); + EmitLabel(Symbol); - getAssembler().registerSymbol(*Symbol); Symbol->setType(ELF::STT_NOTYPE); Symbol->setBinding(ELF::STB_LOCAL); Symbol->setExternal(false); - AssignSection(Symbol, getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); - Symbol->setVariableValue(Value); } void EmitThumbFunc(MCSymbol *Func) override { @@ -804,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3B, /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_D16_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + + case ARM::FK_VFPV3XD: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3XD_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, @@ -849,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::FK_NEON_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeon, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_NEON_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, @@ -1345,6 +1382,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, return S; } -} // namespace llvm +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 23ef50132900..46ba57170db5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -104,7 +104,7 @@ enum Fixups { LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; -} // namespace ARM -} // namespace llvm +} +} #endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 0fb395e473a6..fafe25ae5be5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -370,7 +370,7 @@ public: } }; -} // namespace +} static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { return new ARMMCInstrAnalysis(Info); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index c6f2d1341623..fd30623d79af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -103,7 +103,7 @@ MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, /// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); -} // namespace llvm +} // End llvm namespace // Defines symbolic names for ARM registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 6ac778e0cecd..95d7ea7c04a3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -56,7 +56,7 @@ public: const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; }; -} // namespace +} static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, unsigned &Log2Size) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 32481e276b00..173cc93d44fb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -60,7 +60,7 @@ namespace { EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH); } }; -} // namespace +} void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { if (RegSave == 0u) diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 34b552f7a212..166c04b41a77 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -79,7 +79,7 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; } -} // namespace +} namespace llvm { MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index 6515a650be59..b993b1be4847 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -35,7 +35,7 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } -} // namespace +} MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index ca98f696b7dd..ed2deeaa24c0 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -71,7 +71,7 @@ namespace { bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); }; char MLxExpansion::ID = 0; -} // namespace +} void MLxExpansion::clearStack() { std::fill(LastMIs, LastMIs + 4, nullptr); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index e5e89fad3d71..31d57325ebd6 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -47,6 +47,6 @@ public: MachineBasicBlock::iterator MI) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 31b4df2e5b0c..f3f493d89237 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -58,6 +58,6 @@ private: void expandLoadStackGuard(MachineBasicBlock::iterator MI, Reloc::Model RM) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 7ce602d326cd..68736bc1decd 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -48,7 +48,7 @@ namespace { bool InsertITInstructions(MachineBasicBlock &MBB); }; char Thumb2ITBlockPass::ID = 0; -} // namespace +} /// TrackDefUses - Tracking what registers are being defined and used by /// instructions in the IT block. This also tracks "dependencies", i.e. uses diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index d186dfb2ec91..916ab06ec305 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -73,6 +73,6 @@ private: ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg); -} // namespace llvm +} #endif diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 0dd1b4c15ef8..d9ab824995c1 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -202,7 +202,7 @@ namespace { std::function<bool(const Function &)> PredicateFtor; }; char Thumb2SizeReduce::ID = 0; -} // namespace +} Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) : MachineFunctionPass(ID), PredicateFtor(Ftor) { diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h index e55f88f53aec..23aaff37f409 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.h +++ b/lib/Target/ARM/ThumbRegisterInfo.h @@ -60,6 +60,6 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index 9d0aa7a98a64..10ec6587550b 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -44,7 +44,7 @@ public: const char *Modifier = nullptr); void EmitInstruction(const MachineInstr *MI) override; }; -} // namespace +} void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier) { diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index a6fe7c98115b..3b9fc443e053 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -37,5 +37,5 @@ public: MBB.erase(MI); } }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index b49de3a27083..d9e654c76428 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -51,7 +51,7 @@ private: // Complex Pattern for address selection. bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); }; -} // namespace +} // ComplexPattern used on BPF Load/Store instructions bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 21d160d49946..38c56bbef81e 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -86,7 +86,7 @@ public: }; int DiagnosticInfoUnsupported::KindID = 0; -} // namespace +} BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, const BPFSubtarget &STI) diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h index b56bb39ca85d..ec71dca2faeb 100644 --- a/lib/Target/BPF/BPFISelLowering.h +++ b/lib/Target/BPF/BPFISelLowering.h @@ -85,6 +85,6 @@ private: return true; } }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h index bd96f76a8075..ac60188804d2 100644 --- a/lib/Target/BPF/BPFInstrInfo.h +++ b/lib/Target/BPF/BPFInstrInfo.h @@ -54,6 +54,6 @@ public: MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, DebugLoc DL) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h index ba9189792cbb..054e89407db2 100644 --- a/lib/Target/BPF/BPFMCInstLower.h +++ b/lib/Target/BPF/BPFMCInstLower.h @@ -38,6 +38,6 @@ public: MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h index 44977a210959..7072dd0bde1a 100644 --- a/lib/Target/BPF/BPFRegisterInfo.h +++ b/lib/Target/BPF/BPFRegisterInfo.h @@ -35,6 +35,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo { unsigned getFrameRegister(const MachineFunction &MF) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/BPFSubtarget.h b/lib/Target/BPF/BPFSubtarget.h index 701ac577dd74..5ad58db75395 100644 --- a/lib/Target/BPF/BPFSubtarget.h +++ b/lib/Target/BPF/BPFSubtarget.h @@ -59,6 +59,6 @@ public: return &InstrInfo.getRegisterInfo(); } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 5a888a955e33..06cba2252a25 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -60,7 +60,7 @@ public: bool addInstSelector() override; }; -} // namespace +} TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { return new BPFPassConfig(this, PM); diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h index c715fd5f0089..a0086df2d32c 100644 --- a/lib/Target/BPF/BPFTargetMachine.h +++ b/lib/Target/BPF/BPFTargetMachine.h @@ -38,6 +38,6 @@ public: return TLOF.get(); } }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index cb074713cce5..adcaff686933 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -37,6 +37,6 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 33aecb7b8ec3..36f99262ed70 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -84,7 +84,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0, IsLittleEndian); } -} // namespace +} MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index ef4f05f3d810..05ba6183e322 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -25,7 +25,7 @@ protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; }; -} // namespace +} BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE, diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index 22376543bd05..d63bbf49294e 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -36,6 +36,6 @@ public: HasDotTypeDotSizeDirective = false; } }; -} // namespace llvm +} #endif diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index b579afd690e9..dc4ede30f191 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -58,7 +58,7 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; }; -} // namespace +} MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 3d2583a11349..e2ae6526edc6 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -49,7 +49,7 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI, MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian); -} // namespace llvm +} // Defines symbolic names for BPF registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 9c9c097b4c3d..bc5d7f65b2f6 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1678,9 +1678,8 @@ void CppWriter::printFunctionUses(const Function* F) { consts.insert(GVar->getInitializer()); } else if (Constant* C = dyn_cast<Constant>(operand)) { consts.insert(C); - for (unsigned j = 0; j < C->getNumOperands(); ++j) { + for (Value* operand : C->operands()) { // If the operand references a GVal or Constant, make a note of it - Value* operand = C->getOperand(j); printType(operand->getType()); if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { gvs.insert(GV); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 0cd20daa12fa..ebf0635b12e4 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -37,7 +37,7 @@ public: extern Target TheCppBackendTarget; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 837838afc0f2..9cc1e944d359 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -53,7 +53,7 @@ public: raw_ostream &VStream, raw_ostream &CStream) const override; }; -} // namespace +} static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index b24d24a6d6f2..d360be2aa5b2 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -58,6 +58,6 @@ namespace llvm { /// \brief Creates a Hexagon-specific Target Transformation Info pass. ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index f09a5b91fe8b..792fc8b7af3a 100755 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -53,6 +53,6 @@ namespace llvm { static const char *getRegisterName(unsigned RegNo); }; -} // namespace llvm +} // end of llvm namespace #endif diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index ff1a4fe30757..3753b745657b 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -228,7 +228,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { } return true; } -} // namespace +} //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 33766dfb830c..37ed173a79cd 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -173,7 +173,7 @@ namespace { bool coalesceRegisters(RegisterRef R1, RegisterRef R2); bool coalesceSegments(MachineFunction &MF); }; -} // namespace +} char HexagonExpandCondsets::ID = 0; diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 1657d88a4f43..e4c8d8f7b28c 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -333,7 +333,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { return true; } -} // namespace +} //===----------------------------------------------------------------------===// // Public Constructor Functions diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 3ea77cdbb1f7..d0c7f9c8960f 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -67,7 +67,7 @@ namespace { }; char HexagonFixupHwLoops::ID = 0; -} // namespace +} INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup", "Hexagon Hardware Loops Fixup", false, false) diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 9797134f41ad..868f87e18413 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -238,7 +238,7 @@ namespace { return true; return false; } -} // namespace +} /// Implements shrink-wrapping of the stack frame. By default, stack frame diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 767e13cbd6a6..89500cb85724 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -99,6 +99,6 @@ private: bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 1a14c88f04fd..6e9e69f5a2c7 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -95,7 +95,7 @@ public: unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } }; -} // namespace +} // Implement calling convention for Hexagon. static bool diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index b9d18df05b54..b80e8477eb7b 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -86,7 +86,7 @@ bool isPositiveHalfWord(SDNode *N); OP_END }; - } // namespace HexagonISD + } class HexagonSubtarget; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 91f508ee5ecf..d0b8a4631c1d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -229,6 +229,6 @@ public: }; -} // namespace llvm +} #endif diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 5681ae29831f..76723586c66e 100644 --- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -80,6 +80,6 @@ public: void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; } unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index fae16e2a0612..60343442e327 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -238,7 +238,7 @@ protected: #endif }; -} // namespace llvm +} // namespace #endif diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 94ec2e7ca6c1..93dcbe233b25 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -104,7 +104,7 @@ namespace { private: void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); }; -} // namespace +} char HexagonPeephole::ID = 0; diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index d586c395a9ad..7069ad36e21a 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -48,7 +48,7 @@ namespace { FunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} char HexagonRemoveExtendArgs::ID = 0; diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index c72051ca1348..8ac2e43f9294 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -32,6 +32,6 @@ public: MachinePointerInfo SrcPtrInfo) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 61bb7c5139e4..d3eb56f4ba0f 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -156,7 +156,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { return true; } -} // namespace +} //===----------------------------------------------------------------------===// // Public Constructor Functions diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 90f1ced5420a..a173a8087832 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -77,7 +77,7 @@ namespace llvm { FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); -} // namespace llvm +} // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// diff --git a/lib/Target/Hexagon/HexagonTargetStreamer.h b/lib/Target/Hexagon/HexagonTargetStreamer.h index 2b4a3ada506b..e19c404450e6 100644 --- a/lib/Target/Hexagon/HexagonTargetStreamer.h +++ b/lib/Target/Hexagon/HexagonTargetStreamer.h @@ -26,6 +26,6 @@ public: unsigned ByteAlign, unsigned AccessGranularity){}; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 66fdd65b3ea7..b91a3f6f8c6c 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -170,7 +170,7 @@ namespace { void reserveResourcesForConstExt(MachineInstr* MI); bool isNewValueInst(MachineInstr* MI); }; -} // namespace +} INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) @@ -272,9 +272,8 @@ static bool IsIndirectCall(MachineInstr* MI) { // reservation fail. void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); + MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), + MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { ResourceTracker->reserveResources(PseudoMI); @@ -290,11 +289,10 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && "Should only be called for constant extended instructions"); - MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); + MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), + MI->getDebugLoc()); bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); - MF->DeleteMachineInstr(PseudoMI); + MF.DeleteMachineInstr(PseudoMI); return CanReserve; } @@ -302,9 +300,8 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { // true, otherwise, return false. bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); + MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), + MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { ResourceTracker->reserveResources(PseudoMI); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 0f7cf0e7fcbd..da5d4d1da69b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -31,7 +31,7 @@ public: unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, bool IsPCRel) const override; }; -} // namespace +} HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 6f8cb90f18f9..9fc4e2aeaba6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -370,7 +370,7 @@ namespace { return false; } } -} // namespace +} unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index 0d1f1e607e63..886f8db3bc63 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -174,7 +174,7 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { return HexagonII::HCG_None; } -} // namespace +} /// getCompoundOp - Return the index from 0-7 into the above opcode lists. namespace { @@ -199,7 +199,7 @@ unsigned getCompoundOp(MCInst const &HMCI) { return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t; } } -} // namespace +} namespace { MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { @@ -331,7 +331,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { return CompoundInsn; } -} // namespace +} /// Non-Symmetrical. See if these two instructions are fit for compound pair. namespace { @@ -348,7 +348,7 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); } -} // namespace +} namespace { bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { @@ -396,7 +396,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { } return false; } -} // namespace +} /// tryCompound - Given a bundle check for compound insns when one /// is found update the contents fo the bundle with the compound insn. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index e69a52de5c77..48b15f85a783 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -461,4 +461,4 @@ void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { MCOperand &Operand = MCI.getOperand(0); Operand.setImm(Operand.getImm() | outerLoopMask); } -} // namespace llvm +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 9f7562a20063..32d61a4a7be5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -229,7 +229,7 @@ bool subInstWouldBeExtended(MCInst const &potentialDuplex); // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); -} // namespace HexagonMCInstrInfo -} // namespace llvm +} +} #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h index 9c0e3f2bbf6e..a21cce1fc240 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -60,6 +60,6 @@ bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &, SmallVector<DuplexCandidate, 8>); -} // namespace llvm +} #endif // HEXAGONMCSHUFFLER_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 4a4f0c21afa2..83ce0abd835e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -102,7 +102,7 @@ public: OS << "\n\t}" << PacketBundle.second; } }; -} // namespace +} namespace { class HexagonTargetELFStreamer : public HexagonTargetStreamer { @@ -137,7 +137,7 @@ public: Symbol, Size, ByteAlignment, AccessSize); } }; -} // namespace +} static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { @@ -172,9 +172,10 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, return nullptr; } -MCTargetStreamer *createMCAsmTargetStreamer( - MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, - bool IsVerboseAsm) { +static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool IsVerboseAsm) { return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 89c3eb3cd65e..cb626503313f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -49,7 +49,7 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T, MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU); -} // namespace llvm +} // End llvm namespace // Define symbolic names for Hexagon registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index feaaa4f780d5..41112ac0b46e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -81,6 +81,9 @@ unsigned HexagonResource::setWeight(unsigned s) { const unsigned MaskWeight = SlotWeight - 1; bool Key = (1 << s) & getUnits(); + // TODO: Improve this API so that we can prevent misuse statically. + assert(SlotWeight * s < 32 && "Argument to setWeight too large."); + // Calculate relative weight of the insn for the given slot, weighing it the // heavier the more restrictive the insn is and the lowest the slots that the // insn may be executed in. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 53325f6edb7c..8b6c72ee25e6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -34,8 +34,7 @@ public: HexagonResource(unsigned s) { setUnits(s); }; void setUnits(unsigned s) { - Slots = s & ~(-1 << HEXAGON_PACKET_SIZE); - setWeight(s); + Slots = s & ~(~0U << HEXAGON_PACKET_SIZE); }; unsigned setWeight(unsigned s); @@ -134,6 +133,6 @@ public: void setError(unsigned Err) { Error = Err; }; unsigned getError() const { return (Error); }; }; -} // namespace llvm +} #endif // HEXAGONSHUFFLER_H diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index ab8232489282..f05d7a465252 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -31,6 +31,7 @@ subdirectories = PowerPC Sparc SystemZ + WebAssembly X86 XCore diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 80565aab180e..70141a998e4a 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -40,6 +40,6 @@ namespace llvm { void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; -} // namespace llvm +} #endif diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index 302012e1b148..796f25233123 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -30,7 +30,7 @@ namespace MSP430CC { COND_INVALID = -1 }; -} // namespace MSP430CC +} namespace llvm { class MSP430TargetMachine; @@ -42,6 +42,6 @@ namespace llvm { FunctionPass *createMSP430BranchSelectionPass(); -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 2bc11c07f8ff..ffcf22216d4f 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -44,7 +44,7 @@ namespace { } }; char MSP430BSel::ID = 0; -} // namespace +} /// createMSP430BranchSelectionPass - returns an instance of the Branch /// Selection Pass diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index 2f20bbd8ae15..48c4dc866a63 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -49,6 +49,6 @@ public: RegScavenger *RS = nullptr) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index a60108df360c..5ce5013d898c 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -85,7 +85,7 @@ namespace { errs() << " JT" << JT << " Align" << Align << '\n'; } }; -} // namespace +} /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine /// instructions for SelectionDAG operations. diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index b09060939ac5..80d3ae175fb1 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -64,7 +64,7 @@ namespace llvm { /// SHL, SRA, SRL - Non-constant shifts. SHL, SRA, SRL }; - } // namespace MSP430ISD + } class MSP430Subtarget; class MSP430TargetLowering : public TargetLowering { diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index c6bad1eadd65..3cf3b1bb8ab2 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -38,7 +38,7 @@ namespace MSP430II { Size4Bytes = 3 << SizeShift, Size6Bytes = 4 << SizeShift }; -} // namespace MSP430II +} class MSP430InstrInfo : public MSP430GenInstrInfo { const MSP430RegisterInfo RI; @@ -87,6 +87,6 @@ public: }; -} // namespace llvm +} #endif diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h index ebbc6e51286e..ebd639744bcc 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.h +++ b/lib/Target/MSP430/MSP430MCInstLower.h @@ -42,6 +42,6 @@ public: MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index 3d1a245c4fea..fcc5f5b88600 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -49,6 +49,6 @@ public: void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h index 95c929372a7f..61a6b19111db 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -26,6 +26,6 @@ public: ~MSP430SelectionDAGInfo(); }; -} // namespace llvm +} #endif diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 958a5d39487d..81f6f027d45c 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -64,6 +64,6 @@ public: return &TSInfo; } }; -} // namespace llvm +} // End llvm namespace #endif // LLVM_TARGET_MSP430_SUBTARGET_H diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 5b8d633554b8..f14156dbfa2b 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -113,6 +113,7 @@ class MipsAsmParser : public MCTargetAsmParser { // nullptr, which indicates that no function is currently // selected. This usually happens after an '.end func' // directive. + bool IsLittleEndian; // Print a warning along with its fix-it message at the given range. void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg, @@ -214,11 +215,17 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandCondBranches(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + bool expandUlhu(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + + bool expandUlw(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + void createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, - SmallVectorImpl<MCInst> &Instructions); + bool Is64Bit, SmallVectorImpl<MCInst> &Instructions); bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -251,6 +258,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetMips16Directive(); bool parseSetNoMips16Directive(); bool parseSetFpDirective(); + bool parseSetOddSPRegDirective(); + bool parseSetNoOddSPRegDirective(); bool parseSetPopDirective(); bool parseSetPushDirective(); bool parseSetSoftFloatDirective(); @@ -352,6 +361,16 @@ class MipsAsmParser : public MCTargetAsmParser { } } + void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + setFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + } + + void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + clearFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + } + public: enum MipsMatchResultTy { Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY @@ -387,6 +406,13 @@ public: report_fatal_error("-mno-odd-spreg requires the O32 ABI"); CurrentFn = nullptr; + + Triple TheTriple(sti.getTargetTriple()); + if ((TheTriple.getArch() == Triple::mips) || + (TheTriple.getArch() == Triple::mips64)) + IsLittleEndian = false; + else + IsLittleEndian = true; } /// True if all of $fcc0 - $fcc7 exist for the current ISA. @@ -462,6 +488,8 @@ public: void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc); void warnIfNoMacro(SMLoc Loc); + + bool isLittle() const { return IsLittleEndian; } }; } @@ -486,11 +514,11 @@ public: RegKind_CCR = 128, /// CCR RegKind_HWRegs = 256, /// HWRegs RegKind_COP3 = 512, /// COP3 - + RegKind_COP0 = 1024, /// COP0 /// Potentially any (e.g. $1) RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCC | RegKind_MSA128 | RegKind_MSACtrl | RegKind_COP2 | RegKind_ACC | - RegKind_CCR | RegKind_HWRegs | RegKind_COP3 + RegKind_CCR | RegKind_HWRegs | RegKind_COP3 | RegKind_COP0 }; private: @@ -652,6 +680,14 @@ private: return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); } + /// Coerce the register to COP0 and return the real register for the + /// current target. + unsigned getCOP0Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_COP0) && "Invalid access!"); + unsigned ClassID = Mips::COP0RegClassID; + return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + /// Coerce the register to COP2 and return the real register for the /// current target. unsigned getCOP2Reg() const { @@ -793,6 +829,11 @@ public: Inst.addOperand(MCOperand::createReg(getMSACtrlReg())); } + void addCOP0AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getCOP0Reg())); + } + void addCOP2AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getCOP2Reg())); @@ -1168,6 +1209,9 @@ public: bool isACCAsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3; } + bool isCOP0AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_COP0 && RegIdx.Index <= 31; + } bool isCOP2AsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31; } @@ -1635,6 +1679,8 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::BLEU: case Mips::BGEU: case Mips::BGTU: + case Mips::Ulhu: + case Mips::Ulw: return true; default: return false; @@ -1673,6 +1719,10 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::BGEU: case Mips::BGTU: return expandCondBranches(Inst, IDLoc, Instructions); + case Mips::Ulhu: + return expandUlhu(Inst, IDLoc, Instructions); + case Mips::Ulw: + return expandUlw(Inst, IDLoc, Instructions); } } @@ -1774,6 +1824,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, MCInst tmpInst; + unsigned TmpReg = DstReg; + if (UseSrcReg && (DstReg == SrcReg)) { + // At this point we need AT to perform the expansions and we exit if it is + // not available. + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + tmpInst.setLoc(IDLoc); // FIXME: gas has a special case for values that are 000...1111, which // becomes a li -1 and then a dsrl @@ -1810,23 +1870,23 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the // upper 32 bits. tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::createImm(Bits31To16)); tmpInst.setLoc(IDLoc); Instructions.push_back(tmpInst); // Move the value to the upper 16 bits by doing a 16-bit left shift. - createLShiftOri<16>(0, DstReg, IDLoc, Instructions); + createLShiftOri<16>(0, TmpReg, IDLoc, Instructions); } else { tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createImm(Bits31To16)); Instructions.push_back(tmpInst); } - createLShiftOri<0>(Bits15To0, DstReg, IDLoc, Instructions); + createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions); if (UseSrcReg) - createAddu(DstReg, DstReg, SrcReg, Instructions); + createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else if ((ImmValue & (0xffffLL << 48)) == 0) { if (Is32BitImm) { @@ -1853,14 +1913,14 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits15To0 = ImmValue & 0xffff; tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createImm(Bits47To32)); Instructions.push_back(tmpInst); - createLShiftOri<0>(Bits31To16, DstReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions); + createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions); + createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); if (UseSrcReg) - createAddu(DstReg, DstReg, SrcReg, Instructions); + createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else { if (Is32BitImm) { @@ -1889,22 +1949,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits15To0 = ImmValue & 0xffff; tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createImm(Bits63To48)); Instructions.push_back(tmpInst); - createLShiftOri<0>(Bits47To32, DstReg, IDLoc, Instructions); + createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions); // When Bits31To16 is 0, do a left shift of 32 bits instead of doing // two left shifts of 16 bits. if (Bits31To16 == 0) { - createLShiftOri<32>(Bits15To0, DstReg, IDLoc, Instructions); + createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions); } else { - createLShiftOri<16>(Bits31To16, DstReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions); + createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions); + createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); } if (UseSrcReg) - createAddu(DstReg, DstReg, SrcReg, Instructions); + createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } return false; } @@ -1991,6 +2051,18 @@ bool MipsAsmParser::loadAndAddSymbolAddress( const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create( &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext()); + bool UseSrcReg = SrcReg != Mips::NoRegister; + + unsigned TmpReg = DstReg; + if (UseSrcReg && (DstReg == SrcReg)) { + // At this point we need AT to perform the expansions and we exit if it is + // not available. + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + if (!Is32BitSym) { // If it's a 64-bit architecture, expand to: // la d,sym => lui d,highest(sym) @@ -2005,31 +2077,31 @@ bool MipsAsmParser::loadAndAddSymbolAddress( &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext()); tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createExpr(HighestExpr)); Instructions.push_back(tmpInst); - createLShiftOri<0>(MCOperand::createExpr(HigherExpr), DstReg, SMLoc(), + createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(), Instructions); - createLShiftOri<16>(MCOperand::createExpr(HiExpr), DstReg, SMLoc(), + createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(), Instructions); - createLShiftOri<16>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(), + createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), Instructions); } else { // Otherwise, expand to: // la d,sym => lui d,hi16(sym) // ori d,d,lo16(sym) tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(TmpReg)); tmpInst.addOperand(MCOperand::createExpr(HiExpr)); Instructions.push_back(tmpInst); - createLShiftOri<0>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(), + createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), Instructions); } - if (SrcReg != Mips::NoRegister) - createAddu(DstReg, DstReg, SrcReg, Instructions); + if (UseSrcReg) + createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions); return false; } @@ -2449,6 +2521,174 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, return false; } +bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + if (hasMips32r6() || hasMips64r6()) { + Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); + return false; + } + + warnIfNoMacro(IDLoc); + + const MCOperand &DstRegOp = Inst.getOperand(0); + assert(DstRegOp.isReg() && "expected register operand kind"); + + const MCOperand &SrcRegOp = Inst.getOperand(1); + assert(SrcRegOp.isReg() && "expected register operand kind"); + + const MCOperand &OffsetImmOp = Inst.getOperand(2); + assert(OffsetImmOp.isImm() && "expected immediate operand kind"); + + unsigned DstReg = DstRegOp.getReg(); + unsigned SrcReg = SrcRegOp.getReg(); + int64_t OffsetValue = OffsetImmOp.getImm(); + + // NOTE: We always need AT for ULHU, as it is always used as the source + // register for one of the LBu's. + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + // When the value of offset+1 does not fit in 16 bits, we have to load the + // offset in AT, (D)ADDu the original source register (if there was one), and + // then use AT as the source register for the 2 generated LBu's. + bool LoadedOffsetInAT = false; + if (!isInt<16>(OffsetValue + 1) || !isInt<16>(OffsetValue)) { + LoadedOffsetInAT = true; + + if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), + IDLoc, Instructions)) + return true; + + // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() + // because it will make our output more similar to GAS'. For example, + // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9", + // instead of just an "ori $1, $9, 32768". + // NOTE: If there is no source register specified in the ULHU, the parser + // will interpret it as $0. + if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64) + createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions); + } + + unsigned FirstLbuDstReg = LoadedOffsetInAT ? DstReg : ATReg; + unsigned SecondLbuDstReg = LoadedOffsetInAT ? ATReg : DstReg; + unsigned LbuSrcReg = LoadedOffsetInAT ? ATReg : SrcReg; + + int64_t FirstLbuOffset = 0, SecondLbuOffset = 0; + if (isLittle()) { + FirstLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1); + SecondLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue; + } else { + FirstLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue; + SecondLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1); + } + + unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg; + + MCInst TmpInst; + TmpInst.setOpcode(Mips::LBu); + TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg)); + TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); + TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset)); + Instructions.push_back(TmpInst); + + TmpInst.clear(); + TmpInst.setOpcode(Mips::LBu); + TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg)); + TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); + TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset)); + Instructions.push_back(TmpInst); + + TmpInst.clear(); + TmpInst.setOpcode(Mips::SLL); + TmpInst.addOperand(MCOperand::createReg(SllReg)); + TmpInst.addOperand(MCOperand::createReg(SllReg)); + TmpInst.addOperand(MCOperand::createImm(8)); + Instructions.push_back(TmpInst); + + TmpInst.clear(); + TmpInst.setOpcode(Mips::OR); + TmpInst.addOperand(MCOperand::createReg(DstReg)); + TmpInst.addOperand(MCOperand::createReg(DstReg)); + TmpInst.addOperand(MCOperand::createReg(ATReg)); + Instructions.push_back(TmpInst); + + return false; +} + +bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + if (hasMips32r6() || hasMips64r6()) { + Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); + return false; + } + + const MCOperand &DstRegOp = Inst.getOperand(0); + assert(DstRegOp.isReg() && "expected register operand kind"); + + const MCOperand &SrcRegOp = Inst.getOperand(1); + assert(SrcRegOp.isReg() && "expected register operand kind"); + + const MCOperand &OffsetImmOp = Inst.getOperand(2); + assert(OffsetImmOp.isImm() && "expected immediate operand kind"); + + unsigned SrcReg = SrcRegOp.getReg(); + int64_t OffsetValue = OffsetImmOp.getImm(); + unsigned ATReg = 0; + + // When the value of offset+3 does not fit in 16 bits, we have to load the + // offset in AT, (D)ADDu the original source register (if there was one), and + // then use AT as the source register for the generated LWL and LWR. + bool LoadedOffsetInAT = false; + if (!isInt<16>(OffsetValue + 3) || !isInt<16>(OffsetValue)) { + ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + LoadedOffsetInAT = true; + + warnIfNoMacro(IDLoc); + + if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), + IDLoc, Instructions)) + return true; + + // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() + // because it will make our output more similar to GAS'. For example, + // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9", + // instead of just an "ori $1, $9, 32768". + // NOTE: If there is no source register specified in the ULW, the parser + // will interpret it as $0. + if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64) + createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions); + } + + unsigned FinalSrcReg = LoadedOffsetInAT ? ATReg : SrcReg; + int64_t LeftLoadOffset = 0, RightLoadOffset = 0; + if (isLittle()) { + LeftLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3); + RightLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue; + } else { + LeftLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue; + RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3); + } + + MCInst LeftLoadInst; + LeftLoadInst.setOpcode(Mips::LWL); + LeftLoadInst.addOperand(DstRegOp); + LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); + LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset)); + Instructions.push_back(LeftLoadInst); + + MCInst RightLoadInst; + RightLoadInst.setOpcode(Mips::LWR); + RightLoadInst.addOperand(DstRegOp); + RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); + RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset )); + Instructions.push_back(RightLoadInst); + + return false; +} + void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { MCInst NopInst; @@ -2466,10 +2706,10 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, } void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, - unsigned TrgReg, + unsigned TrgReg, bool Is64Bit, SmallVectorImpl<MCInst> &Instructions) { MCInst AdduInst; - AdduInst.setOpcode(Mips::ADDu); + AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu); AdduInst.addOperand(MCOperand::createReg(DstReg)); AdduInst.addOperand(MCOperand::createReg(SrcReg)); AdduInst.addOperand(MCOperand::createReg(TrgReg)); @@ -2972,9 +3212,12 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { MCAsmParser &Parser = getParser(); SMLoc S; bool Result = true; + unsigned NumOfLParen = 0; - while (getLexer().getKind() == AsmToken::LParen) + while (getLexer().getKind() == AsmToken::LParen) { Parser.Lex(); + ++NumOfLParen; + } switch (getLexer().getKind()) { default: @@ -2985,7 +3228,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { case AsmToken::Minus: case AsmToken::Plus: if (isParenExpr) - Result = getParser().parseParenExpression(Res, S); + Result = getParser().parseParenExprOfDepth(NumOfLParen, Res, S); else Result = (getParser().parseExpression(Res)); while (getLexer().getKind() == AsmToken::RParen) @@ -3867,6 +4110,34 @@ bool MipsAsmParser::parseSetFpDirective() { return false; } +bool MipsAsmParser::parseSetOddSPRegDirective() { + MCAsmParser &Parser = getParser(); + + Parser.Lex(); // Eat "oddspreg". + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + getTargetStreamer().emitDirectiveSetOddSPReg(); + return false; +} + +bool MipsAsmParser::parseSetNoOddSPRegDirective() { + MCAsmParser &Parser = getParser(); + + Parser.Lex(); // Eat "nooddspreg". + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + getTargetStreamer().emitDirectiveSetNoOddSPReg(); + return false; +} + bool MipsAsmParser::parseSetPopDirective() { MCAsmParser &Parser = getParser(); SMLoc Loc = getLexer().getLoc(); @@ -4229,6 +4500,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetArchDirective(); } else if (Tok.getString() == "fp") { return parseSetFpDirective(); + } else if (Tok.getString() == "oddspreg") { + return parseSetOddSPRegDirective(); + } else if (Tok.getString() == "nooddspreg") { + return parseSetNoOddSPRegDirective(); } else if (Tok.getString() == "pop") { return parseSetPopDirective(); } else if (Tok.getString() == "push") { @@ -4428,6 +4703,8 @@ bool MipsAsmParser::parseInsnDirective() { /// ::= .module oddspreg /// ::= .module nooddspreg /// ::= .module fp=value +/// ::= .module softfloat +/// ::= .module hardfloat bool MipsAsmParser::parseDirectiveModule() { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); @@ -4446,8 +4723,16 @@ bool MipsAsmParser::parseDirectiveModule() { } if (Option == "oddspreg") { - getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32()); - clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + clearModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + // Synchronize the abiflags information with the FeatureBits information we + // changed above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated abiflags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted at the end). + getTargetStreamer().emitDirectiveModuleOddSPReg(); // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -4462,8 +4747,16 @@ bool MipsAsmParser::parseDirectiveModule() { return false; } - getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32()); - setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + setModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + // Synchronize the abiflags information with the FeatureBits information we + // changed above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated abiflags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted at the end). + getTargetStreamer().emitDirectiveModuleOddSPReg(); // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -4474,6 +4767,44 @@ bool MipsAsmParser::parseDirectiveModule() { return false; // parseDirectiveModule has finished successfully. } else if (Option == "fp") { return parseDirectiveModuleFP(); + } else if (Option == "softfloat") { + setModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + + // Synchronize the ABI Flags information with the FeatureBits information we + // updated above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated ABI Flags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted later). + getTargetStreamer().emitDirectiveModuleSoftFloat(); + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + return false; // parseDirectiveModule has finished successfully. + } else if (Option == "hardfloat") { + clearModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + + // Synchronize the ABI Flags information with the FeatureBits information we + // updated above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated ABI Flags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted later). + getTargetStreamer().emitDirectiveModuleHardFloat(); + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + return false; // parseDirectiveModule has finished successfully. } else { return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); } @@ -4502,8 +4833,15 @@ bool MipsAsmParser::parseDirectiveModuleFP() { return false; } - // Emit appropriate flags. - getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32()); + // Synchronize the abiflags information with the FeatureBits information we + // changed above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated abiflags information. + // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted at the end). + getTargetStreamer().emitDirectiveModuleFP(); + Parser.Lex(); // Consume the EndOfStatement. return false; } @@ -4512,6 +4850,7 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, StringRef Directive) { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); + bool ModuleLevelOptions = Directive == ".module"; if (Lexer.is(AsmToken::Identifier)) { StringRef Value = Parser.getTok().getString(); @@ -4528,6 +4867,13 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, } FpABI = MipsABIFlagsSection::FpABIKind::XX; + if (ModuleLevelOptions) { + setModuleFeatureBits(Mips::FeatureFPXX, "fpxx"); + clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } else { + setFeatureBits(Mips::FeatureFPXX, "fpxx"); + clearFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } return true; } @@ -4547,8 +4893,23 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, } FpABI = MipsABIFlagsSection::FpABIKind::S32; - } else + if (ModuleLevelOptions) { + clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx"); + clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } else { + clearFeatureBits(Mips::FeatureFPXX, "fpxx"); + clearFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } + } else { FpABI = MipsABIFlagsSection::FpABIKind::S64; + if (ModuleLevelOptions) { + clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx"); + setModuleFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } else { + clearFeatureBits(Mips::FeatureFPXX, "fpxx"); + setFeatureBits(Mips::FeatureFP64Bit, "fp64"); + } + } return true; } diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index c8629b5d7bd2..a34ba3bd0ee8 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -178,6 +178,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -1564,6 +1569,18 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::COP0RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -1855,6 +1872,6 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<23>(Insn) << 2)); + Inst.addOperand(MCOperand::createImm(SignExtend32<25>(Insn << 2))); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp index 725ea7f971eb..70b9cca8cf6e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp @@ -66,4 +66,4 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) { OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2 return OS; } -} // namespace llvm +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index bf306ee4814b..b078cd30a87b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -186,6 +186,6 @@ public: }; MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection); -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index aa965e82a6bf..40c5681acc17 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -73,6 +73,6 @@ public: unsigned GetEhDataReg(unsigned I) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 5c746b2894b2..328e71720cac 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -59,10 +59,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case Mips::fixup_MIPS_PCLO16: break; case Mips::fixup_Mips_PC16: - // So far we are only using this type for branches. - // For branches we start 1 instruction after the branch - // so the displacement will be one instruction size less. - Value -= 4; // The displacement is then divided by 4 to give us an 18 bit // address range. Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; @@ -135,7 +131,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup"); break; case Mips::fixup_MIPS_PC21_S2: - Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 21-bit signed immediate. @@ -143,7 +138,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup"); break; case Mips::fixup_MIPS_PC26_S2: - Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 26-bit signed immediate. diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index fe84e4021d34..b3d5a4964f86 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -87,6 +87,6 @@ public: }; // class MipsAsmBackend -} // namespace llvm +} // namespace #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index a7d5a1e75e41..ff7779ec1e78 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -119,7 +119,7 @@ namespace MipsII { FormMask = 15 }; -} // namespace MipsII -} // namespace llvm +} +} #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index a45e2ad8cf16..9b2952720edd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -51,7 +51,7 @@ struct MipsRelocationEntry { virtual void sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) override; }; -} // namespace +} MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 93925bf8ca03..e36263d54ca4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -226,8 +226,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions or immediates"); - const MCExpr *Expr = MO.getExpr(); - Fixups.push_back(MCFixup::create(0, Expr, + const MCExpr *FixupExpression = MCBinaryExpr::createAdd( + MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, FixupExpression, MCFixupKind(Mips::fixup_Mips_PC16))); return 0; } @@ -315,8 +316,9 @@ getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getBranchTarget21OpValue expects only expressions or immediates"); - const MCExpr *Expr = MO.getExpr(); - Fixups.push_back(MCFixup::create(0, Expr, + const MCExpr *FixupExpression = MCBinaryExpr::createAdd( + MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, FixupExpression, MCFixupKind(Mips::fixup_MIPS_PC21_S2))); return 0; } @@ -337,8 +339,9 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getBranchTarget26OpValue expects only expressions or immediates"); - const MCExpr *Expr = MO.getExpr(); - Fixups.push_back(MCFixup::create(0, Expr, + const MCExpr *FixupExpression = MCBinaryExpr::createAdd( + MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, FixupExpression, MCFixupKind(Mips::fixup_MIPS_PC26_S2))); return 0; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index 81a0a987bc4e..687b800c2409 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -25,6 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg); MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll); -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 20358a0f9cf2..4069d7d184ed 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -62,7 +62,7 @@ namespace MIPS_MC { StringRef selectMipsCPU(const Triple &TT, StringRef CPU); } -} // namespace llvm +} // End llvm namespace // Defines symbolic names for Mips registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 537867503eda..aef9bd3a8e2a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -265,4 +265,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, return S; } -} // namespace llvm +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 49116326139c..24b602810d6e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -79,6 +79,9 @@ void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg, if (GPR32RegClass->contains(CurrentSubReg) || GPR64RegClass->contains(CurrentSubReg)) ri_gprmask |= Value; + else if (COP0RegClass->contains(CurrentSubReg)) + ri_cprmask[0] |= Value; + // MIPS COP1 is the FPU. else if (FGR32RegClass->contains(CurrentSubReg) || FGR64RegClass->contains(CurrentSubReg) || AFGR64RegClass->contains(CurrentSubReg) || diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index a051f4c123fc..e4da2df75d47 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -92,15 +92,23 @@ void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } -void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled, - bool IsO32ABI) { - if (!Enabled && !IsO32ABI) + +void MipsTargetStreamer::emitDirectiveModuleFP() {} + +void MipsTargetStreamer::emitDirectiveModuleOddSPReg() { + if (!ABIFlagsSection.OddSPReg && !ABIFlagsSection.Is32BitABI) report_fatal_error("+nooddspreg is only valid for O32"); } +void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {} +void MipsTargetStreamer::emitDirectiveModuleHardFloat() {} void MipsTargetStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetOddSPReg() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetNoOddSPReg() { + forbidModuleDirective(); +} MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) @@ -369,12 +377,9 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, forbidModuleDirective(); } -void MipsTargetAsmStreamer::emitDirectiveModuleFP( - MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) { - MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI); - +void MipsTargetAsmStreamer::emitDirectiveModuleFP() { OS << "\t.module\tfp="; - OS << ABIFlagsSection.getFpABIString(Value) << "\n"; + OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n"; } void MipsTargetAsmStreamer::emitDirectiveSetFp( @@ -385,11 +390,28 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp( OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } -void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled, - bool IsO32ABI) { - MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); +void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg() { + MipsTargetStreamer::emitDirectiveModuleOddSPReg(); + + OS << "\t.module\t" << (ABIFlagsSection.OddSPReg ? "" : "no") << "oddspreg\n"; +} - OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n"; +void MipsTargetAsmStreamer::emitDirectiveSetOddSPReg() { + MipsTargetStreamer::emitDirectiveSetOddSPReg(); + OS << "\t.set\toddspreg\n"; +} + +void MipsTargetAsmStreamer::emitDirectiveSetNoOddSPReg() { + MipsTargetStreamer::emitDirectiveSetNoOddSPReg(); + OS << "\t.set\tnooddspreg\n"; +} + +void MipsTargetAsmStreamer::emitDirectiveModuleSoftFloat() { + OS << "\t.module\tsoftfloat\n"; +} + +void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() { + OS << "\t.module\thardfloat\n"; } // This part is for ELF object output. @@ -800,10 +822,3 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() { OS << ABIFlagsSection; } - -void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled, - bool IsO32ABI) { - MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); - - ABIFlagsSection.OddSPReg = Enabled; -} diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td index 78ba76d27cbb..187a022b2563 100644 --- a/lib/Target/Mips/MicroMips32r6InstrFormats.td +++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td @@ -240,3 +240,50 @@ class ERETNC_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> { let Inst{15-6} = 0x3cd; let Inst{5-0} = 0x3c; } + +class BREAK_MMR6_ENC<string instr_asm> : MMR6Arch<instr_asm> { + bits<10> code_1; + bits<10> code_2; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-16} = code_1; + let Inst{15-6} = code_2; + let Inst{5-0} = 0x07; +} + +class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> { + bits<32> Inst; + + let Inst{31-26} = 0x0; + let Inst{25-21} = 0x0; + let Inst{20-16} = 0x0; + let Inst{15-11} = op; + let Inst{10-6} = 0x0; + let Inst{5-0} = 0x0; +} + +class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> { + bits<32> Inst; + bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt + + let Inst{31-26} = 0x00; + let Inst{25-21} = 0x00; + let Inst{20-16} = rt; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + +class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<instr_asm> { + bits<5> rd; + bits<5> rt; + bits<5> shamt; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rd; + let Inst{20-16} = rt; + let Inst{15-11} = shamt; + let Inst{10} = rotate; + let Inst{9-0} = funct; +} diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index ed71c3d9b5f6..53bde1379e29 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -29,6 +29,7 @@ class AUI_MMR6_ENC : AUI_FM_MMR6; class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>; class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>; class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>; +class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">; class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>; class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011111>; class BGTZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b111000>; @@ -40,6 +41,8 @@ class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>; class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>; class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>; class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>; +class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>; +class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>; class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">; class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">; class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>; @@ -60,6 +63,7 @@ class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>; class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>; class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>; class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>; +class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>; @@ -144,6 +148,8 @@ class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>; +class BRK_MMR6_DESC : BRK_FT<"break">; + class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> : MMR6Arch<instr_asm> { dag OutOperandList = (outs); @@ -166,6 +172,9 @@ class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>; class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>; +class EHB_MMR6_DESC : Barrier<"ehb">; +class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>; + class ERET_MMR6_DESC : ER_FT<"eret">; class ERETNC_MMR6_DESC : ER_FT<"eretnc">; @@ -255,6 +264,7 @@ class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>; class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>; +class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>; class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>; class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>; class MOD_MMR6_DESC : ArithLogicR<"mod", GPR32Opnd>; @@ -302,11 +312,14 @@ def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC, ISA_MICROMIPS32R6; +def BREAK_MMR6 : StdMMR6Rel, BRK_MMR6_DESC, BRK_MMR6_ENC, ISA_MICROMIPS32R6; def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6; def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6; def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6; def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6; def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6; +def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6; +def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6; def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC, ISA_MICROMIPS32R6; @@ -330,8 +343,18 @@ def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC, ISA_MICROMIPS32R6; +def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6; def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6; def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6; def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; } + +//===----------------------------------------------------------------------===// +// +// MicroMips instruction aliases +// +//===----------------------------------------------------------------------===// + +def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 2aab739dbd2b..39393840c6f2 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -934,3 +934,7 @@ class UncondBranchMMPseudo<string opstr> : def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>; def : MipsInstAlias<"nop", (MOVE16_MM ZERO, ZERO), 1>; } + +let Predicates = [InMicroMips] in { +def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2; +} diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 604b6704c033..671d7a87cc3d 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -31,6 +31,6 @@ namespace llvm { FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 2c33cfb96530..f281c927c1c4 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -42,6 +42,6 @@ public: RegScavenger *RS) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index f2831fd5d0f6..893fc7cdf473 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -62,7 +62,7 @@ namespace { }; char Mips16HardFloat::ID = 0; -} // namespace +} // // Return types that matter for hard float are: diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp index bf82108728de..2eb6e5ddd2d9 100644 --- a/lib/Target/Mips/Mips16HardFloatInfo.cpp +++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp @@ -46,5 +46,5 @@ extern FuncSignature const *findFuncSignature(const char *name) { } return nullptr; } -} // namespace Mips16HardFloatInfo -} // namespace llvm +} +} diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h index 8354c33d33bc..7295c287576d 100644 --- a/lib/Target/Mips/Mips16HardFloatInfo.h +++ b/lib/Target/Mips/Mips16HardFloatInfo.h @@ -44,7 +44,7 @@ struct FuncNameSignature { extern const FuncNameSignature PredefinedFuncs[]; extern FuncSignature const *findFuncSignature(const char *name); -} // namespace Mips16HardFloatInfo -} // namespace llvm +} +} #endif diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h index ce6b3f8486a9..ae0e61e19d9d 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.h +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -48,6 +48,6 @@ private: FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index c52ef2a4e195..846e3c964f44 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -54,7 +54,7 @@ struct Mips16IntrinsicHelperType{ return std::strcmp(Name, RHS.Name) == 0; } }; -} // namespace +} // Libcalls for which no helper is generated. Sorted by name for binary search. static const Mips16Libcall HardFloatLibCalls[] = { diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h index 99d3cacca67a..d3b9f750f347 100644 --- a/lib/Target/Mips/Mips16ISelLowering.h +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -77,6 +77,6 @@ namespace llvm { unsigned SltiOpc, unsigned SltiXOpc, MachineInstr *MI, MachineBasicBlock *BB )const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 1132d8a0318d..6540b40bc9ab 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -123,6 +123,6 @@ private: }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 83781ff24ac5..c37cf95cadc3 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -427,10 +427,10 @@ def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { -def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; -def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3; -def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3; -def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3; +def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd>, MFC3OP_FM<0x10, 1>, ISA_MIPS3; +def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3; +def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3; +def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3; } //===----------------------------------------------------------------------===// @@ -613,10 +613,10 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs", ISA_MIPS3; // Two operand (implicit 0 selector) versions: -def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>; +def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>; let Predicates = [HasMips64, HasCnMips] in { def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>; diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h index 6b5d02b7a7e0..ae3c38ced80b 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.h +++ b/lib/Target/Mips/MipsAnalyzeImmediate.h @@ -58,6 +58,6 @@ namespace llvm { unsigned ADDiu, ORi, SLL, LUi; InstSeq Insts; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 1c80021086bd..fdba064b5c5e 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -747,8 +747,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // accept it. We therefore emit it when it contradicts the default or an // option has changed the default (i.e. FPXX) and omit it otherwise. if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX())) - getTargetStreamer().emitDirectiveModuleOddSPReg(STI.useOddSPReg(), - ABI.IsO32()); + getTargetStreamer().emitDirectiveModuleOddSPReg(); } void MipsAsmPrinter::emitInlineAsmStart() const { diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 3c2b843b8963..a7f3304a3da8 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -145,7 +145,7 @@ public: void EmitEndOfAsmFile(Module &M) override; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h index 04a9ef5ef051..081c393a09be 100644 --- a/lib/Target/Mips/MipsCCState.h +++ b/lib/Target/Mips/MipsCCState.h @@ -131,6 +131,6 @@ public: bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 3d020abe2704..c2651b82d285 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -143,7 +144,7 @@ private: unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); - unsigned materializeExternalCallSym(const char *SynName); + unsigned materializeExternalCallSym(MCSymbol *Syn); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); @@ -369,12 +370,12 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } -unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) { +unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) { const TargetRegisterClass *RC = &Mips::GPR32RegClass; unsigned DestReg = createResultReg(RC); emitInst(Mips::LW, DestReg) .addReg(MFI->getGlobalBaseReg()) - .addExternalSymbol(SymName, MipsII::MO_GOT); + .addSym(Sym, MipsII::MO_GOT); return DestReg; } @@ -1234,7 +1235,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) @@ -1286,8 +1287,8 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { // Issue the call. unsigned DestAddress; - if (SymName) - DestAddress = materializeExternalCallSym(SymName); + if (Symbol) + DestAddress = materializeExternalCallSym(Symbol); else DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index dab9c055df6f..5eabd58e8686 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -49,6 +49,6 @@ protected: const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST); const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST); -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index 83be74f0d466..1426d0fbf516 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -129,6 +129,6 @@ private: unsigned ConstraintID, std::vector<SDValue> &OutOps) override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index e4f3cde0c804..bc9a1ce64097 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -204,7 +204,7 @@ namespace llvm { SDL, SDR }; - } // namespace MipsISD + } //===--------------------------------------------------------------------===// // TargetLowering Implementation @@ -566,6 +566,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 3daff5fa5d36..08efc3509046 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -146,6 +146,6 @@ private: const MipsInstrInfo *createMips16InstrInfo(const MipsSubtarget &STI); const MipsInstrInfo *createMipsSEInstrInfo(const MipsSubtarget &STI); -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 2a7949eb15eb..ab98c9054e74 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1050,8 +1050,12 @@ class SCBase<string opstr, RegisterOperand RO> : let Constraints = "$rt = $dst"; } -class MFC3OP<string asmstr, RegisterOperand RO> : - InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins), +class MFC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> : + InstSE<(outs RO:$rt), (ins RD:$rd, uimm16:$sel), + !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>; + +class MTC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> : + InstSE<(outs RO:$rd), (ins RD:$rt, uimm16:$sel), !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>; class TrapBase<Instruction RealInst> @@ -1278,7 +1282,9 @@ def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>, def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>, ISA_MIPS2_NOT_32R6_64R6; -def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>; +let AdditionalPredicates = [NotInMicroMips] in { +def BREAK : MMRel, StdMMR6Rel, BRK_FT<"break">, BRK_FM<0xd>; +} def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>; def TRAP : TrapBase<BREAK>; def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6; @@ -1288,7 +1294,9 @@ def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32; } def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32; -def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; +let AdditionalPredicates = [NotInMicroMips] in { +def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; +} def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug @@ -1484,10 +1492,10 @@ def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers -def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32; -def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32; -def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>; -def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>; +def MFC0 : MFC3OP<"mfc0", GPR32Opnd, COP0Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32; +def MTC0 : MTC3OP<"mtc0", COP0Opnd, GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32; +def MFC2 : MFC3OP<"mfc2", GPR32Opnd, COP2Opnd>, MFC3OP_FM<0x12, 0>; +def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>; class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary, FrmOther, asmstr>; @@ -1603,11 +1611,13 @@ def : MipsInstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>; +let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; -def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +} +def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>; +def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 COP0Opnd:$rd, GPR32Opnd:$rt, 0), 0>; +def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, COP2Opnd:$rd, 0), 0>; +def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 COP2Opnd:$rd, GPR32Opnd:$rt, 0), 0>; let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; } @@ -1623,7 +1633,9 @@ def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; def : MipsInstAlias<"break", (BREAK 0, 0), 1>; def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; +let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2; +} def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2; def : MipsInstAlias<"teq $rs, $rt", @@ -1707,6 +1719,12 @@ def BLEU : CondBranchPseudo<"bleu">; def BGEU : CondBranchPseudo<"bgeu">; def BGTU : CondBranchPseudo<"bgtu">; +def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), + "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + +def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), + "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 6b2a44d7a893..80d9b75b85b7 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -88,6 +88,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, Offset += MO.getOffset(); break; + case MachineOperand::MO_MCSymbol: + Symbol = MO.getMCSymbol(); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); break; @@ -141,6 +146,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_MCSymbol: case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_BlockAddress: diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index a8bd1cd78d1d..1ce27e401850 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -45,6 +45,6 @@ private: MCSymbolRefExpr::VariantKind Kind) const; bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index 8568137ff374..b18a673912f8 100644 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -37,7 +37,7 @@ namespace { }; char MipsModuleDAGToDAGISel::ID = 0; -} // namespace +} bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h index 746feab1d9a8..23f0b7070d62 100644 --- a/lib/Target/Mips/MipsOptionRecord.h +++ b/lib/Target/Mips/MipsOptionRecord.h @@ -49,6 +49,7 @@ public: FGR64RegClass = &(TRI->getRegClass(Mips::FGR64RegClassID)); AFGR64RegClass = &(TRI->getRegClass(Mips::AFGR64RegClassID)); MSA128BRegClass = &(TRI->getRegClass(Mips::MSA128BRegClassID)); + COP0RegClass = &(TRI->getRegClass(Mips::COP0RegClassID)); COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID)); COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID)); } @@ -66,6 +67,7 @@ private: const MCRegisterClass *FGR64RegClass; const MCRegisterClass *AFGR64RegClass; const MCRegisterClass *MSA128BRegClass; + const MCRegisterClass *COP0RegClass; const MCRegisterClass *COP2RegClass; const MCRegisterClass *COP3RegClass; uint32_t ri_gprmask; diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 5c71272e99be..b6cd79193cfc 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -43,7 +43,7 @@ namespace { }; char MipsOs16::ID = 0; -} // namespace +} // Figure out if we need float point based on the function signature. // We need to move variables in and/or out of floating point diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 7497a2556738..02bcac5a3ddb 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -201,6 +201,10 @@ let Namespace = "Mips" in { foreach I = 0-7 in def FCC#I : MipsReg<#I, "fcc"#I>; + // COP0 registers. + foreach I = 0-31 in + def COP0#I : MipsReg<#I, ""#I>; + // COP2 registers. foreach I = 0-31 in def COP2#I : MipsReg<#I, ""#I>; @@ -431,6 +435,10 @@ def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; +// Coprocessor 0 registers. +def COP0 : RegisterClass<"Mips", [i32], 32, (sequence "COP0%u", 0, 31)>, + Unallocatable; + // Coprocessor 2 registers. def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>, Unallocatable; @@ -559,6 +567,10 @@ def HWRegsAsmOperand : MipsAsmRegOperand { let Name = "HWRegsAsmReg"; } +def COP0AsmOperand : MipsAsmRegOperand { + let Name = "COP0AsmReg"; +} + def COP2AsmOperand : MipsAsmRegOperand { let Name = "COP2AsmReg"; } @@ -609,6 +621,10 @@ def ACC64DSPOpnd : RegisterOperand<ACC64DSP> { let ParserMatchClass = ACC64DSPAsmOperand; } +def COP0Opnd : RegisterOperand<COP0> { + let ParserMatchClass = COP0AsmOperand; +} + def COP2Opnd : RegisterOperand<COP2> { let ParserMatchClass = COP2AsmOperand; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index a858f30b94a8..ec7bf314c641 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -75,7 +75,7 @@ private: const MipsSEInstrInfo &TII; const MipsRegisterInfo &RegInfo; }; -} // namespace +} ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), MRI(MF.getRegInfo()), diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index ee56b8b8c8ff..2fcd6bbb9a15 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -39,6 +39,6 @@ public: unsigned ehDataReg(unsigned I) const; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index fb2f04121556..a894034020e9 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -126,6 +126,6 @@ private: FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 623630a18078..d44f8d82ec3e 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -112,6 +112,6 @@ namespace llvm { MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI, MachineBasicBlock *BB) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index cdafe9f4d48b..bebbabf7b838 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -113,6 +113,6 @@ private: MachineBasicBlock::iterator I) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h index feddf9808264..061423fbeb86 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -26,6 +26,6 @@ public: ~MipsSelectionDAGInfo(); }; -} // namespace llvm +} #endif diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index c8a2e4bd72c5..5f9296812e1c 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -292,6 +292,6 @@ public: return &InstrItins; } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 976970ccbcc6..38b2ecff7d7f 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -90,6 +90,6 @@ public: CodeGenOpt::Level OL); }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 39cadc1e0f83..6ce1be707d04 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -80,22 +80,15 @@ public: virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg); - /// Emit a '.module fp=value' directive using the given values. - /// Updates the .MIPS.abiflags section - virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, - bool Is32BitABI) { - ABIFlagsSection.setFpABI(Value, Is32BitABI); - } - - /// Emit a '.module fp=value' directive using the current values of the - /// .MIPS.abiflags section. - void emitDirectiveModuleFP() { - emitDirectiveModuleFP(ABIFlagsSection.getFpABI(), - ABIFlagsSection.Is32BitABI); - } - - virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI); + // FP abiflags directives + virtual void emitDirectiveModuleFP(); + virtual void emitDirectiveModuleOddSPReg(); + virtual void emitDirectiveModuleSoftFloat(); + virtual void emitDirectiveModuleHardFloat(); virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value); + virtual void emitDirectiveSetOddSPReg(); + virtual void emitDirectiveSetNoOddSPReg(); + void forbidModuleDirective() { ModuleDirectiveAllowed = false; } void reallowModuleDirective() { ModuleDirectiveAllowed = true; } bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } @@ -198,11 +191,14 @@ public: void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; - // ABI Flags - void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, - bool Is32BitABI) override; - void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; + // FP abiflags directives + void emitDirectiveModuleFP() override; + void emitDirectiveModuleOddSPReg() override; + void emitDirectiveModuleSoftFloat() override; + void emitDirectiveModuleHardFloat() override; void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; + void emitDirectiveSetOddSPReg() override; + void emitDirectiveSetNoOddSPReg() override; }; // This part is for ELF object output @@ -244,9 +240,7 @@ public: void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; - // ABI Flags - void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; void emitMipsAbiFlags(); }; -} // namespace llvm +} #endif diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 99e950eba80f..05fe06dbc07c 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources NVPTXLowerAggrCopies.cpp NVPTXLowerKernelArgs.cpp NVPTXLowerAlloca.cpp + NVPTXPeephole.cpp NVPTXMCExpr.cpp NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 8144f3fde730..02c5a210d099 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -49,6 +49,6 @@ public: raw_ostream &O, const char *Modifier = nullptr); }; -} // namespace llvm +} #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index b55664ed32a7..a72ae2ef53a7 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -94,7 +94,7 @@ enum { IsSurfTexQueryFlag = 0x800, IsTexModeUnifiedFlag = 0x1000 }; -} // namespace NVPTXII -} // namespace llvm +} +} #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 8a28b089ce35..221d2f093aeb 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -54,7 +54,10 @@ createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { static MCCodeGenInfo *createNVPTXMCCodeGenInfo( StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->initMCCodeGenInfo(RM, CM, OL); + + // The default relocation model is used regardless of what the client has + // specified, as it is the only relocation model currently supported. + X->initMCCodeGenInfo(Reloc::Default, CM, OL); return X; } diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index 1480b61afdbe..a2d670f8d39d 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -43,6 +43,6 @@ public: } }; -} // namespace llvm +} #endif diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index d06d61f5e550..fe28214e9588 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -71,6 +71,7 @@ MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM); BasicBlockPass *createNVPTXLowerAllocaPass(); +MachineFunctionPass *createNVPTXPeephole(); bool isImageOrSamplerVal(const Value *, const Module *); @@ -133,7 +134,7 @@ enum VecType { V2 = 2, V4 = 4 }; -} // namespace PTXLdStInstCode +} /// PTXCvtMode - Conversion code enumeration namespace PTXCvtMode { @@ -152,7 +153,7 @@ enum CvtMode { FTZ_FLAG = 0x10, SAT_FLAG = 0x20 }; -} // namespace PTXCvtMode +} /// PTXCmpMode - Comparison mode enumeration namespace PTXCmpMode { @@ -180,9 +181,9 @@ enum CmpMode { BASE_MASK = 0xFF, FTZ_FLAG = 0x100 }; -} // namespace PTXCmpMode -} // namespace NVPTX -} // namespace llvm +} +} +} // end namespace llvm; // Defines symbolic names for NVPTX registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 1a1a8ca7c666..cadd7a46cd9d 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -109,7 +109,7 @@ void VisitGlobalVariableForEmission( Visited.insert(GV); Visiting.erase(GV); } -} // namespace +} void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) @@ -826,7 +826,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(TM.getDataLayout()); + Mang = new Mangler(); // Emit header before any dwarf directives are emitted below. emitHeader(M, OS1, STI); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 12d80a34a4e8..f6f7685e76f9 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -349,6 +349,6 @@ public: DebugLoc prevDebugLoc; void emitLineNumberAsDotLoc(const MachineInstr &); }; -} // namespace llvm +} // end of namespace #endif diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp index 2d5e74c4c4bf..7d4be8e809cf 100644 --- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp +++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp @@ -38,7 +38,7 @@ public: /// \brief Clean up the name to remove symbols invalid in PTX. std::string cleanUpName(StringRef Name); }; -} // namespace +} char NVPTXAssignValidGlobalNames::ID = 0; diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 3eb7024ff08a..69a229e32f43 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -107,7 +107,7 @@ private: /// Helper function for bitcasts. Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; -} // namespace +} char NVPTXFavorNonGenericAddrSpaces::ID = 0; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 5503494fc3c8..9b34aef3fdec 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -35,35 +35,33 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { if (MF.getFrameInfo()->hasStackObjects()) { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); - // Insert "mov.u32 %SP, %Depot" - MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineInstr *MI = MBB.begin(); + MachineRegisterInfo &MR = MF.getRegInfo(); + // This instruction really occurs before first instruction // in the BB, so giving it no debug location. DebugLoc dl = DebugLoc(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // mov %SPL, %depot; - // cvta.local %SP, %SPL; - if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { - unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass); - MachineInstr *MI = - BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get( - NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(LocalReg); - BuildMI(MBB, MI, dl, - MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), - LocalReg).addImm(MF.getFunctionNumber()); - } else { - unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass); - MachineInstr *MI = - BuildMI(MBB, MBBI, dl, - MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(LocalReg); - BuildMI(MBB, MI, dl, - MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), - LocalReg).addImm(MF.getFunctionNumber()); + // Emits + // mov %SPL, %depot; + // cvta.local %SP, %SPL; + // for local address accesses in MF. + bool Is64Bit = + static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit(); + unsigned CvtaLocalOpcode = + (Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes); + unsigned MovDepotOpcode = + (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR); + if (!MR.use_empty(NVPTX::VRFrame)) { + // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL". + MI = BuildMI(MBB, MI, dl, + MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode), + NVPTX::VRFrame) + .addReg(NVPTX::VRFrameLocal); } + BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode), + NVPTX::VRFrameLocal) + .addImm(MF.getFunctionNumber()); } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 488edecc6e7b..14f8bb7b98fe 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -31,6 +31,6 @@ public: MachineBasicBlock::iterator I) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 5879df31f8a6..fe20580c83a2 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -95,6 +95,6 @@ private: bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; }; -} // namespace +} #endif diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b5af72ab855a..09e0bd5d3d88 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -206,7 +206,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); // Turn FP truncstore into trunc + store. + // FIXME: vector types should also be expanded setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 276851f872ea..ed94775b3002 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -427,7 +427,7 @@ enum NodeType : unsigned { Suld3DV4I16Zero, Suld3DV4I32Zero }; -} // namespace NVPTXISD +} class NVPTXSubtarget; diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index c86f861acd55..aa36b6be7250 100644 --- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -42,7 +42,7 @@ private: Value *cleanupValue(Value *V); void replaceWith(Instruction *From, ConstantInt *To); }; -} // namespace +} char NVPTXImageOptimizer::ID = 0; diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp index 24dcb122b94e..b533f316d8a9 100644 --- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -132,6 +132,10 @@ void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) { assert(!Arg->hasByValAttr() && "byval params should be handled by handleByValParam"); + // Do nothing if the argument already points to the global address space. + if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL) + return; + Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin(); Instruction *ArgInGlobal = new AddrSpaceCastInst( Arg, PointerType::get(Arg->getType()->getPointerElementType(), diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 4b9322c77a40..10f1135ad841 100644 --- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -46,6 +46,6 @@ public: return ImageHandleList[Idx].c_str(); } }; -} // namespace llvm +} #endif diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp new file mode 100644 index 000000000000..a61c291d233f --- /dev/null +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -0,0 +1,154 @@ +//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning +// of a MachineFunction. +// +// mov %SPL, %depot +// cvta.local %SP, %SPL +// +// Because Frame Index is a generic address and alloca can only return generic +// pointer, without this pass the instructions producing alloca'ed address will +// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on +// this address with their .local versions, but this may introduce a lot of +// cvta.to.local instructions. Performance can be improved if we avoid casting +// address back and forth and directly calculate local address based on %SPL. +// This peephole pass optimizes these cases, for example +// +// It will transform the following pattern +// %vreg0<def> = LEA_ADDRi64 %VRFrame, 4 +// %vreg1<def> = cvta_to_local_yes_64 %vreg0 +// +// into +// %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4 +// +// %VRFrameLocal is the virtual register name of %SPL +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvptx-peephole" + +namespace llvm { +void initializeNVPTXPeepholePass(PassRegistry &); +} + +namespace { +struct NVPTXPeephole : public MachineFunctionPass { + public: + static char ID; + NVPTXPeephole() : MachineFunctionPass(ID) { + initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "NVPTX optimize redundant cvta.to.local instruction"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char NVPTXPeephole::ID = 0; + +INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false) + +static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { + auto &MBB = *Root.getParent(); + auto &MF = *MBB.getParent(); + // Check current instruction is cvta.to.local + if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 && + Root.getOpcode() != NVPTX::cvta_to_local_yes) + return false; + + auto &Op = Root.getOperand(1); + const auto &MRI = MF.getRegInfo(); + MachineInstr *GenericAddrDef = nullptr; + if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) { + GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); + } + + // Check the register operand is uniquely defined by LEA_ADDRi instruction + if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || + (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && + GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { + return false; + } + + // Check the LEA_ADDRi operand is Frame index + auto &BaseAddrOp = GenericAddrDef->getOperand(1); + if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) { + return true; + } + + return false; +} + +static void CombineCVTAToLocal(MachineInstr &Root) { + auto &MBB = *Root.getParent(); + auto &MF = *MBB.getParent(); + const auto &MRI = MF.getRegInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + + MachineInstrBuilder MIB = + BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), + Root.getOperand(0).getReg()) + .addReg(NVPTX::VRFrameLocal) + .addOperand(Prev.getOperand(2)); + + MBB.insert((MachineBasicBlock::iterator)&Root, MIB); + + // Check if MRI has only one non dbg use, which is Root + if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) { + Prev.eraseFromParentAndMarkDBGValuesForRemoval(); + } + Root.eraseFromParentAndMarkDBGValuesForRemoval(); +} + +bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + // Loop over all of the basic blocks. + for (auto &MBB : MF) { + // Traverse the basic block. + auto BlockIter = MBB.begin(); + + while (BlockIter != MBB.end()) { + auto &MI = *BlockIter++; + if (isCVTAToLocalCombinationCandidate(MI)) { + CombineCVTAToLocal(MI); + Changed = true; + } + } // Instruction + } // Basic Block + + // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal + const auto &MRI = MF.getRegInfo(); + if (MRI.use_empty(NVPTX::VRFrame)) { + if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) { + MI->eraseFromParentAndMarkDBGValuesForRemoval(); + } + } + + return Changed; +} + +MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index ea58f7787489..5fd69a6815a8 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -39,7 +39,7 @@ public: private: void calculateFrameObjectOffsets(MachineFunction &Fn); }; -} // namespace +} MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() { return new NVPTXPrologEpilogPass(); diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 3ef997b006fa..6e97f9efbc27 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -69,7 +69,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { } return ""; } -} // namespace llvm +} NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {} diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index efcee6b6f2bd..ff6ccc457db7 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -65,5 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>; def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. -def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot, +def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot, (sequence "ENVREG%u", 0, 31))>; diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index bb0adc59a3fd..e83f735a551e 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -45,7 +45,7 @@ private: bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx); }; -} // namespace +} char NVPTXReplaceImageHandles::ID = 0; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index d4520451d37d..c7287719be5f 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -103,6 +103,6 @@ public: void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index c071ee82abc6..9d9072efc382 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -210,6 +210,10 @@ bool NVPTXPassConfig::addInstSelector() { void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); + // NVPTXPrologEpilogPass calculates frame object offset and replace frame + // index with VRFrame register. NVPTXPeephole need to be run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 4d937c6a8bec..7e2ce73daaa3 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -91,6 +91,6 @@ void dumpInstRec(Value *v, std::set<Instruction *> *visited); void dumpInstRec(Value *v); void dumpParent(Value *v); -} // namespace llvm +} #endif diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index 1c2043069e1e..5e375b7852e4 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -75,7 +75,7 @@ private: bool handleFunction(Function *ReflectFunction); void setVarMap(); }; -} // namespace +} ModulePass *llvm::createNVVMReflectPass() { return new NVVMReflect(); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 36119d5d7e46..992be5b966c1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -31,7 +31,7 @@ namespace { bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; -} // namespace +} PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index ad614f2ddf35..ae43e59d3cb1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -50,7 +50,7 @@ enum Fixups { LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; -} // namespace PPC -} // namespace llvm +} +} #endif diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 489905b26fcc..5c38fe173d96 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -219,7 +219,7 @@ public: llvm_unreachable("Unknown pseudo-op: .localentry"); } }; -} // namespace +} static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 18818a1c335e..77fe45882289 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -81,7 +81,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { return false; } -} // namespace llvm +} // End llvm namespace // Generated files will use "namespace PPC". To avoid symbol clash, // undefine PPC here. PPC may be predefined on some hosts. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 9b5491f92491..9d7289658f0f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -51,7 +51,7 @@ public: FixedValue); } }; -} // namespace +} /// computes the log2 of the size of the relocation, /// used for relocation_info::r_length. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index ff9b059d906a..6075631a541f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -62,7 +62,7 @@ namespace PPC { /// Assume the condition register is set by MI(a,b), return the predicate if /// we modify the instructions such that condition register is set by MI(b,a). Predicate getSwappedPredicate(Predicate Opcode); -} // namespace PPC -} // namespace llvm +} +} #endif diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 49f77b538c1b..ae8d8b4f5dfe 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -98,6 +98,6 @@ namespace llvm { }; } // end namespace PPCII -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 2b6030aea2b1..940d55ac1f36 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -51,7 +51,7 @@ namespace { } }; char PPCBSel::ID = 0; -} // namespace +} INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", false, false) diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h index 550cac62927e..eb904a858592 100644 --- a/lib/Target/PowerPC/PPCCallingConv.h +++ b/lib/Target/PowerPC/PPCCallingConv.h @@ -29,7 +29,7 @@ inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, return false; } -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index 9cd9c2faa51f..fc89753ed94e 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -191,7 +191,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, "PowerPC Early-Return Creation", false, false) diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 82ff5307d0b7..fafcd76f9d18 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1448,9 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + const MCSymbol *Symbol = CLI.Symbol; - if (!Callee && !SymName) + if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. @@ -2347,4 +2347,4 @@ namespace llvm { return new PPCFastISel(FuncInfo, LibInfo); return nullptr; } -} // namespace llvm +} diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index b232863c9614..28d074ecd79d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -93,6 +93,6 @@ public: const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 5f9f9f2e341f..c85c2610d2f5 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -234,7 +234,7 @@ private: SDNode *transferMemOperands(SDNode *N, SDNode *Result); }; -} // namespace +} /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, @@ -2773,18 +2773,6 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { else DM[i] = 1; - // For little endian, we must swap the input operands and adjust - // the mask elements (reverse and invert them). - if (PPCSubTarget->isLittleEndian()) { - std::swap(Op1, Op2); - unsigned tmp = DM[0]; - DM[0] = 1 - DM[1]; - DM[1] = 1 - tmp; - } - - SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, - MVT::i32); - if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && isa<LoadSDNode>(Op1.getOperand(0))) { @@ -2800,6 +2788,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } } + // For little endian, we must swap the input operands and adjust + // the mask elements (reverse and invert them). + if (PPCSubTarget->isLittleEndian()) { + std::swap(Op1, Op2); + unsigned tmp = DM[0]; + DM[0] = 1 - DM[1]; + DM[1] = 1 - tmp; + } + + SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, + MVT::i32); SDValue Ops[] = { Op1, Op2, DMV }; return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1cdfb4178544..594472bbb47b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, } } +/** + * \brief Common function used to match vmrgew and vmrgow shuffles + * + * The indexOffset determines whether to look for even or odd words in + * the shuffle mask. This is based on the of the endianness of the target + * machine. + * - Little Endian: + * - Use offset of 0 to check for odd elements + * - Use offset of 4 to check for even elements + * - Big Endian: + * - Use offset of 0 to check for even elements + * - Use offset of 4 to check for odd elements + * A detailed description of the vector element ordering for little endian and + * big endian can be found at + * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html + * Targeting your applications - what little endian and big endian IBM XL C/C++ + * compiler differences mean to you + * + * The mask to the shuffle vector instruction specifies the indices of the + * elements from the two input vectors to place in the result. The elements are + * numbered in array-access order, starting with the first vector. These vectors + * are always of type v16i8, thus each vector will contain 16 elements of size + * 8. More info on the shuffle vector can be found in the + * http://llvm.org/docs/LangRef.html#shufflevector-instruction + * Language Reference. + * + * The RHSStartValue indicates whether the same input vectors are used (unary) + * or two different input vectors are used, based on the following: + * - If the instruction uses the same vector for both inputs, the range of the + * indices will be 0 to 15. In this case, the RHSStart value passed should + * be 0. + * - If the instruction has two different vectors then the range of the + * indices will be 0 to 31. In this case, the RHSStart value passed should + * be 16 (indices 0-15 specify elements in the first vector while indices 16 + * to 31 specify elements in the second vector). + * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] IndexOffset Specifies whether to look for even or odd elements + * \param[in] RHSStartValue Specifies the starting index for the righthand input + * vector to the shuffle_vector instruction + * \return true iff this shuffle vector represents an even or odd word merge + */ +static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, + unsigned RHSStartValue) { + if (N->getValueType(0) != MVT::v16i8) + return false; + + for (unsigned i = 0; i < 2; ++i) + for (unsigned j = 0; j < 4; ++j) + if (!isConstantOrUndef(N->getMaskElt(i*4+j), + i*RHSStartValue+j+IndexOffset) || + !isConstantOrUndef(N->getMaskElt(i*4+j+8), + i*RHSStartValue+j+IndexOffset+8)) + return false; + return true; +} + +/** + * \brief Determine if the specified shuffle mask is suitable for the vmrgew or + * vmrgow instructions. + * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) + * \param[in] ShuffleKind Identify the type of merge: + * - 0 = big-endian merge with two different inputs; + * - 1 = either-endian merge with two identical inputs; + * - 2 = little-endian merge with two different inputs (inputs are swapped for + * little-endian merges). + * \param[in] DAG The current SelectionDAG + * \return true iff this shuffle mask + */ +bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + unsigned indexOffset = CheckEven ? 4 : 0; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 2) // swapped + return isVMerge(N, indexOffset, 16); + else + return false; + } + else { + unsigned indexOffset = CheckEven ? 0 : 4; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 0) // Normal + return isVMerge(N, indexOffset, 16); + else + return false; + } + return false; +} /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. @@ -3765,7 +3858,7 @@ struct TailCallArgumentInfo { TailCallArgumentInfo() : FrameIdx(0) {} }; -} // namespace +} /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void @@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { return Op; } } @@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -9863,7 +9960,9 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, case ISD::INTRINSIC_W_CHAIN: { MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); Chain = Intrin->getChain(); - Base = Intrin->getBasePtr(); + // Similarly to the store case below, Intrin->getBasePtr() doesn't get + // us what we want. Get operand 2 instead. + Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index c33d60565b79..02242b512a4f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -353,7 +353,7 @@ namespace llvm { /// the last operand. TOC_ENTRY }; - } // namespace PPCISD + } /// Define some predicates that are used for node matching. namespace PPC { @@ -382,6 +382,11 @@ namespace llvm { bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG); + /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for + /// a VMRGEW or VMRGOW instruction + bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, @@ -405,7 +410,7 @@ namespace llvm { /// If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int isQVALIGNIShuffleMask(SDNode *N); - } // namespace PPC + } class PPCTargetLowering : public TargetLowering { const PPCSubtarget &Subtarget; @@ -871,6 +876,6 @@ namespace llvm { CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -} // namespace llvm +} #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 9ff604bbee9d..cb0271fe8d0c 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), }]>; +def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG); +}]>; +def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG); +}]>; +def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG); +}]>; +def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG); +}]>; +def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG); +}]>; +def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG); +}]>; + + + def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N)); }]>; @@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; } // isCommutable +// Vector merge +def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgew $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgow $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>; + +// Match vmrgew(x,x) and vmrgow(x,x) +def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef), + (VMRGEW $vA, $vA)>; +def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef), + (VMRGOW $vA, $vA)>; + +// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be swapped for correct +// semantics.w +def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGEW $vB, $vA)>; +def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGOW $vB, $vA)>; + + // Vector shifts def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h index ec94fa5580ff..cf71b1c59869 100644 --- a/lib/Target/PowerPC/PPCInstrBuilder.h +++ b/lib/Target/PowerPC/PPCInstrBuilder.h @@ -38,6 +38,6 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, return MIB.addFrameIndex(FI).addImm(Offset); } -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d3bb7a63c622..696a83860e53 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -352,15 +352,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, bool isPPC64 = Subtarget.isPPC64(); // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } + if (!isUnpredicatedTerminator(I)) return false; @@ -513,14 +508,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, } unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 39bf4547733c..e2d6346aa532 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -237,6 +237,6 @@ public: void getNoopForMachoTarget(MCInst &NopInst) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index d08b80871f3e..43ba4994fde6 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -457,22 +457,34 @@ let Uses = [RM] in { defm XVCMPEQDP : XX3Form_Rcr<60, 99, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v2i64:$XT, + (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>; defm XVCMPEQSP : XX3Form_Rcr<60, 67, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v4i32:$XT, + (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>; defm XVCMPGEDP : XX3Form_Rcr<60, 115, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v2i64:$XT, + (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>; defm XVCMPGESP : XX3Form_Rcr<60, 83, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v4i32:$XT, + (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>; defm XVCMPGTDP : XX3Form_Rcr<60, 107, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v2i64:$XT, + (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>; defm XVCMPGTSP : XX3Form_Rcr<60, 75, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, + [(set v4i32:$XT, + (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>; // Move Instructions def XSABSDP : XX2Form<60, 345, diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index e783b5e65333..b4e1c099f190 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -88,7 +88,7 @@ namespace { const TargetTransformInfo *TTI; const DataLayout *DL; }; -} // namespace +} char PPCLoopDataPrefetch::ID = 0; INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 1891b6315c51..b6e7799402e1 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -87,7 +87,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; }; -} // namespace +} char PPCLoopPreIncPrep::ID = 0; static const char *name = "Prepare loop for pre-inc. addressing modes"; @@ -113,7 +113,7 @@ namespace { protected: ScalarEvolution *SE; }; -} // namespace +} static bool IsPtrInBounds(Value *BasePtr) { Value *StrippedBasePtr = BasePtr; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index c44d5d70f8dc..76837ecb32de 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -57,7 +57,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Mang->getNameWithPrefix(Name, MO.getSymbolName()); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); } else { const GlobalValue *GV = MO.getGlobal(); TM.getNameWithPrefix(Name, GV, *Mang); diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h index d2eaeb42dbc4..2c1378d5670d 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -26,6 +26,6 @@ public: ~PPCSelectionDAGInfo(); }; -} // namespace llvm +} #endif diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index ea17e1c189b8..e9cc3d4bd5bc 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -58,7 +58,7 @@ namespace PPC { DIR_PWR8, DIR_64 }; -} // namespace PPC +} class GlobalValue; class TargetMachine; @@ -286,6 +286,6 @@ public: bool enableSubRegLiveness() const override; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 7a9db0fabb07..2dc0d825c80d 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -156,7 +156,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, "PowerPC TLS Dynamic Call Fixup", false, false) diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 61b963fe6da5..bf165c9edc6e 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -145,7 +145,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE, "PowerPC TOC Register Dependencies", false, false) diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index a5c4c23c7901..dbe7617d3542 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -22,6 +22,6 @@ public: virtual void emitAbiVersion(int AbiVersion) = 0; virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0; }; -} // namespace llvm +} #endif diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 537db656fd60..5e3ae2a4471b 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -165,7 +165,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE, "PowerPC VSX Copy Legalization", false, false) diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index a029ddf0bc08..f352fa647ace 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -317,7 +317,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, "PowerPC VSX FMA Mutation", false, false) diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 939293a5638e..e7ab71ac2106 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -79,7 +79,6 @@ struct PPCVSXSwapEntry { unsigned int IsStore : 1; unsigned int IsSwap : 1; unsigned int MentionsPhysVR : 1; - unsigned int HasImplicitSubreg : 1; unsigned int IsSwappable : 1; unsigned int SpecialHandling : 3; unsigned int WebRejected : 1; @@ -224,7 +223,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineInstr &MI : MBB) { bool RelevantInstr = false; - bool ImplicitSubreg = false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) @@ -232,8 +230,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { unsigned Reg = MO.getReg(); if (isVecReg(Reg)) { RelevantInstr = true; - if (MO.getSubReg() != 0) - ImplicitSubreg = true; break; } } @@ -249,9 +245,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { PPCVSXSwapEntry SwapEntry{}; int VecIdx = addSwapEntry(&MI, SwapEntry); - if (ImplicitSubreg) - SwapVector[VecIdx].HasImplicitSubreg = 1; - switch(MI.getOpcode()) { default: // Unless noted otherwise, an instruction is considered @@ -260,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // select, compare, etc.). SwapVector[VecIdx].IsSwappable = 1; break; - case PPC::XXPERMDI: + case PPC::XXPERMDI: { // This is a swap if it is of the form XXPERMDI t, s, s, 2. // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2, @@ -268,9 +261,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // SUBREG_TO_REG to find the real source value for comparison. // If the real source value is a physical register, then mark the // XXPERMDI as mentioning a physical register. - // Any other form of XXPERMDI is lane-sensitive and unsafe - // for the optimization. - if (MI.getOperand(3).getImm() == 2) { + int immed = MI.getOperand(3).getImm(); + if (immed == 2) { unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), VecIdx); unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), @@ -278,7 +270,26 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (trueReg1 == trueReg2) SwapVector[VecIdx].IsSwap = 1; } + // This is a doubleword splat if it is of the form + // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we + // must look through chains of copy-likes to find the source + // register. We turn off the marking for mention of a physical + // register, because splatting it is safe; the optimization + // will not swap the value in the physical register. + else if (immed == 0 || immed == 3) { + unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), + VecIdx); + unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), + VecIdx); + if (trueReg1 == trueReg2) { + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].MentionsPhysVR = 0; + } + } + // Any other form of XXPERMDI is lane-sensitive and unsafe + // for the optimization. break; + } case PPC::LVX: // Non-permuting loads are currently unsafe. We can use special // handling for this in the future. By not marking these as @@ -307,14 +318,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { SwapVector[VecIdx].IsStore = 1; SwapVector[VecIdx].IsSwap = 1; break; - case PPC::SUBREG_TO_REG: - // These are fine provided they are moving between full vector - // register classes. For example, the VRs are a subset of the - // VSRs, but each VR and each VSR is a full 128-bit register. - if (isVecReg(MI.getOperand(0).getReg()) && - isVecReg(MI.getOperand(2).getReg())) - SwapVector[VecIdx].IsSwappable = 1; - break; case PPC::COPY: // These are fine provided they are moving between full vector // register classes. @@ -349,7 +352,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::LVSL: case PPC::LVSR: case PPC::LVXL: - case PPC::LXVDSX: case PPC::STVEBX: case PPC::STVEHX: case PPC::STVEWX: @@ -457,23 +459,19 @@ int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI, // such operations to the ultimate source register. If a // physical register is encountered, we stop the search and // flag the swap entry indicated by VecIdx (the original -// XXPERMDI) as mentioning a physical register. Similarly -// for implicit subregister mentions (which should never -// happen). +// XXPERMDI) as mentioning a physical register. unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, unsigned VecIdx) { MachineInstr *MI = MRI->getVRegDef(SrcReg); if (!MI->isCopyLike()) return SrcReg; - unsigned CopySrcReg, CopySrcSubreg; - if (MI->isCopy()) { + unsigned CopySrcReg; + if (MI->isCopy()) CopySrcReg = MI->getOperand(1).getReg(); - CopySrcSubreg = MI->getOperand(1).getSubReg(); - } else { + else { assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); CopySrcReg = MI->getOperand(2).getReg(); - CopySrcSubreg = MI->getOperand(2).getSubReg(); } if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) { @@ -481,11 +479,6 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, return CopySrcReg; } - if (CopySrcSubreg != 0) { - SwapVector[VecIdx].HasImplicitSubreg = 1; - return CopySrcReg; - } - return lookThruCopyLike(CopySrcReg, VecIdx); } @@ -552,11 +545,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId); - // Reject webs containing mentions of physical registers or implicit - // subregs, or containing operations that we don't know how to handle - // in a lane-permuted region. + // Reject webs containing mentions of physical registers, or containing + // operations that we don't know how to handle in a lane-permuted region. if (SwapVector[EntryIdx].MentionsPhysVR || - SwapVector[EntryIdx].HasImplicitSubreg || !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) { SwapVector[Repr].WebRejected = 1; @@ -765,8 +756,6 @@ void PPCVSXSwapRemoval::dumpSwapVector() { DEBUG(dbgs() << "swap "); if (SwapVector[EntryIdx].MentionsPhysVR) DEBUG(dbgs() << "physreg "); - if (SwapVector[EntryIdx].HasImplicitSubreg) - DEBUG(dbgs() << "implsubreg "); if (SwapVector[EntryIdx].IsSwappable) { DEBUG(dbgs() << "swappable "); @@ -809,7 +798,7 @@ void PPCVSXSwapRemoval::dumpSwapVector() { DEBUG(dbgs() << "\n"); } -} // namespace +} // end default namespace INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE, "PowerPC VSX Swap Removal", false, false) diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index 59f011aefe66..3e56b9e9b883 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -41,7 +41,7 @@ public: raw_ostream &VStream, raw_ostream &CStream) const override; }; -} // namespace +} namespace llvm { extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 800a5f254b8f..0be60fd7a051 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -36,7 +36,7 @@ namespace { unsigned Type) const override; }; -} // namespace +} unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h index 34c58da10d5d..8d79396d936e 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h @@ -91,7 +91,7 @@ namespace llvm { LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; - } // namespace Sparc -} // namespace llvm + } +} #endif diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 8f62de4a4fd2..a9c9f15454ec 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -41,7 +41,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, bool IsLIttleEndian, uint8_t OSABI); -} // namespace llvm +} // End llvm namespace // Defines symbolic names for Sparc registers. This defines a mapping from // register name to register number. diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index 133af8694139..96378d522dc0 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -33,7 +33,7 @@ namespace llvm { void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); -} // namespace llvm +} // end namespace llvm; namespace llvm { // Enums corresponding to Sparc condition codes, both icc's and fcc's. These @@ -74,7 +74,7 @@ namespace llvm { FCC_ULE = 14+16, // Unordered or Less or Equal FCC_O = 15+16 // Ordered }; - } // namespace SPCC + } inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { switch (CC) { diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 3d73bbd0d90c..bb3b78861cbd 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -55,6 +55,6 @@ private: }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index a4b9c79c3264..b6bc3d255713 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -49,7 +49,7 @@ namespace llvm { TLS_LD, TLS_CALL }; - } // namespace SPISD + } class SparcTargetLowering : public TargetLowering { const SparcSubtarget *Subtarget; diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index b59dd896019c..15673f134d80 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -96,6 +96,6 @@ public: unsigned getGlobalBaseReg(MachineFunction *MF) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index b1f795b81e8f..a02bae07a336 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -353,13 +353,6 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } -let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in - def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>; - -let rd = 0 in - def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22), - "unimp $imm22", []>; - // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. This has to handle all // permutations of selection between i32/f32/f64 on ICC and FCC. @@ -406,36 +399,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in { [(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>; } -// JMPL Instruction. -let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, - DecoderMethod = "DecodeJMPL" in { - def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr), - "jmpl $addr, $dst", []>; - def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr), - "jmpl $addr, $dst", []>; -} - -// Section A.3 - Synthetic Instructions, p. 85 -// special cases of JMPL: -let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, - isCodeGenOnly = 1 in { - let rd = 0, rs1 = 15 in - def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), - "jmp %o7+$val", [(retflag simm13:$val)]>; - - let rd = 0, rs1 = 31 in - def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), - "jmp %i7+$val", []>; -} - -let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, - isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in { - def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr), - "rett $addr", []>; - def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr), - "rett $addr", []>; -} - // Section B.1 - Load Integer Instructions, p. 90 let DecoderMethod = "DecodeLoadInt" in { defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>; @@ -470,6 +433,24 @@ let DecoderMethod = "DecodeStoreQFP" in defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; +// Section B.8 - SWAP Register with Memory Instruction +// (Atomic swap) +let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in { + def SWAPrr : F3_1<3, 0b001111, + (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val), + "swap [$addr], $dst", + [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>; + def SWAPri : F3_2<3, 0b001111, + (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val), + "swap [$addr], $dst", + [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>; + def SWAPArr : F3_1_asi<3, 0b011111, + (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val), + "swapa [$addr] $asi, $dst", + [/*FIXME: pattern?*/]>; +} + + // Section B.9 - SETHI Instruction, p. 104 def SETHIi: F2_1<0b100, (outs IntRegs:$rd), (ins i32imm:$imm22), @@ -725,6 +706,56 @@ let Uses = [O6], } } +// Section B.25 - Jump and Link Instruction + +// JMPL Instruction. +let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, + DecoderMethod = "DecodeJMPL" in { + def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr), + "jmpl $addr, $dst", []>; + def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr), + "jmpl $addr, $dst", []>; +} + +// Section A.3 - Synthetic Instructions, p. 85 +// special cases of JMPL: +let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, + isCodeGenOnly = 1 in { + let rd = 0, rs1 = 15 in + def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %o7+$val", [(retflag simm13:$val)]>; + + let rd = 0, rs1 = 31 in + def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %i7+$val", []>; +} + +// Section B.26 - Return from Trap Instruction +let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, + isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in { + def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr), + "rett $addr", []>; + def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr), + "rett $addr", []>; +} + + +// Section B.27 - Trap on Integer Condition Codes Instruction +multiclass TRAP<string regStr> { + def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2, + CCOp:$cond), + !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>; + def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm, + CCOp:$cond), + !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>; +} + +let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in + defm TICC : TRAP<"%icc">; + +let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in + def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>; + // Section B.28 - Read State Register Instructions let rs2 = 0 in def RDASR : F3_1<2, 0b101000, @@ -787,6 +818,18 @@ let Predicates = [HasNoV9] in { } } +// Section B.30 - STBAR Instruction +let hasSideEffects = 1, rd = 0, rs1 = 0b01111, rs2 = 0 in + def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>; + + +// Section B.31 - Unimplmented Instruction +let rd = 0 in + def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22), + "unimp $imm22", []>; + +// Section B.33 - Floating-point Operate (FPop) Instructions + // Convert Integer to Floating-point Instructions, p. 141 def FITOS : F3_3u<2, 0b110100, 0b011000100, (outs FPRegs:$rd), (ins FPRegs:$rs2), @@ -1168,29 +1211,10 @@ let rs1 = 0 in def : Pat<(ctpop i32:$src), (POPCrr (SRLri $src, 0))>; -// Atomic swap. -let hasSideEffects =1, rd = 0, rs1 = 0b01111, rs2 = 0 in - def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>; - let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13), "membar $simm13", []>; -let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in { - def SWAPrr : F3_1<3, 0b001111, - (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val), - "swap [$addr], $dst", - [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>; - def SWAPri : F3_2<3, 0b001111, - (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val), - "swap [$addr], $dst", - [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>; - def SWAPArr : F3_1_asi<3, 0b011111, - (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val), - "swapa [$addr] $asi, $dst", - [/*FIXME: pattern?*/]>; -} - // TODO: Should add a CASArr variant. In fact, the CAS instruction, // unlike other instructions, only comes in a form which requires an // ASI be provided. The ASI value hardcoded here is ASI_PRIMARY, the @@ -1215,18 +1239,6 @@ let hasSideEffects = 1 in { } } -multiclass TRAP<string regStr> { - def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2, - CCOp:$cond), - !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>; - def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm, - CCOp:$cond), - !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>; -} - -let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in - defm TICC : TRAP<"%icc">; - //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 0471443f5961..104744279d9d 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -51,6 +51,6 @@ namespace llvm { void setLeafProc(bool rhs) { IsLeafProc = rhs; } bool isLeafProc() const { return IsLeafProc; } }; -} // namespace llvm +} #endif diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index 2ceae82c8cdb..6818291b30b4 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -26,6 +26,6 @@ public: ~SparcSelectionDAGInfo() override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 0eb3d6593fe6..75fd37f01a19 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -96,7 +96,10 @@ struct SystemZAddressingMode { // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { - return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; + assert(Count <= 64); + if (Count > 63) + return UINT64_MAX; + return (uint64_t(1) << Count) - 1; } // Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation @@ -903,6 +906,8 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return nullptr; RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); unsigned Count = 0; while (expandRxSBG(RISBG)) @@ -958,6 +963,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return nullptr; // Try treating each operand of N as the second operand of the RxSBG // and see which goes deepest. RxSBGOperands RxSBG[] = { @@ -993,8 +1002,6 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { Opcode = SystemZ::RISBGN; } - SDLoc DL(N); - EVT VT = N->getValueType(0); SDValue Ops[5] = { convertTo(DL, MVT::i64, Op0), convertTo(DL, MVT::i64, RxSBG[I].Input), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 75845796de79..372f6fb3ea50 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2005,17 +2005,17 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, else if (Cond == ISD::SETLT || Cond == ISD::SETULT) // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, // always true for CC>3. - C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) // ...and the inverse of that. - C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; else if (Cond == ISD::SETLE || Cond == ISD::SETULE) // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), // always true for CC>3. - C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) // ...and the inverse of that. - C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; else llvm_unreachable("Unexpected integer comparison type"); C.CCMask &= CCValid; @@ -3292,7 +3292,7 @@ struct Permute { unsigned Operand; unsigned char Bytes[SystemZ::VectorBytes]; }; -} // namespace +} static const Permute PermuteForms[] = { // VMRHG @@ -3574,7 +3574,7 @@ struct GeneralShuffle { // The type of the shuffle result. EVT VT; }; -} // namespace +} // Add an extra undefined element to the shuffle. void GeneralShuffle::addUndef() { diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt new file mode 100644 index 000000000000..df04c2a3460b --- /dev/null +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_TARGET_DEFINITIONS WebAssembly.td) + +tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(WebAssemblyCommonTableGen) + +add_llvm_target(WebAssemblyCodeGen + WebAssemblyFrameLowering.cpp + WebAssemblyInstrInfo.cpp + WebAssemblyISelDAGToDAG.cpp + WebAssemblyISelLowering.cpp + WebAssemblyMachineFunctionInfo.cpp + WebAssemblyRegisterInfo.cpp + WebAssemblySelectionDAGInfo.cpp + WebAssemblySubtarget.cpp + WebAssemblyTargetMachine.cpp + WebAssemblyTargetTransformInfo.cpp +) + +add_dependencies(LLVMWebAssemblyCodeGen intrinsics_gen) + +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..5394b67d2b87 --- /dev/null +++ b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMWebAssemblyAsmPrinter + WebAssemblyInstPrinter.cpp + ) diff --git a/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt new file mode 100644 index 000000000000..54df6d65570a --- /dev/null +++ b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt -------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = WebAssemblyAsmPrinter +parent = WebAssembly +required_libraries = MC Support +add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/InstPrinter/Makefile b/lib/Target/WebAssembly/InstPrinter/Makefile new file mode 100644 index 000000000000..87534379f796 --- /dev/null +++ b/lib/Target/WebAssembly/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/WebAssembly/AsmPrinter/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMWebAssemblyAsmPrinter + +# Hack: we need to include 'main' wasm target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp new file mode 100644 index 000000000000..fbb985aaafbb --- /dev/null +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -0,0 +1,43 @@ +//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Print MCInst instructions to wasm format. +/// +//===----------------------------------------------------------------------===// + +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "WebAssembly.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include <cctype> +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + +void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, + unsigned RegNo) const { + llvm_unreachable("TODO: implement printRegName"); +} + +void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, + const MCSubtargetInfo &STI) { + llvm_unreachable("TODO: implement printInst"); +} diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h new file mode 100644 index 000000000000..70fcef214ce2 --- /dev/null +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -0,0 +1,38 @@ +// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This class prints an WebAssembly MCInst to wasm file syntax. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class MCOperand; +class MCSubtargetInfo; + +class WebAssemblyInstPrinter : public MCInstPrinter { +public: + WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI); + + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt new file mode 100644 index 000000000000..04ef9c4e4bcf --- /dev/null +++ b/lib/Target/WebAssembly/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/WebAssembly/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = WebAssembly +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = WebAssemblyCodeGen +parent = WebAssembly +required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyDesc WebAssemblyInfo +add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..ccc0f0d7ccbc --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMWebAssemblyDesc + WebAssemblyMCAsmInfo.cpp + WebAssemblyMCTargetDesc.cpp +) diff --git a/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 000000000000..ce7cb5dd4daf --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt ------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = WebAssemblyDesc +parent = WebAssembly +required_libraries = MC Support WebAssemblyAsmPrinter WebAssemblyInfo +add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/MCTargetDesc/Makefile b/lib/Target/WebAssembly/MCTargetDesc/Makefile new file mode 100644 index 000000000000..11dcb4ff6075 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/WebAssembly/TargetDesc/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMWebAssemblyDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp new file mode 100644 index 000000000000..55346f71c6fc --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -0,0 +1,53 @@ +//===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declarations of the WebAssemblyMCAsmInfo +/// properties. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCAsmInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-mc-asm-info" + +WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} + +WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { + PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit(); + + // TODO: What should MaxInstLength be? + + PrivateGlobalPrefix = ""; + PrivateLabelPrefix = ""; + + UseDataRegionDirectives = true; + + Data8bitsDirective = "\t.int8\t"; + Data16bitsDirective = "\t.int16\t"; + Data32bitsDirective = "\t.int32\t"; + Data64bitsDirective = "\t.int64\t"; + + AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + + SupportsDebugInformation = true; + + // For now, WebAssembly does not support exceptions. + ExceptionsType = ExceptionHandling::None; + + // TODO: UseIntegratedAssembler? +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h new file mode 100644 index 000000000000..d2b8fb7748fc --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h @@ -0,0 +1,32 @@ +//===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declaration of the WebAssemblyMCAsmInfo class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + +class Triple; + +class WebAssemblyMCAsmInfo final : public MCAsmInfo { +public: + explicit WebAssemblyMCAsmInfo(const Triple &T); + ~WebAssemblyMCAsmInfo() override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp new file mode 100644 index 000000000000..d248556c62d7 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -0,0 +1,56 @@ +//===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file provides WebAssembly-specific target descriptions. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCTargetDesc.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "WebAssemblyMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-mc-target-desc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "WebAssemblyGenSubtargetInfo.inc" + +static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { + MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); + return MAI; +} + +static MCInstPrinter * +createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant, + const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0 || SyntaxVariant == 1) + return new WebAssemblyInstPrinter(MAI, MII, MRI); + return nullptr; +} + +// Force static initialization. +extern "C" void LLVMInitializeWebAssemblyTargetMC() { + for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + } +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h new file mode 100644 index 000000000000..24893daec7ea --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -0,0 +1,53 @@ +//==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file provides WebAssembly-specific target descriptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" +#include <string> + +namespace llvm { + +class formatted_raw_ostream; +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCRegisterInfo; +class MCObjectWriter; +class MCStreamer; +class MCSubtargetInfo; +class MCTargetStreamer; +class StringRef; +class Target; +class Triple; +class raw_ostream; + +extern Target TheWebAssemblyTarget32; +extern Target TheWebAssemblyTarget64; + +MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); + +} // end namespace llvm + +// Defines symbolic names for WebAssembly registers. This defines a mapping from +// register name to register number. +// +#define GET_SUBTARGETINFO_ENUM +#include "WebAssemblyGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile new file mode 100644 index 000000000000..35d835c6506c --- /dev/null +++ b/lib/Target/WebAssembly/Makefile @@ -0,0 +1,19 @@ +##===- lib/Target/WebAssembly/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMWebAssemblyCodeGen +TARGET = WebAssembly + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc + +DIRS = InstPrinter TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt new file mode 100644 index 000000000000..7a71060a638f --- /dev/null +++ b/lib/Target/WebAssembly/README.txt @@ -0,0 +1,15 @@ +//===-- README.txt - Notes for WebAssembly code gen -----------------------===// + +This WebAssembly backend is presently in a very early stage of development. +The code should build and not break anything else, but don't expect a lot more +at this point. + +For more information on WebAssembly itself, see the design documents: + * https://github.com/WebAssembly/design/blob/master/README.md + +The following documents contain some information on the planned semantics and +binary encoding of WebAssembly itself: + * https://github.com/WebAssembly/design/blob/master/AstSemantics.md + * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt new file mode 100644 index 000000000000..ef6e4d2b617a --- /dev/null +++ b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMWebAssemblyInfo + WebAssemblyTargetInfo.cpp + ) + +add_dependencies(LLVMWebAssemblyInfo WebAssemblyCommonTableGen) diff --git a/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt new file mode 100644 index 000000000000..f4da9239bbe8 --- /dev/null +++ b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt --------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = WebAssemblyInfo +parent = WebAssembly +required_libraries = Support +add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/TargetInfo/Makefile b/lib/Target/WebAssembly/TargetInfo/Makefile new file mode 100644 index 000000000000..b021eb6d9455 --- /dev/null +++ b/lib/Target/WebAssembly/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/WebAssembly/TargetInfo/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMWebAssemblyInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp new file mode 100644 index 000000000000..ddb1eb1d1892 --- /dev/null +++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp @@ -0,0 +1,30 @@ +//===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file registers the WebAssembly target. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-target-info" + +Target llvm::TheWebAssemblyTarget32; +Target llvm::TheWebAssemblyTarget64; + +extern "C" void LLVMInitializeWebAssemblyTargetInfo() { + RegisterTarget<Triple::wasm32> X(TheWebAssemblyTarget32, "wasm32", + "WebAssembly 32-bit"); + RegisterTarget<Triple::wasm64> Y(TheWebAssemblyTarget64, "wasm64", + "WebAssembly 64-bit"); +} diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h new file mode 100644 index 000000000000..3ff19d46f437 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -0,0 +1,31 @@ +//===-- WebAssembly.h - Top-level interface for WebAssembly ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the entry points for global functions defined in +/// the LLVM WebAssembly back-end. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H + +#include "llvm/Support/CodeGen.h" + +namespace llvm { + +class WebAssemblyTargetMachine; +class FunctionPass; + +FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, + CodeGenOpt::Level OptLevel); + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td new file mode 100644 index 000000000000..a123bf6f66b6 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -0,0 +1,62 @@ +//- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a target description file for the WebAssembly architecture, which is +// also known as "wasm". +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// WebAssembly Subtarget features. +//===----------------------------------------------------------------------===// + +def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false", + "Enable 128-bit SIMD">; + +//===----------------------------------------------------------------------===// +// Architectures. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "WebAssemblyRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrInfo.td" + +def WebAssemblyInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// WebAssembly Processors supported. +//===----------------------------------------------------------------------===// + +// Minimal Viable Product. +def : ProcessorModel<"mvp", NoSchedModel, []>; + +// Latest and greatest experimental version of WebAssembly. Bugs included! +def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>; + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def WebAssembly : Target { + let InstructionSet = WebAssemblyInstrInfo; +} diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp new file mode 100644 index 000000000000..e4ca82e963c2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -0,0 +1,74 @@ +//===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the WebAssembly implementation of +/// TargetFrameLowering class. +/// +/// On WebAssembly, there aren't a lot of things to do here. There are no +/// callee-saved registers to save, and no spill slots. +/// +/// The stack grows downward. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyFrameLowering.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyTargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-frame-info" + +// TODO: Implement a red zone? + +/// Return true if the specified function should have a dedicated frame pointer +/// register. +bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { + llvm_unreachable("TODO: implement hasFP"); +} + +/// Under normal circumstances, when a frame pointer is not required, we reserve +/// argument space for call sites in the function immediately on entry to the +/// current function. This eliminates the need for add/sub sp brackets around +/// call sites. Returns true if the call frame is included as part of the stack +/// frame. +bool WebAssemblyFrameLowering::hasReservedCallFrame( + const MachineFunction &MF) const { + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr"); +} + +void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + llvm_unreachable("TODO: implement emitPrologue"); +} + +void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + llvm_unreachable("TODO: implement emitEpilogue"); +} + +void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan( + MachineFunction &MF, RegScavenger *RS) const { + llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan"); +} diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h new file mode 100644 index 000000000000..0b112d02c0bf --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -0,0 +1,48 @@ +// WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This class implements WebAssembly-specific bits of +/// TargetFrameLowering class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H + +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class WebAssemblyFrameLowering final : public TargetFrameLowering { +public: + WebAssemblyFrameLowering() + : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16, + /*LocalAreaOffset=*/0, + /*TransientStackAlignment=*/16, + /*StackRealignable=*/true) {} + + void + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + + /// These methods insert prolog and epilog code into the function. + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp new file mode 100644 index 000000000000..518ef332a6c7 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -0,0 +1,73 @@ +//- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines an instruction selector for the WebAssembly target. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Function.h" // To access function attributes. +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-isel" + +//===--------------------------------------------------------------------===// +/// WebAssembly-specific code to select WebAssembly machine instructions for +/// SelectionDAG operations. +/// +namespace { +class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the + /// right decision when generating code for different targets. + const WebAssemblySubtarget *Subtarget; + + bool ForCodeSize; + +public: + WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) { + } + + const char *getPassName() const override { + return "WebAssembly Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + ForCodeSize = + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || + MF.getFunction()->hasFnAttribute(Attribute::MinSize); + Subtarget = &MF.getSubtarget<WebAssemblySubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + SDNode *Select(SDNode *Node) override; + +private: + // add select functions here... +}; +} // end anonymous namespace + +SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) { + llvm_unreachable("TODO: implement Select"); +} + +/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready +/// for instruction scheduling. +FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new WebAssemblyDAGToDAGISel(TM, OptLevel); +} diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp new file mode 100644 index 000000000000..4eec02efbd94 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -0,0 +1,63 @@ +//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyTargetLowering class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyISelLowering.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyTargetMachine.h" +#include "WebAssemblyTargetObjectFile.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower" + +WebAssemblyTargetLowering::WebAssemblyTargetLowering( + const TargetMachine &TM, const WebAssemblySubtarget &STI) + : TargetLowering(TM), Subtarget(&STI) { + // WebAssembly does not produce floating-point exceptions on normal floating + // point operations. + setHasFloatingPointExceptions(false); +} + +//===----------------------------------------------------------------------===// +// WebAssembly Lowering private implementation. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly Optimization Hooks +//===----------------------------------------------------------------------===// + +MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + return getDataSection(); +} diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h new file mode 100644 index 000000000000..efd60a7bacd6 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -0,0 +1,49 @@ +//- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the interfaces that WebAssembly uses to lower LLVM +/// code into a selection DAG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +namespace WebAssemblyISD { + +enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... +}; + +} // end namespace WebAssemblyISD + +class WebAssemblySubtarget; +class WebAssemblyTargetMachine; + +class WebAssemblyTargetLowering final : public TargetLowering { +public: + WebAssemblyTargetLowering(const TargetMachine &TM, + const WebAssemblySubtarget &STI); + +private: + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the + /// right decision when generating code for different targets. + const WebAssemblySubtarget *Subtarget; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td new file mode 100644 index 000000000000..35e88eec8573 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -0,0 +1,46 @@ +// WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// WebAssembly Atomic operand code-gen constructs. +// +//===----------------------------------------------------------------------===// + +// TODO: Implement atomic instructions. + +//===----------------------------------------------------------------------===// +// Atomic fences +//===----------------------------------------------------------------------===// + +// TODO: add atomic fences here... + +//===----------------------------------------------------------------------===// +// Atomic loads +//===----------------------------------------------------------------------===// + +// TODO: add atomic loads here... + +//===----------------------------------------------------------------------===// +// Atomic stores +//===----------------------------------------------------------------------===// + +// TODO: add atomic stores here... + +//===----------------------------------------------------------------------===// +// Low-level exclusive operations +//===----------------------------------------------------------------------===// + +// TODO: add exclusive operations here... + +// Load-exclusives. + +// Store-exclusives. + +// Store-release-exclusives. + +// And clear exclusive. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td new file mode 100644 index 000000000000..8bbf3e9ec87b --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -0,0 +1,28 @@ +// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// WebAssembly instruction format definitions. +// +//===----------------------------------------------------------------------===// + +// WebAssembly Instruction Format +class WebAssemblyInst<string cstr> : Instruction { + field bits<0> Inst; // Instruction encoding. + let Namespace = "WebAssembly"; + let Pattern = []; + let Constraints = cstr; +} + +// Normal instructions +class I<dag oops, dag iops, list<dag> pattern, string cstr = ""> + : WebAssemblyInst<cstr> { + dag OutOperandList = oops; + dag InOperandList = iops; + let Pattern = pattern; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp new file mode 100644 index 000000000000..ea8937c8f9f2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -0,0 +1,28 @@ +//===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the WebAssembly implementation of the +/// TargetInstrInfo class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyInstrInfo.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-instr-info" + +WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) + : RI(STI.getTargetTriple()) {} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h new file mode 100644 index 000000000000..1c4ae22f16d6 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -0,0 +1,37 @@ +//=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the WebAssembly implementation of the +/// TargetInstrInfo class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H + +#include "WebAssemblyRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + +class WebAssemblySubtarget; + +class WebAssemblyInstrInfo final { + const WebAssemblyRegisterInfo RI; + +public: + explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI); + + const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td new file mode 100644 index 000000000000..142eccfbcaa5 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -0,0 +1,46 @@ +// WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// WebAssembly Instruction definitions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// + +def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">; +def HasAddr64 : Predicate<"Subtarget->hasAddr64()">; +def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, + AssemblerPredicate<"FeatureSIMD128", "simd128">; + +//===----------------------------------------------------------------------===// +// WebAssembly-specific DAG Node Types. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly-specific DAG Nodes. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly-specific Operands. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly Instruction Format Definitions. +//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Additional sets of instructions. +//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrAtomics.td" +include "WebAssemblyInstrSIMD.td" diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td new file mode 100644 index 000000000000..e25483ad3f7a --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -0,0 +1,15 @@ +// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// WebAssembly SIMD operand code-gen constructs. +// +//===----------------------------------------------------------------------===// + +// TODO: Implement SIMD instructions. +// Note: use Requires<[HasSIMD128]>. diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp new file mode 100644 index 000000000000..542d984b9006 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -0,0 +1,19 @@ +//=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements WebAssembly-specific per-machine-function +/// information. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMachineFunctionInfo.h" +using namespace llvm; + +WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {} diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h new file mode 100644 index 000000000000..fc5e910b09ef --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -0,0 +1,37 @@ +// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares WebAssembly-specific per-machine-function +/// information. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H + +#include "WebAssemblyRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +/// This class is derived from MachineFunctionInfo and contains private +/// WebAssembly-specific information for each MachineFunction. +class WebAssemblyFunctionInfo final : public MachineFunctionInfo { + MachineFunction &MF; + +public: + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} + ~WebAssemblyFunctionInfo() override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp new file mode 100644 index 000000000000..ad24c90af6a2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -0,0 +1,33 @@ +//===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the WebAssembly implementation of the +/// TargetRegisterInfo class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyRegisterInfo.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyFrameLowering.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-info" + +WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {} diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h new file mode 100644 index 000000000000..55300287a51e --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -0,0 +1,35 @@ +// WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the WebAssembly implementation of the +/// WebAssemblyRegisterInfo class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H + +namespace llvm { + +class MachineFunction; +class RegScavenger; +class TargetRegisterClass; +class Triple; + +class WebAssemblyRegisterInfo final { + const Triple &TT; + +public: + explicit WebAssemblyRegisterInfo(const Triple &TT); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td new file mode 100644 index 000000000000..7b3d636a2605 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -0,0 +1,28 @@ +//WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the WebAssembly register classes and some nominal +// physical registers. +// +//===----------------------------------------------------------------------===// + +class WebAssemblyReg<string n> : Register<n> { + let Namespace = "WebAssembly"; +} + +class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList> + : RegisterClass<"WebAssembly", regTypes, alignment, regList>; + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp new file mode 100644 index 000000000000..cfd1bafff236 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -0,0 +1,23 @@ +//===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblySelectionDAGInfo class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-selectiondag-info" + +WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL) + : TargetSelectionDAGInfo(DL) {} + +WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {} diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h new file mode 100644 index 000000000000..03e8d393558d --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -0,0 +1,31 @@ +//=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the WebAssembly subclass for +/// TargetSelectionDAGInfo. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo { +public: + explicit WebAssemblySelectionDAGInfo(const DataLayout *DL); + ~WebAssemblySelectionDAGInfo() override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp new file mode 100644 index 000000000000..addea8e3cc36 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -0,0 +1,48 @@ +//===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssembly-specific subclass of +/// TargetSubtarget. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyInstrInfo.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "WebAssemblyGenSubtargetInfo.inc" + +WebAssemblySubtarget & +WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) { + // Determine default and user-specified characteristics + + if (CPUString.empty()) + CPUString = "generic"; + + ParseSubtargetFeatures(CPUString, FS); + return *this; +} + +WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, + const std::string &CPU, + const std::string &FS, + const TargetMachine &TM) + : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false), + CPUString(CPU), TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS)), + TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} + +bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h new file mode 100644 index 000000000000..6f1761940930 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -0,0 +1,79 @@ +//=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the WebAssembly-specific subclass of +/// TargetSubtarget. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H + +#include "WebAssemblyFrameLowering.h" +#include "WebAssemblyISelLowering.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblySelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "WebAssemblyGenSubtargetInfo.inc" + +namespace llvm { + +class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { + bool HasSIMD128; + + /// String name of used CPU. + std::string CPUString; + + /// What processor and OS we're targeting. + Triple TargetTriple; + + WebAssemblyFrameLowering FrameLowering; + WebAssemblyInstrInfo InstrInfo; + WebAssemblySelectionDAGInfo TSInfo; + WebAssemblyTargetLowering TLInfo; + + /// Initializes using CPUString and the passed in feature string so that we + /// can use initializer lists for subtarget initialization. + WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS); + +public: + /// This constructor initializes the data members to match that + /// of the specified triple. + WebAssemblySubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM); + + const WebAssemblySelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const WebAssemblyFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const WebAssemblyTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const Triple &getTargetTriple() const { return TargetTriple; } + bool enableMachineScheduler() const override; + bool useAA() const override { return true; } + + // Predicates used by WebAssemblyInstrInfo.td. + bool hasAddr64() const { return TargetTriple.isArch64Bit(); } + bool hasSIMD128() const { return HasSIMD128; } + + /// Parses features string setting specified subtarget options. Definition of + /// function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp new file mode 100644 index 000000000000..6f93248bd13c --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -0,0 +1,173 @@ +//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the WebAssembly-specific subclass of TargetMachine. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyTargetMachine.h" +#include "WebAssemblyTargetObjectFile.h" +#include "WebAssemblyTargetTransformInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm" + +extern "C" void LLVMInitializeWebAssemblyTarget() { + // Register the target. + RegisterTargetMachine<WebAssemblyTargetMachine> X(TheWebAssemblyTarget32); + RegisterTargetMachine<WebAssemblyTargetMachine> Y(TheWebAssemblyTarget64); +} + +//===----------------------------------------------------------------------===// +// WebAssembly Lowering public interface. +//===----------------------------------------------------------------------===// + +/// Create an WebAssembly architecture model. +/// +WebAssemblyTargetMachine::WebAssemblyTargetMachine( + const Target &T, const Triple &TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT.isArch64Bit() + ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128" + : "e-p:32:32-i64:64-v128:8:128-n32:64-S128", + TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<WebAssemblyTargetObjectFile>()) { + initAsmInfo(); + + // We need a reducible CFG, so disable some optimizations which tend to + // introduce irreducibility. + setRequiresStructuredCFG(true); +} + +WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {} + +const WebAssemblySubtarget * +WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { + Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +namespace { +/// WebAssembly Code Generator Pass Configuration Options. +class WebAssemblyPassConfig final : public TargetPassConfig { +public: + WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { + return getTM<WebAssemblyTargetMachine>(); + } + + FunctionPass *createTargetRegisterAllocator(bool) override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; + + void addIRPasses() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addILPOpts() override; + void addPreRegAlloc() override; + void addRegAllocPasses(bool Optimized); + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; +}; +} // end anonymous namespace + +TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); + }); +} + +TargetPassConfig * +WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { + return new WebAssemblyPassConfig(this, PM); +} + +FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { + return nullptr; // No reg alloc +} + +void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + assert(!RegAllocPass && "WebAssembly uses no regalloc!"); + addRegAllocPasses(false); +} + +void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + assert(!RegAllocPass && "WebAssembly uses no regalloc!"); + addRegAllocPasses(true); +} + +//===----------------------------------------------------------------------===// +// The following functions are called from lib/CodeGen/Passes.cpp to modify +// the CodeGen pass sequence. +//===----------------------------------------------------------------------===// + +void WebAssemblyPassConfig::addIRPasses() { + // FIXME: the default for this option is currently POSIX, whereas + // WebAssembly's MVP should default to Single. + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + else + // Expand some atomic operations. WebAssemblyTargetLowering has hooks which + // control specifically what gets lowered. + addPass(createAtomicExpandPass(TM)); + + TargetPassConfig::addIRPasses(); +} + +bool WebAssemblyPassConfig::addPreISel() { return false; } + +bool WebAssemblyPassConfig::addInstSelector() { + addPass( + createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); + return false; +} + +bool WebAssemblyPassConfig::addILPOpts() { return true; } + +void WebAssemblyPassConfig::addPreRegAlloc() {} + +void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {} + +void WebAssemblyPassConfig::addPostRegAlloc() {} + +void WebAssemblyPassConfig::addPreSched2() {} + +void WebAssemblyPassConfig::addPreEmitPass() {} diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h new file mode 100644 index 000000000000..3226edcdc614 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -0,0 +1,51 @@ +// WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the WebAssembly-specific subclass of +/// TargetMachine. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H + +#include "WebAssemblySubtarget.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class WebAssemblyTargetMachine final : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + mutable StringMap<std::unique_ptr<WebAssemblySubtarget>> SubtargetMap; + +public: + WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + ~WebAssemblyTargetMachine() override; + const WebAssemblySubtarget * + getSubtargetImpl(const Function &F) const override; + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + /// \brief Get the TargetIRAnalysis for this target. + TargetIRAnalysis getTargetIRAnalysis() override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h new file mode 100644 index 000000000000..ee78b945ada2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h @@ -0,0 +1,67 @@ +//===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the WebAssembly-specific subclass of +/// TargetLoweringObjectFile. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + +class GlobalVariable; + +class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile { +public: + WebAssemblyTargetObjectFile() { + TextSection = nullptr; + DataSection = nullptr; + BSSSection = nullptr; + ReadOnlySection = nullptr; + + StaticCtorSection = nullptr; + StaticDtorSection = nullptr; + LSDASection = nullptr; + EHFrameSection = nullptr; + DwarfAbbrevSection = nullptr; + DwarfInfoSection = nullptr; + DwarfLineSection = nullptr; + DwarfFrameSection = nullptr; + DwarfPubTypesSection = nullptr; + DwarfDebugInlineSection = nullptr; + DwarfStrSection = nullptr; + DwarfLocSection = nullptr; + DwarfARangesSection = nullptr; + DwarfRangesSection = nullptr; + } + + MCSection *getSectionForConstant(SectionKind Kind, + const Constant *C) const override { + return ReadOnlySection; + } + + MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override { + return DataSection; + } + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp new file mode 100644 index 000000000000..fa88ed526df2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -0,0 +1,28 @@ +//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the WebAssembly-specific TargetTransformInfo +/// implementation. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +#define DEBUG_TYPE "wasmtti" + +TargetTransformInfo::PopcntSupportKind +WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // TODO: Make Math.popcount32 happen in WebAssembly. + return TTI::PSK_Software; +} diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h new file mode 100644 index 000000000000..08bd88c06985 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -0,0 +1,87 @@ +//==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file a TargetTransformInfo::Concept conforming object specific +/// to the WebAssembly target machine. +/// +/// It uses the target's detailed information to provide more precise answers to +/// certain TTI queries, while letting the target independent and default TTI +/// implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H + +#include "WebAssemblyTargetMachine.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include <algorithm> + +namespace llvm { + +class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { + typedef BasicTTIImplBase<WebAssemblyTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const WebAssemblyTargetMachine *TM; + const WebAssemblySubtarget *ST; + const WebAssemblyTargetLowering *TLI; + + const WebAssemblySubtarget *getST() const { return ST; } + const WebAssemblyTargetLowering *getTLI() const { return TLI; } + +public: + WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F) + : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), + TLI(Arg.TLI) {} + WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), + ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} + WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + TM = RHS.TM; + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + TM = std::move(RHS.TM); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + // TODO: Implement more Scalar TTI for WebAssembly + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + // TODO: Implement Vector TTI for WebAssembly + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 6ba897b8636d..9eee4a0f3d82 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, return new X86AsmInstrumentation(STI); } -} // namespace llvm +} // End llvm namespace diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h index 341fc81c0480..19ebcc44f61e 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -61,6 +61,6 @@ protected: unsigned InitialFrameReg; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index b3066efbab24..7ec02408ffa4 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand { return Kind == Memory && (!Mem.Size || Mem.Size == 32) && getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; } + bool isMemVX32X() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31; + } bool isMemVY32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32) && getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; } + bool isMemVY32X() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31; + } bool isMemVX64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64) && getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; } + bool isMemVX64X() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31; + } bool isMemVY64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64) && getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; } + bool isMemVY64X() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31; + } bool isMemVZ32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32) && getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31; diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5b53fbef3f71..cfc3ee2fb08f 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -69,7 +69,7 @@ namespace X86 { extern Target TheX86_32Target, TheX86_64Target; -} // namespace llvm +} static bool translateInstruction(MCInst &target, InternalInstruction &source, @@ -551,9 +551,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_REL8: isBranch = true; pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; - if(immediate & 0x80) + if (immediate & 0x80) immediate |= ~(0xffull); break; + case TYPE_REL16: + isBranch = true; + pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; + if (immediate & 0x8000) + immediate |= ~(0xffffull); + break; case TYPE_REL32: case TYPE_REL64: isBranch = true; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index d990bf3484bf..f73fa75f888e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1165,35 +1165,30 @@ static int readSIB(struct InternalInstruction* insn) { return -1; index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); + + // FIXME: The fifth bit (bit index 4) is only to be used for instructions + // that understand VSIB indexing. ORing the bit in here is mildy dangerous + // because performing math on an 'enum SIBIndex' can produce garbage. + // Excluding the "none" value, it should cover 6 spaces of register names: + // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI + // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX + // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX + // - 32 possibilities for each of XMM, YMM, ZMM registers + // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode, + // summing in a fully decoded index between 0 and 31 can end up with a value + // that looks like something in the low half of the XMM range. + // translateRMMemory() tries to reverse the damage, with only partial success, + // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt" if (insn->vectorExtensionType == TYPE_EVEX) index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; - switch (index) { - case 0x4: + if (index == 0x4) { insn->sibIndex = SIB_INDEX_NONE; - break; - default: + } else { insn->sibIndex = (SIBIndex)(sibIndexBase + index); - if (insn->sibIndex == SIB_INDEX_sib || - insn->sibIndex == SIB_INDEX_sib64) - insn->sibIndex = SIB_INDEX_NONE; - break; } - switch (scaleFromSIB(insn->sib)) { - case 0: - insn->sibScale = 1; - break; - case 1: - insn->sibScale = 2; - break; - case 2: - insn->sibScale = 4; - break; - case 3: - insn->sibScale = 8; - break; - } + insn->sibScale = 1 << scaleFromSIB(insn->sib); base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index ac484f317276..62b6b73e7864 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -140,6 +140,6 @@ public: private: bool HasCustomInstComment; }; -} // namespace llvm +} #endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 2bee518fed68..6e371da37290 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -159,6 +159,6 @@ public: } }; -} // namespace llvm +} #endif diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 2d85f84d6669..3e0dc1424609 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -29,13 +29,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// Option to allow disabling arithmetic relaxation to workaround PR9807, which -// is useful when running bitwise comparison experiments on Darwin. We should be -// able to remove this once PR9807 is resolved. -static cl::opt<bool> -MCDisableArithRelaxation("mc-x86-disable-arith-relaxation", - cl::desc("Disable relaxation of arithmetic instruction for X86")); - static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: @@ -243,29 +236,18 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) return true; - if (MCDisableArithRelaxation) - return false; - // Check if this instruction is ever relaxable. if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode()) return false; - // Check if it has an expression and is not RIP relative. - bool hasExp = false; - bool hasRIP = false; - for (unsigned i = 0; i < Inst.getNumOperands(); ++i) { - const MCOperand &Op = Inst.getOperand(i); - if (Op.isExpr()) - hasExp = true; - - if (Op.isReg() && Op.getReg() == X86::RIP) - hasRIP = true; - } + // Check if the relaxable operand has an expression. For the current set of + // relaxable instructions, the relaxable operand is always the last operand. + unsigned RelaxableOp = Inst.getNumOperands() - 1; + if (Inst.getOperand(RelaxableOp).isExpr()) + return true; - // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on - // how we do relaxations? - return hasExp && !hasRIP; + return false; } bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, @@ -426,7 +408,7 @@ namespace CU { UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF }; -} // namespace CU +} // end CU namespace class DarwinX86AsmBackend : public X86AsmBackend { const MCRegisterInfo &MRI; diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 69e9c7b4a83e..f0d00b0c1bc3 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -41,7 +41,7 @@ namespace X86 { /// AddrNumOperands - Total number of operands in a memory reference. AddrNumOperands = 5 }; -} // namespace X86 +} // end namespace X86; /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -213,11 +213,7 @@ namespace X86II { /// the offset from beginning of section. /// /// This is the TLS offset for the COFF/Windows TLS mechanism. - MO_SECREL, - - /// MO_NOPREFIX - On a symbol operand this indicates that the symbol should - /// not be mangled with a prefix. - MO_NOPREFIX, + MO_SECREL }; enum : uint64_t { @@ -762,8 +758,8 @@ namespace X86II { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } -} // namespace X86II +} -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 512afebf482e..a33468dc4769 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -28,7 +28,7 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; }; -} // namespace +} X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 7c09e5d59580..89f394582631 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -26,14 +26,17 @@ public: X86_64ELFRelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} const MCExpr *createExprForRelocation(RelocationRef Rel) override { - uint64_t RelType; Rel.getType(RelType); - symbol_iterator SymI = Rel.getSymbol(); + uint64_t RelType = Rel.getType(); + elf_symbol_iterator SymI = Rel.getSymbol(); + + ErrorOr<StringRef> SymNameOrErr = SymI->getName(); + if (std::error_code EC = SymNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef SymName = *SymNameOrErr; - StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; SymI->getAddress(SymAddr); uint64_t SymSize = SymI->getSize(); - auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile()); - int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl()); + int64_t Addend = *ELFRelocationRef(Rel).getAddend(); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index a523a32b2a2d..4899900dcef9 100644 --- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h +++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h @@ -28,7 +28,7 @@ enum Fixups { LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; -} // namespace X86 -} // namespace llvm +} +} #endif diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 020803b57f76..6221baba1793 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -62,7 +62,7 @@ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS); -} // namespace X86_MC +} MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, @@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); /// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); -} // namespace llvm +} // End llvm namespace // Defines symbolic names for X86 registers. This defines a mapping from diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index a5aadd6a385e..c9479b62f7b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -25,12 +25,15 @@ public: X86_64MachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} const MCExpr *createExprForRelocation(RelocationRef Rel) override { - const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObjectFile()); + const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObject()); - uint64_t RelType; Rel.getType(RelType); + uint64_t RelType = Rel.getType(); symbol_iterator SymI = Rel.getSymbol(); - StringRef SymName; SymI->getName(SymName); + ErrorOr<StringRef> SymNameOrErr = SymI->getName(); + if (std::error_code EC = SymNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef SymName = *SymNameOrErr; uint64_t SymAddr; SymI->getAddress(SymAddr); any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); @@ -89,10 +92,11 @@ public: symbol_iterator RSymI = Rel.getSymbol(); uint64_t RSymAddr; RSymI->getAddress(RSymAddr); - StringRef RSymName; - RSymI->getName(RSymName); + ErrorOr<StringRef> RSymName = RSymI->getName(); + if (std::error_code EC = RSymName.getError()) + report_fatal_error(EC.message()); - MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName); + MCSymbol *RSym = Ctx.getOrCreateSymbol(*RSymName); if (!RSym->isVariable()) RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx)); diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 773fbf41a7b1..9e801fc8f191 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -69,7 +69,7 @@ public: FixedValue); } }; -} // namespace +} static bool isFixupKindRIPRel(unsigned Kind) { return Kind == X86::reloc_riprel_4byte || diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 7d262cdbf51d..bd1bc9943b6d 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -31,7 +31,7 @@ namespace { bool IsCrossSection, const MCAsmBackend &MAB) const override; }; -} // namespace +} X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit) : MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64 diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index dc6dd66bcd85..92f42b68ae51 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() { MCWinCOFFStreamer::FinishImpl(); } -} // namespace +} MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, raw_pwrite_stream &OS, diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1e7d94287c4a..ef3318ba7580 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { for (unsigned i = 1; i < NumElts; i++) Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); } -} // namespace llvm +} // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 0139297fc72d..14b69434806e 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); /// \brief Decode a scalar float move instruction as a shuffle mask. void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); -} // namespace llvm +} // llvm namespace #endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 80f457984951..8403ae6101df 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass(); /// must run after prologue/epilogue insertion and before lowering /// the MachineInstr to MC. FunctionPass *createX86ExpandPseudoPass(); -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 205140144ab5..ba33248d2039 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -581,34 +581,6 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const { return AsmPrinter::GetCPISymbol(CPID); } -void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) { - SmallString<128> Directive; - raw_svector_ostream OS(Directive); - StringRef Name = Sym->getName(); - const Triple &TT = TM.getTargetTriple(); - - if (TT.isKnownWindowsMSVCEnvironment()) - OS << " /EXPORT:"; - else - OS << " -export:"; - - if ((TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) && - (Name[0] == getDataLayout().getGlobalPrefix())) - Name = Name.drop_front(); - - OS << Name; - - if (IsData) { - if (TT.isKnownWindowsMSVCEnvironment()) - OS << ",DATA"; - else - OS << ",data"; - } - - OS.flush(); - OutStreamer->EmitBytes(Directive); -} - void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { const Triple &TT = TM.getTargetTriple(); @@ -692,39 +664,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (TT.isOSBinFormatCOFF()) { - // Necessary for dllexport support - std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - for (const auto &Function : M) - if (Function.hasDLLExportStorageClass() && !Function.isDeclaration()) - DLLExportedFns.push_back(getSymbol(&Function)); + std::string Flags; + raw_string_ostream FlagsOS(Flags); + for (const auto &Function : M) + TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function, *Mang); for (const auto &Global : M.globals()) - if (Global.hasDLLExportStorageClass() && !Global.isDeclaration()) - DLLExportedGlobals.push_back(getSymbol(&Global)); - - for (const auto &Alias : M.aliases()) { - if (!Alias.hasDLLExportStorageClass()) - continue; - - if (Alias.getType()->getElementType()->isFunctionTy()) - DLLExportedFns.push_back(getSymbol(&Alias)); - else - DLLExportedGlobals.push_back(getSymbol(&Alias)); - } + TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global, *Mang); + for (const auto &Alias : M.aliases()) + TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias, *Mang); - // Output linker support code for dllexported globals on windows. - if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - const TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + FlagsOS.flush(); + // Output collected flags. + if (!Flags.empty()) { OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection()); - - for (auto & Symbol : DLLExportedGlobals) - GenerateExportDirective(Symbol, /*IsData=*/true); - for (auto & Symbol : DLLExportedFns) - GenerateExportDirective(Symbol, /*IsData=*/false); + OutStreamer->EmitBytes(Flags); } + + SM.serializeToStackMapSection(); } if (TT.isOSBinFormatELF()) { diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index acba21169c9c..7f5d127c68d5 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -30,8 +30,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { StackMaps SM; FaultMaps FM; - void GenerateExportDirective(const MCSymbol *Sym, bool IsData); - // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow // invariants (i.e. no other stackmaps, patchpoints, or control flow within diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 6d6831b18b0a..031ba4ba9e66 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -78,7 +78,7 @@ private: typedef DenseMap<MachineInstr *, CallContext> ContextMap; bool isLegal(MachineFunction &MF); - + bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap); void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, @@ -90,6 +90,13 @@ private: MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup, unsigned Reg); + enum InstClassification { Convert, Skip, Exit }; + + InstClassification classifyInstruction(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const X86RegisterInfo &RegInfo, + DenseSet<unsigned int> &UsedRegs); + const char *getPassName() const override { return "X86 Optimize Call Frame"; } const TargetInstrInfo *TII; @@ -99,13 +106,13 @@ private: }; char X86CallFrameOptimization::ID = 0; -} // namespace +} FunctionPass *llvm::createX86CallFrameOptimization() { return new X86CallFrameOptimization(); } -// This checks whether the transformation is legal. +// This checks whether the transformation is legal. // Also returns false in cases where it's potentially legal, but // we don't even want to try. bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { @@ -170,9 +177,8 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, if (!OptForSize) return false; - unsigned StackAlign = TFL->getStackAlignment(); - + int64_t Advantage = 0; for (auto CC : CallSeqMap) { // Call sites where no parameters are passed on the stack @@ -205,7 +211,6 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, return (Advantage >= 0); } - bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TFL = MF.getSubtarget().getFrameLowering(); @@ -237,6 +242,64 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { return Changed; } +X86CallFrameOptimization::InstClassification +X86CallFrameOptimization::classifyInstruction( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) { + if (MI == MBB.end()) + return Exit; + + // The instructions we actually care about are movs onto the stack + int Opcode = MI->getOpcode(); + if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr) + return Convert; + + // Not all calling conventions have only stack MOVs between the stack + // adjust and the call. + + // We want to tolerate other instructions, to cover more cases. + // In particular: + // a) PCrel calls, where we expect an additional COPY of the basereg. + // b) Passing frame-index addresses. + // c) Calling conventions that have inreg parameters. These generate + // both copies and movs into registers. + // To avoid creating lots of special cases, allow any instruction + // that does not write into memory, does not def or use the stack + // pointer, and does not def any register that was used by a preceding + // push. + // (Reading from memory is allowed, even if referenced through a + // frame index, since these will get adjusted properly in PEI) + + // The reason for the last condition is that the pushes can't replace + // the movs in place, because the order must be reversed. + // So if we have a MOV32mr that uses EDX, then an instruction that defs + // EDX, and then the call, after the transformation the push will use + // the modified version of EDX, and not the original one. + // Since we are still in SSA form at this point, we only need to + // make sure we don't clobber any *physical* registers that were + // used by an earlier mov that will become a push. + + if (MI->isCall() || MI->mayStore()) + return Exit; + + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + unsigned int Reg = MO.getReg(); + if (!RegInfo.isPhysicalRegister(Reg)) + continue; + if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister())) + return Exit; + if (MO.isDef()) { + for (unsigned int U : UsedRegs) + if (RegInfo.regsOverlap(Reg, U)) + return Exit; + } + } + + return Skip; +} + void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -254,8 +317,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. - unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; - + unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; + // A zero adjustment means no stack parameters if (!MaxAdjust) { Context.NoStackParams = true; @@ -284,11 +347,17 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, if (MaxAdjust > 4) Context.MovVector.resize(MaxAdjust, nullptr); - do { - int Opcode = I->getOpcode(); - if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) - break; + InstClassification Classification; + DenseSet<unsigned int> UsedRegs; + while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) != + Exit) { + if (Classification == Skip) { + ++I; + continue; + } + + // We know the instruction is a MOV32mi/MOV32mr. // We only want movs of the form: // movl imm/r32, k(%esp) // If we run into something else, bail. @@ -323,24 +392,20 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, return; Context.MovVector[StackDisp] = I; - ++I; - } while (I != MBB.end()); - - // We now expect the end of the sequence - a call and a stack adjust. - if (I == MBB.end()) - return; + for (const MachineOperand &MO : I->uses()) { + if (!MO.isReg()) + continue; + unsigned int Reg = MO.getReg(); + if (RegInfo.isPhysicalRegister(Reg)) + UsedRegs.insert(Reg); + } - // For PCrel calls, we expect an additional COPY of the basereg. - // If we find one, skip it. - if (I->isCopy()) { - if (I->getOperand(1).getReg() == - MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg()) - ++I; - else - return; + ++I; } - if (!I->isCall()) + // We now expect the end of the sequence. If we stopped early, + // or reached the end of the block without finding a call, bail. + if (I == MBB.end() || !I->isCall()) return; Context.Call = I; diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h index a377eb6051ae..0eb2494f1d63 100644 --- a/lib/Target/X86/X86CallingConv.h +++ b/lib/Target/X86/X86CallingConv.h @@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, return false; } -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3dc75d76cee3..02645460b6a2 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -2821,7 +2822,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool &IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); @@ -3117,8 +3118,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { } MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); - if (SymName) - MIB.addExternalSymbol(SymName, OpFlags); + if (Symbol) + MIB.addSym(Symbol, OpFlags); else MIB.addGlobalAddress(GV, 0, OpFlags); } diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 8305a0454c80..5eb4faeedff4 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -91,7 +91,7 @@ private: const X86InstrInfo *TII; // Machine instruction info. }; char FixupLEAPass::ID = 0; -} // namespace +} MachineInstr * FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 6f1d8e523732..40b9c8a863a3 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -279,7 +279,7 @@ namespace { void setKillFlags(MachineBasicBlock &MBB) const; }; char FPS::ID = 0; -} // namespace +} FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } @@ -544,7 +544,7 @@ namespace { return V < TE.from; } }; -} // namespace +} #ifndef NDEBUG static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { @@ -1530,7 +1530,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { if (Op.isKill()) moveToTop(FPReg, Inst); else - duplicateToTop(FPReg, FPReg, Inst); + duplicateToTop(FPReg, ScratchFPReg, Inst); // Emit the call. This will pop the operand. BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 2858e86cd0e0..c274c8820149 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -153,6 +153,6 @@ private: bool InEpilogue) const; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index f6785e161188..6b23e62a2d35 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -67,19 +67,19 @@ namespace { const Constant *CP; const BlockAddress *BlockAddr; const char *ES; + MCSymbol *MCSym; int JT; unsigned Align; // CP alignment. unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), - Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), - JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { - } + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), + Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), + MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {} bool hasSymbolicDisplacement() const { return GV != nullptr || CP != nullptr || ES != nullptr || - JT != -1 || BlockAddr != nullptr; + MCSym != nullptr || JT != -1 || BlockAddr != nullptr; } bool hasBaseOrIndexReg() const { @@ -134,11 +134,16 @@ namespace { dbgs() << ES; else dbgs() << "nul"; + dbgs() << " MCSym "; + if (MCSym) + dbgs() << MCSym; + else + dbgs() << "nul"; dbgs() << " JT" << JT << " Align" << Align << '\n'; } #endif }; -} // namespace +} namespace { //===--------------------------------------------------------------------===// @@ -258,6 +263,10 @@ namespace { else if (AM.ES) { assert(!AM.Disp && "Non-zero displacement is ignored with ES."); Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); + } else if (AM.MCSym) { + assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); + assert(AM.SymbolFlags == 0 && "oo"); + Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); } else if (AM.JT != -1) { assert(!AM.Disp && "Non-zero displacement is ignored with JT."); Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); @@ -310,7 +319,7 @@ namespace { return true; } }; -} // namespace +} bool @@ -604,7 +613,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) { bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM) { // Cannot combine ExternalSymbol displacements with integer offsets. - if (Offset != 0 && AM.ES) + if (Offset != 0 && (AM.ES || AM.MCSym)) return true; int64_t Val = AM.Disp + Offset; CodeModel::Model M = TM.getCodeModel(); @@ -690,6 +699,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); + } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) { + AM.MCSym = S->getMCSymbol(); } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); @@ -728,6 +739,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); + } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) { + AM.MCSym = S->getMCSymbol(); } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); @@ -1001,7 +1014,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // FIXME: JumpTable and ExternalSymbol address currently don't like // displacements. It isn't very important, but this should be fixed for // consistency. - if (!AM.ES && AM.JT != -1) return true; + if (!(AM.ES || AM.MCSym) && AM.JT != -1) + return true; if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM)) @@ -1013,13 +1027,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::FRAME_ALLOC_RECOVER: { if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) - if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) - if (ESNode->getOpcode() == ISD::TargetExternalSymbol) { - // Use the symbol and don't prefix it. - AM.ES = ESNode->getSymbol(); - AM.SymbolFlags = X86II::MO_NOPREFIX; - return false; - } + if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) { + // Use the symbol and don't prefix it. + AM.MCSym = ESNode->getMCSymbol(); + return false; + } break; } case ISD::Constant: { @@ -1473,6 +1485,7 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { N->getOpcode() != ISD::TargetJumpTable && N->getOpcode() != ISD::TargetGlobalAddress && N->getOpcode() != ISD::TargetExternalSymbol && + N->getOpcode() != ISD::MCSymbol && N->getOpcode() != ISD::TargetBlockAddress) return false; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ce1ca20ee81a..b16bd18aefaa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { + if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); @@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, return true; } -/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane. -/// -/// This checks a shuffle mask to see if it is performing the same -/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies -/// that it is also not lane-crossing. It may however involve a blend from the -/// same lane of a second vector. -/// -/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is -/// non-trivial to compute in the face of undef lanes. The representation is -/// *not* suitable for use with existing 256-bit shuffles as it will contain -/// entries from both V1 and V2 inputs to the wider mask. -static bool -is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, - SmallVectorImpl<int> &RepeatedMask) { - int LaneSize = 256 / VT.getScalarSizeInBits(); - RepeatedMask.resize(LaneSize, -1); - int Size = Mask.size(); - for (int i = 0; i < Size; ++i) { - if (Mask[i] < 0) - continue; - if ((Mask[i] % Size) / LaneSize != i / LaneSize) - // This entry crosses lanes, so there is no way to model this shuffle. - return false; - - // Ok, handle the in-lane shuffles by detecting if and when they repeat. - if (RepeatedMask[i % LaneSize] == -1) - // This is the first non-undef entry in this slot of a 256-bit lane. - RepeatedMask[i % LaneSize] = - Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size; - else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i]) - // Found a mismatch with the repeated mask. - return false; - } - return true; -} - /// \brief Checks whether a shuffle mask is equivalent to an explicit list of /// arguments. /// @@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } -/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask. -/// -/// This helper function produces an 8-bit shuffle immediate corresponding to -/// the ubiquitous shuffle encoding scheme used in x86 instructions for -/// shuffling 8 lanes. -static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, - SelectionDAG &DAG) { - assert(Mask.size() <= 8 && - "Up to 8 elts may be in Imm8 1-bit lane shuffle mask"); - unsigned Imm = 0; - for (unsigned i = 0; i < Mask.size(); ++i) - if (Mask[i] >= 0) - Imm |= (Mask[i] % 2) << i; - return DAG.getConstant(Imm, DL, MVT::i8); -} - /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, DAG.getConstant(PermMask, DL, MVT::i8)); } -/// \brief Handle lowering 4-lane 128-bit shuffles. -static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef<int> WidenedMask, - SelectionDAG &DAG) { - - assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!"); - // form a 128-bit permutation. - // convert the 64-bit shuffle mask selection values into 128-bit selection - // bits defined by a vshuf64x2 instruction's immediate control byte. - unsigned PermMask = 0, Imm = 0; - - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - if(WidenedMask[i] == SM_SentinelZero) - return SDValue(); - - // use first element in place of undef musk - Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i]; - PermMask |= (Imm % 4) << (i * 2); - } - - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); -} - /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then /// shuffling each lane. /// @@ -10144,105 +10068,86 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT, - ArrayRef<int> Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - - assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN"); - // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right) - int AlignVal = -1; - for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) { - if (Mask[i] < 0) - continue; - if (Mask[i] < i) - return SDValue(); - if (AlignVal == -1) - AlignVal = Mask[i] - i; - else if (Mask[i] - i != AlignVal) - return SDValue(); - } - // Vector source operands should be swapped - return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1, - DAG.getConstant(AlignVal, DL, MVT::i8)); -} +/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. +static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); -static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, - ArrayRef<int> Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { + // X86 has dedicated unpack instructions that can handle specific blend + // operations: UNPCKH and UNPCKL. + if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2); - assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV"); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG); +} - MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); +/// \brief Handle lowering of 16-lane 32-bit floating point shuffles. +static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - SmallVector<SDValue, 32> VPermMask; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) - VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) : - DAG.getConstant(Mask[i], DL,MaskEltVT)); - SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT, - VPermMask); - if (isSingleInputShuffleMask(Mask)) - return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + // Use dedicated unpack instructions for masks that match their pattern. + if (isShuffleEquivalent(V1, V2, Mask, + {// First 128-bit lane. + 0, 16, 1, 17, 4, 20, 5, 21, + // Second 128-bit lane. + 8, 24, 9, 25, 12, 28, 13, 29})) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, + {// First 128-bit lane. + 2, 18, 3, 19, 6, 22, 7, 23, + // Second 128-bit lane. + 10, 26, 11, 27, 14, 30, 15, 31})) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); - return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); } - -/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. -static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +/// \brief Handle lowering of 8-lane 64-bit integer shuffles. +static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - assert((V1.getSimpleValueType() == MVT::v8f64 || - V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); - assert((V2.getSimpleValueType() == MVT::v8f64 || - V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); + assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - SmallVector<int, 4> WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG)) - return Op; // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - - if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) - return Op; - - if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG)) - return Op; - - // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7 - if (isSingleInputShuffleMask(Mask)) { - if (!is128BitLaneCrossingShuffleMask(VT, Mask)) - return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1, - get1bitLaneShuffleImm8ForMask(Mask, DL, DAG)); + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); - SmallVector<int, 4> RepeatedMask; - if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) - return DAG.getNode(X86ISD::VPERMI, DL, VT, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); - } - return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. -static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - assert((V1.getSimpleValueType() == MVT::v16i32 || - V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); - assert((V2.getSimpleValueType() == MVT::v16i32 || - V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); + assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -10253,39 +10158,16 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, 0, 16, 1, 17, 4, 20, 5, 21, // Second 128-bit lane. 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {// First 128-bit lane. 2, 18, 3, 19, 6, 22, 7, 23, // Second 128-bit lane. 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, - 12, 12, 14, 14})) - return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1); - if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, - 13, 13, 15, 15})) - return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1); - - SmallVector<int, 4> RepeatedMask; - if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) { - if (isSingleInputShuffleMask(Mask)) { - unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI; - return DAG.getNode(Opc, DL, VT, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); - } - - for (int i = 0; i < 4; ++i) - if (RepeatedMask[i] >= 16) - RepeatedMask[i] -= 12; - return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG); - } - - if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) - return Op; - - return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. @@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // the requisite ISA extensions for that element type are available. switch (VT.SimpleTy) { case MVT::v8f64: - case MVT::v8i64: - return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16f32: + return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v8i64: + return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i32: - return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); @@ -10759,11 +10643,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, assert(VecVT.is128BitVector() && "Unexpected vector length"); - if (Subtarget->hasSSE41()) { - SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); - if (Res.getNode()) + if (Subtarget->hasSSE41()) + if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG)) return Res; - } MVT VT = Op.getSimpleValueType(); // TODO: handle v16i8. @@ -12253,11 +12135,9 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - if (Subtarget->hasFp256()) { - SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); - if (Res.getNode()) + if (Subtarget->hasFp256()) + if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; - } return SDValue(); } @@ -12272,11 +12152,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1) return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG); - if (Subtarget->hasFp256()) { - SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); - if (Res.getNode()) + if (Subtarget->hasFp256()) + if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; - } assert(!VT.is256BitVector() || !SVT.is128BitVector() || VT.getVectorNumElements() != SVT.getVectorNumElements()); @@ -15117,6 +14995,54 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } +/// When the 32-bit MSVC runtime transfers control to us, either to an outlined +/// function or when returning to a parent frame after catching an exception, we +/// recover the parent frame pointer by doing arithmetic on the incoming EBP. +/// Here's the math: +/// RegNodeBase = EntryEBP - RegNodeSize +/// ParentFP = RegNodeBase - RegNodeFrameOffset +/// Subtracting RegNodeSize takes us to the offset of the registration node, and +/// subtracting the offset (negative on x86) takes us back to the parent FP. +static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, + SDValue EntryEBP) { + MachineFunction &MF = DAG.getMachineFunction(); + SDLoc dl; + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(); + + // It's possible that the parent function no longer has a personality function + // if the exceptional code was optimized away, in which case we just return + // the incoming EBP. + if (!Fn->hasPersonalityFn()) + return EntryEBP; + + // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See + // WinEHStatePass for the full struct definition. + int RegNodeSize; + switch (classifyEHPersonality(Fn->getPersonalityFn())) { + default: + report_fatal_error("can only recover FP for MSVC EH personality functions"); + case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break; + case EHPersonality::MSVC_CXX: RegNodeSize = 16; break; + } + + // Get an MCSymbol that will ultimately resolve to the frame offset of the EH + // registration. + MCSymbol *OffsetSym = + MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(Fn->getName())); + SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); + SDValue RegNodeFrameOffset = + DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal); + + // RegNodeBase = EntryEBP - RegNodeSize + // ParentFP = RegNodeBase - RegNodeFrameOffset + SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP, + DAG.getConstant(RegNodeSize, dl, PtrVT)); + return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset); +} + static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); @@ -15206,6 +15132,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1,Src2), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_2OP_MASK_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue PassThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + // We specify 2 possible modes for intrinsics, with/without rounding modes. + // First, we check if the intrinsic have rounding mode (6 operands), + // if not, we set rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 6) + Rnd = Op.getOperand(5); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Rnd), + Mask, PassThru, Subtarget, DAG); + } case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -15230,11 +15173,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } + case VPERM_3OP_MASKZ: + case VPERM_3OP_MASK: + case FMA_OP_MASK3: + case FMA_OP_MASKZ: case FMA_OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + EVT VT = Op.getValueType(); + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else if (IntrData->Type == FMA_OP_MASK3) + PassThru = Src3; + else + PassThru = Src1; + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -15246,12 +15204,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), - Mask, Src1, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, Src3), - Mask, Src1, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); } case CMP_MASK: case CMP_MASK_CC: { @@ -15330,18 +15288,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue PassThru = Op.getOperand(2); if (isAllOnes(Mask)) // return data as is return Op.getOperand(1); - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, - PassThru); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + DataToCompress), + Mask, PassThru, Subtarget, DAG); } case BLEND: { SDValue Mask = Op.getOperand(3); @@ -15532,15 +15482,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( GlobalValue::getRealLinkageName(Fn->getName())); - StringRef Name = LSDASym->getName(); - assert(Name.data()[Name.size()] == '\0' && "not null terminated"); // Generate a simple absolute symbol reference. This intrinsic is only // supported on 32-bit Windows, which isn't PIC. - SDValue Result = - DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX); + SDValue Result = DAG.getMCSymbol(LSDASym, VT); return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); } + + case Intrinsic::x86_seh_recoverfp: { + SDValue FnOp = Op.getOperand(1); + SDValue IncomingFPOp = Op.getOperand(2); + GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); + auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); + if (!Fn) + report_fatal_error( + "llvm.x86.seh.recoverfp must take a function as the first argument"); + return recoverFramePointer(DAG, Fn, IncomingFPOp); + } } } @@ -15550,7 +15508,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, const X86Subtarget * Subtarget) { SDLoc dl(Op); ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); - assert(C && "Invalid scale type"); + if (!C) + llvm_unreachable("Invalid scale type"); + unsigned ScaleVal = C->getZExtValue(); + if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) + llvm_unreachable("Valid scale values are 1, 2, 4, 8"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); EVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); @@ -15558,8 +15521,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); - else - MaskInReg = DAG.getBitcast(MaskVT, Mask); + else { + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); + + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -15576,7 +15547,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); - assert(C && "Invalid scale type"); + if (!C) + llvm_unreachable("Invalid scale type"); + unsigned ScaleVal = C->getZExtValue(); + if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) + llvm_unreachable("Valid scale values are 1, 2, 4, 8"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -15586,8 +15562,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); - else - MaskInReg = DAG.getBitcast(MaskVT, Mask); + else { + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); + + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); @@ -15725,37 +15709,38 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Results, DL); } -static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { +static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); SDLoc dl(Op); - SDValue FnOp = Op.getOperand(2); - SDValue FPOp = Op.getOperand(3); + SDValue Chain = Op.getOperand(0); - // Compute the symbol for the parent EH registration. We know it'll get - // emitted later. - auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal()); - MCSymbol *ParentFrameSym = - MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(Fn->getName())); - StringRef Name = ParentFrameSym->getName(); - assert(Name.data()[Name.size()] == '\0' && "not null terminated"); - - // Create a TargetExternalSymbol for the label to avoid any target lowering - // that would make this PC relative. - MVT PtrVT = Op.getSimpleValueType(); - SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); - SDValue OffsetVal = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym); - - // Add the offset to the FP. - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal); - - // Load the second field of the struct, which is 4 bytes in. See - // WinEHStatePass for more info. - Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT)); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(), - false, false, false, 0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = TLI.getPointerTy(); + + const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = + RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); + unsigned SPReg = RegInfo->getStackRegister(); + + // Get incoming EBP. + SDValue IncomingEBP = + DAG.getCopyFromReg(Chain, dl, FrameReg, VT); + + // Load [EBP-24] into SP. + SDValue SPAddr = + DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT)); + SDValue NewSP = + DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false, + false, VT.getScalarSizeInBits() / 8); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); + + // FIXME: Restore the base pointer in case of stack realignment! + + // Adjust EBP to point back to the original frame position. + SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP); + Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); + return Chain; } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, @@ -15764,8 +15749,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == Intrinsic::x86_seh_exceptioninfo) - return LowerEXCEPTIONINFO(Op, Subtarget, DAG); + if (IntNo == llvm::Intrinsic::x86_seh_restoreframe) + return LowerSEHRESTOREFRAME(Op, Subtarget, DAG); return SDValue(); } @@ -15884,16 +15869,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - - SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, - DataToCompress, DAG.getUNDEF(VT)); + SDValue Compressed = + getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), + Mask, DAG.getUNDEF(VT), Subtarget, DAG); return DAG.getStore(Chain, dl, Compressed, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); @@ -15901,7 +15879,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case EXPAND_FROM_MEM: { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); - SDValue PathThru = Op.getOperand(3); + SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); EVT VT = Op.getValueType(); @@ -15909,21 +15887,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, if (isAllOnes(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); SDValue Results[] = { - DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru), - Chain}; + getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand), + Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } } @@ -18476,6 +18447,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UMIN: return "X86ISD::UMIN"; case X86ISD::SMAX: return "X86ISD::SMAX"; case X86ISD::SMIN: return "X86ISD::SMIN"; + case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -18618,9 +18590,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; + case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; - case X86ISD::AVG: return "X86ISD::AVG"; + case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; } @@ -18777,7 +18750,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; } bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - if (!(Subtarget->hasFMA() || Subtarget->hasFMA4())) + if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512())) return false; VT = VT.getScalarType(); @@ -19962,6 +19935,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Replace 213-type (isel default) FMA3 instructions with 231-type for // accumulator loops. Writing back to the accumulator allows the coalescer // to remove extra copies in the loop. +// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937). MachineBasicBlock * X86TargetLowering::emitFMA3Instr(MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -21302,8 +21276,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); - SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true); - if (LD.getNode()) + if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) return LD; if (isTargetShuffle(N->getOpcode())) { @@ -21451,8 +21424,7 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { /// use 64-bit extracts and shifts. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI); - if (NewOp.getNode()) + if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; SDValue InputVector = N->getOperand(0); @@ -22895,16 +22867,14 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - if (N->getOpcode() == ISD::SHL) { - SDValue V = PerformSHLCombine(N, DAG); - if (V.getNode()) return V; - } + if (N->getOpcode() == ISD::SHL) + if (SDValue V = PerformSHLCombine(N, DAG)) + return V; - if (N->getOpcode() != ISD::SRA) { - // Try to fold this logical shift into a zero vector. - SDValue V = performShiftToAllZeros(N, DAG, Subtarget); - if (V.getNode()) return V; - } + // Try to fold this logical shift into a zero vector. + if (N->getOpcode() != ISD::SRA) + if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget)) + return V; return SDValue(); } @@ -23284,8 +23254,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget); - if (R.getNode()) + if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; SDValue N0 = N->getOperand(0); @@ -23480,11 +23449,9 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); - if (Subtarget->hasCMov()) { - SDValue RV = performIntegerAbsCombine(N, DAG); - if (RV.getNode()) + if (Subtarget->hasCMov()) + if (SDValue RV = performIntegerAbsCombine(N, DAG)) return RV; - } return SDValue(); } @@ -24266,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - if (VT.isVector()) { - auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) { + if (VT.isVector() && Subtarget->hasSSE2()) { + auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) { EVT InVT = N.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), - 128 / InVT.getScalarSizeInBits()); - SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(), + Size / InVT.getScalarSizeInBits()); + SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(), DAG.getUNDEF(InVT)); Opnds[0] = N; return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds); }; + // If target-size is less than 128-bits, extend to a type that would extend + // to 128 bits, extend that and extract the original target vector. + if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) && + (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && + (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { + unsigned Scale = 128 / VT.getSizeInBits(); + EVT ExVT = + EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); + SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, + DAG.getIntPtrConstant(0, DL)); + } + // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG // which ensures lowering to X86ISD::VSEXT (pmovsx*). if (VT.getSizeInBits() == 128 && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { - SDValue ExOp = ExtendToVec128(DL, N0); + SDValue ExOp = ExtendVecSize(DL, N0, 128); return DAG.getSignExtendVectorInReg(ExOp, DL, VT); } @@ -24301,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, ++i, Offset += NumSubElts) { SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, DAG.getIntPtrConstant(Offset, DL)); - SrcVec = ExtendToVec128(DL, SrcVec); + SrcVec = ExtendVecSize(DL, SrcVec, 128); SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT); Opnds.push_back(SrcVec); } @@ -24312,11 +24293,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasFp256()) return SDValue(); - if (VT.isVector() && VT.getSizeInBits() == 256) { - SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); - if (R.getNode()) + if (VT.isVector() && VT.getSizeInBits() == 256) + if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; - } return SDValue(); } @@ -24332,7 +24311,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, EVT ScalarVT = VT.getScalarType(); if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || - (!Subtarget->hasFMA() && !Subtarget->hasFMA4())) + (!Subtarget->hasFMA() && !Subtarget->hasFMA4() && + !Subtarget->hasAVX512())) return SDValue(); SDValue A = N->getOperand(0); @@ -24398,11 +24378,10 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(1, dl, VT)); } } - if (VT.is256BitVector()) { - SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); - if (R.getNode()) + + if (VT.is256BitVector()) + if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; - } // (i8,i32 zext (udivrem (i8 x, i8 y)) -> // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y) @@ -24606,10 +24585,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, if (CC == X86::COND_B) return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0)); - SDValue Flags; - - Flags = checkBoolTestSetCCCombine(EFLAGS, CC); - if (Flags.getNode()) { + if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); } @@ -24628,10 +24604,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, SDValue EFLAGS = N->getOperand(3); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); - SDValue Flags; - - Flags = checkBoolTestSetCCCombine(EFLAGS, CC); - if (Flags.getNode()) { + if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, Flags); @@ -24695,16 +24668,18 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, // Now move on to more general possibilities. SDValue Op0 = N->getOperand(0); - EVT InVT = Op0->getValueType(0); + EVT VT = N->getValueType(0); + EVT InVT = Op0.getValueType(); + EVT InSVT = InVT.getScalarType(); // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) - if (InVT == MVT::v8i8 || InVT == MVT::v4i8 || - InVT == MVT::v8i16 || InVT == MVT::v4i16) { + if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) { SDLoc dl(N); - MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements()); + EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have @@ -24714,10 +24689,10 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, EVT LdVT = Ld->getValueType(0); // This transformation is not supported if the result type is f16 - if (N->getValueType(0) == MVT::f16) + if (VT == MVT::f16) return SDValue(); - if (!Ld->isVolatile() && !N->getValueType(0).isVector() && + if (!Ld->isVolatile() && !VT.isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget->is64Bit() && LdVT == MVT::i64) { SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD( @@ -25683,75 +25658,40 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // Otherwise, check to see if this is a register class of the wrong value // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. - if (Res.second->hasType(VT)) + // MVT::Other is used to specify clobber names. + if (Res.second->hasType(VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. - // All of the single-register GCC register classes map their values onto - // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we - // really want an 8-bit or 32-bit register, map to the appropriate register - // class and return the appropriate register. - if (Res.second == &X86::GR16RegClass) { - if (VT == MVT::i8 || VT == MVT::i1) { - unsigned DestReg = 0; - switch (Res.first) { - default: break; - case X86::AX: DestReg = X86::AL; break; - case X86::DX: DestReg = X86::DL; break; - case X86::CX: DestReg = X86::CL; break; - case X86::BX: DestReg = X86::BL; break; - } - if (DestReg) { - Res.first = DestReg; - Res.second = &X86::GR8RegClass; - } - } else if (VT == MVT::i32 || VT == MVT::f32) { - unsigned DestReg = 0; - switch (Res.first) { - default: break; - case X86::AX: DestReg = X86::EAX; break; - case X86::DX: DestReg = X86::EDX; break; - case X86::CX: DestReg = X86::ECX; break; - case X86::BX: DestReg = X86::EBX; break; - case X86::SI: DestReg = X86::ESI; break; - case X86::DI: DestReg = X86::EDI; break; - case X86::BP: DestReg = X86::EBP; break; - case X86::SP: DestReg = X86::ESP; break; - } - if (DestReg) { - Res.first = DestReg; - Res.second = &X86::GR32RegClass; - } - } else if (VT == MVT::i64 || VT == MVT::f64) { - unsigned DestReg = 0; - switch (Res.first) { - default: break; - case X86::AX: DestReg = X86::RAX; break; - case X86::DX: DestReg = X86::RDX; break; - case X86::CX: DestReg = X86::RCX; break; - case X86::BX: DestReg = X86::RBX; break; - case X86::SI: DestReg = X86::RSI; break; - case X86::DI: DestReg = X86::RDI; break; - case X86::BP: DestReg = X86::RBP; break; - case X86::SP: DestReg = X86::RSP; break; - } - if (DestReg) { - Res.first = DestReg; - Res.second = &X86::GR64RegClass; - } - } else if (VT != MVT::Other) { - // Type mismatch and not a clobber: Return an error; + // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should + // return "eax". This should even work for things like getting 64bit integer + // registers when given an f64 type. + const TargetRegisterClass *Class = Res.second; + if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass || + Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) { + unsigned Size = VT.getSizeInBits(); + MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8 + : Size == 16 ? MVT::i16 + : Size == 32 ? MVT::i32 + : Size == 64 ? MVT::i64 + : MVT::Other; + unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy); + if (DestReg > 0) { + Res.first = DestReg; + Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass + : SimpleTy == MVT::i16 ? &X86::GR16RegClass + : SimpleTy == MVT::i32 ? &X86::GR32RegClass + : &X86::GR64RegClass; + assert(Res.second->contains(Res.first) && "Register in register class"); + } else { + // No register found/type mismatch. Res.first = 0; Res.second = nullptr; } - } else if (Res.second == &X86::FR32RegClass || - Res.second == &X86::FR64RegClass || - Res.second == &X86::VR128RegClass || - Res.second == &X86::VR256RegClass || - Res.second == &X86::FR32XRegClass || - Res.second == &X86::FR64XRegClass || - Res.second == &X86::VR128XRegClass || - Res.second == &X86::VR256XRegClass || - Res.second == &X86::VR512RegClass) { + } else if (Class == &X86::FR32RegClass || Class == &X86::FR64RegClass || + Class == &X86::VR128RegClass || Class == &X86::VR256RegClass || + Class == &X86::FR32XRegClass || Class == &X86::FR64XRegClass || + Class == &X86::VR128XRegClass || Class == &X86::VR256XRegClass || + Class == &X86::VR512RegClass) { // Handle references to XMM physical registers that got mapped into the // wrong class. This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can @@ -25767,15 +25707,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::VR256RegClass; else if (X86::VR512RegClass.hasType(VT)) Res.second = &X86::VR512RegClass; - else if (VT != MVT::Other) { + else { // Type mismatch and not a clobber: Return an error; Res.first = 0; Res.second = nullptr; } - } else if (VT != MVT::Other) { - // Type mismatch and not a clobber: Return an error; - Res.first = 0; - Res.second = nullptr; } return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9c98333776cf..17660891635c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -211,7 +211,8 @@ namespace llvm { // FP vector get exponent FGETEXP_RND, - + // FP Scale + SCALEF, // Integer add/sub with unsigned saturation. ADDUS, SUBUS, @@ -238,6 +239,9 @@ namespace llvm { /// Signed integer max and min. SMAX, SMIN, + // Integer absolute value + ABS, + /// Floating point max and min. FMAX, FMIN, @@ -516,7 +520,7 @@ namespace llvm { // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be // thought as target memory ops! }; - } // namespace X86ISD + } /// Define some predicates that are used for node matching. namespace X86 { @@ -583,7 +587,7 @@ namespace llvm { TO_ZERO = 3, CUR_DIRECTION = 4 }; - } // namespace X86 + } //===--------------------------------------------------------------------===// // X86 Implementation of the TargetLowering interface @@ -638,9 +642,8 @@ namespace llvm { /// legal as the hook is used before type legalization. bool isSafeMemOpType(MVT VT) const override; - /// Returns true if the target allows - /// unaligned memory accesses. of the specified type. Returns whether it - /// is "fast" by reference in the second argument. + /// Returns true if the target allows unaligned memory accesses of the + /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; @@ -1120,6 +1123,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } -} // namespace llvm +} #endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index de6a83506b28..b309b8210851 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -274,6 +274,16 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common<O, F, _, Outs, + !con((ins _.RC:$src1), NonTiedIns), + !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), + !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), + OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, + (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>; multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -3436,7 +3446,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix, "$rc, $src2, $src1", "$src1, $src2, $rc", @@ -3446,7 +3456,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", @@ -3481,16 +3491,16 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>, + defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>, + defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>, + defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>, + defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } @@ -3513,6 +3523,48 @@ let Predicates = [HasDQI] in { defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; } +multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, + "$src2, $src1", "$src1, $src2", + (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V; + defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (OpNode _.RC:$src1, (_.VT (X86VBroadcast + (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} +defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; + //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// @@ -3870,6 +3922,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; + +multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512; + + let Predicates = [HasVLX, HasBWI] in { + defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256; + defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128; + } +} + +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; + //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// @@ -3950,188 +4015,295 @@ let Predicates = [HasAVX512] in { // let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode = null_frag> { +multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, AVX512FMA3Base; - let mayLoad = 1 in - defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + let mayLoad = 1 in { + defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>, AVX512FMA3Base; - defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; - } -} // Constraints = "$src1 = $dst" + } +} -let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, - X86VectorVTInfo _, - SDPatternOperator OpNode> { - defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), +multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>, AVX512FMA3Base, EVEX_B, EVEX_RC; - } +} } // Constraints = "$src1 = $dst" -multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr, - X86VectorVTInfo VTI, SDPatternOperator OpNode> { - defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix), - VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>; +multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>, + avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } } -multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231, - string OpcodeStr, X86VectorVTInfo VTI, - SDPatternOperator OpNode> { - defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix), - VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>; - defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix), - VTI>, EVEX_CD8<VTI.EltSize, CD8VF>; +multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, + avx512vl_f32_info>; + defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, + avx512vl_f64_info>, VEX_W; } -multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231, - string OpcodeStr, - SDPatternOperator OpNode, - SDPatternOperator OpNodeRnd> { -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v16f32_info, OpNode>, - avx512_fma3_round_forms<opc213, OpcodeStr, - v16f32_info, OpNodeRnd>, EVEX_V512; - defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v8f32x_info, OpNode>, EVEX_V256; - defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v4f32x_info, OpNode>, EVEX_V128; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.RC:$src3), + OpcodeStr, "$src3, $src2", "$src2, $src3", + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.MemOp:$src3), + OpcodeStr, "$src3, $src2", "$src2, $src3", + (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.ScalarMemOp:$src3), + OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", + "$src2, ${src3}"##_.BroadcastStr, + (_.VT (OpNode _.RC:$src2, + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + _.RC:$src1))>, AVX512FMA3Base, EVEX_B; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v8f64_info, OpNode>, - avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info, - OpNodeRnd>, EVEX_V512, VEX_W; - defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v4f64x_info, OpNode>, - EVEX_V256, VEX_W; - defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v2f64x_info, OpNode>, - EVEX_V128, VEX_W; +} + +multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", + (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>, + avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>; -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; +multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, + avx512vl_f32_info>; + defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, + avx512vl_f64_info>, VEX_W; +} + +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let mayLoad = 1 in - def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"), - [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), - _.RC:$src3)))]>; - def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2), - !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, - ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"), - [(set _.RC:$dst, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))), - _.RC:$src3))]>, EVEX_B; +multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src3, _.RC:$src2), + OpcodeStr, "$src2, $src3", "$src3, $src2", + (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src3, _.MemOp:$src2), + OpcodeStr, "$src2, $src3", "$src3, $src2", + (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src3, _.ScalarMemOp:$src2), + OpcodeStr, "${src2}"##_.BroadcastStr##", $src3", + "$src3, ${src2}"##_.BroadcastStr, + (_.VT (OpNode _.RC:$src1, + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + _.RC:$src3))>, AVX512FMA3Base, EVEX_B; + } } -} // Constraints = "$src1 = $dst" -multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc), + OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc", + (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode,v16f32_info>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v8f32x_info>, EVEX_V256, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v4f32x_info>, EVEX_V128, - EVEX_CD8<32, CD8VF>; +multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>, + avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v8f64_info>, EVEX_V512, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v4f64x_info>, EVEX_V256, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v2f64x_info>, EVEX_V128, - VEX_W, EVEX_CD8<32, CD8VF>; + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; -defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; -defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; -defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; -defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; -defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; +multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, + avx512vl_f32_info>; + defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, + avx512vl_f64_info>, VEX_W; +} + +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, - PatFrag mem_frag> { - let isCommutable = 1 in - def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set RC:$dst, - (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; +multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, + dag RHS_r, dag RHS_m > { + defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.RC:$src3), OpcodeStr, + "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base; + let mayLoad = 1 in - def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, f128mem:$src3), - !strconcat(OpcodeStr, + defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, + "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base; + + defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>, + AVX512FMA3Base, EVEX_B, EVEX_RC; + + let isCodeGenOnly = 1 in { + def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set RC:$dst, - (OpVT (OpNode RC:$src2, RC:$src1, - (mem_frag addr:$src3))))]>; + [RHS_r]>; + let mayLoad = 1 in + def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [RHS_m]>; + }// isCodeGenOnly = 1 +} +}// Constraints = "$src1 = $dst" + +multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ , + string SUFF> { + + defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ , + (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), + (_.VT (OpNode _.RC:$src2, _.RC:$src1, + (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))))), + (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3, + (i32 imm:$rc))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, + _.FRC:$src3))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, + (_.ScalarLdFrag addr:$src3))))>; + + defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ , + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), + (_.VT (OpNode _.RC:$src2, + (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), + _.RC:$src1)), + (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1, + (i32 imm:$rc))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, + _.FRC:$src1))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, + (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>; + + defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ , + (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), + (_.VT (OpNode _.RC:$src1, + (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), + _.RC:$src2)), + (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2, + (i32 imm:$rc))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, + _.FRC:$src2))), + (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>; +} + +multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{ + let Predicates = [HasAVX512] in { + defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, + OpNodeRnd, f32x_info, "SS">, + EVEX_CD8<32, CD8VT1>, VEX_LIG; + defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, + OpNodeRnd, f64x_info, "SD">, + EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; + } } -} // Constraints = "$src1 = $dst" -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double @@ -5427,10 +5599,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">; multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag GatherNode> { - let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", + ExeDomain = _.ExeDomain in def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb), (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr#_.Suffix, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [(set _.RC:$dst, _.KRCWM:$mask_wb, (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask, @@ -5438,67 +5611,104 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; +multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, + vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512, + vz64mem, mgatherv8i64>, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, + vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256, + vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, + vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, + vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W; +} +} + +multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem, + mgatherv16i32>, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem, + mgatherv8i64>, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, + vy32xmem, mgatherv8i32>, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128, + vy64xmem, mgatherv4i64>, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, + vx32xmem, mgatherv4i32>, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, + vx64xmem, mgatherv2i64>, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; } -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; +defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, + avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; + +defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, + avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag ScatterNode> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb), (ins memop:$dst, _.KRCWM:$mask, _.RC:$src), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr#_.Suffix, "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src), _.KRCWM:$mask, vectoraddr:$dst))]>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; +multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, + vy32xmem, mscatterv8i32>, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512, + vz64mem, mscatterv8i64>, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, + vx32xmem, mscatterv4i32>, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256, + vy64xmem, mscatterv4i64>, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, + vx32xmem, mscatterv4i32>, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, + vx64xmem, mscatterv2i64>, EVEX_V128, VEX_W; +} +} + +multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz32mem, + mscatterv16i32>, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz64mem, + mscatterv8i64>, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, + vy32xmem, mscatterv8i32>, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128, + vy64xmem, mscatterv4i64>, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, + vx32xmem, mscatterv4i32>, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, + vx64xmem, mscatterv2i64>, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem, - mscatterv16i32>, EVEX_V512; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem, - mscatterv16i32>, EVEX_V512; +defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, + avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, + avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, @@ -5599,77 +5809,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, X86MemOperand x86scalar_mop, - string BrdcstStr> { - def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, EVEX; - def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - []>, EVEX, EVEX_K; - def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src), - !strconcat(OpcodeStr, - "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"), - []>, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, EVEX; - def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins KRC:$mask, x86memop:$src), - !strconcat(OpcodeStr, - "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - []>, EVEX, EVEX_K; - def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins KRC:$mask, x86memop:$src), - !strconcat(OpcodeStr, - "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"), - []>, EVEX, EVEX_KZ; - def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins x86scalar_mop:$src), - !strconcat(OpcodeStr, "\t{${src}", BrdcstStr, - ", $dst|$dst, ${src}", BrdcstStr, "}"), - []>, EVEX, EVEX_B; - def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins KRC:$mask, x86scalar_mop:$src), - !strconcat(OpcodeStr, "\t{${src}", BrdcstStr, - ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"), - []>, EVEX, EVEX_B, EVEX_K; - def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), - (ins KRC:$mask, x86scalar_mop:$src), - !strconcat(OpcodeStr, "\t{${src}", BrdcstStr, - ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}", - BrdcstStr, "}"), - []>, EVEX, EVEX_B, EVEX_KZ; - } -} - -defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512, - i512mem, i32mem, "{1to16}">, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512, - i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -def : Pat<(xor - (bc_v16i32 (v16i1sextv16i32)), - (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1))), - (VPABSDZrr VR512:$src)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPABSQZrr VR512:$src)>; - multiclass avx512_conflict<bits<8> opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, @@ -5868,26 +6007,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // + multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst), - (ins _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", - [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, - _.ImmAllZerosV)))]>, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst), - (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", - [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, - _.RC:$src0)))]>, EVEX_K; + defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), + (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", + (_.VT (X86compress _.RC:$src1))>, AVX5128IBase; let mayStore = 1 in { + def mr : AVX5128I<opc, MRMDestMem, (outs), + (ins _.MemOp:$dst, _.RC:$src), + OpcodeStr # "\t{$src, $dst |$dst, $src}", + []>, EVEX_CD8<_.EltSize, CD8VT1>; + def mrk : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", - [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)), + [(store (_.VT (vselect _.KRCWM:$mask, + (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)), addr:$dst)]>, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -5915,37 +6052,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info // expand multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), - (ins _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", - [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src), - _.ImmAllZerosV)))]>, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", - [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, - (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K; - - let mayLoad = 1, Constraints = "$src0 = $dst" in - def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", - [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, - (_.VT (bitconvert - (_.LdFrag addr:$src))), - _.RC:$src0)))]>, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", + (_.VT (X86expand _.RC:$src1))>, AVX5128IBase; let mayLoad = 1 in - def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), - (ins _.KRCWM:$mask, _.MemOp:$src), - OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", - [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, - (_.VT (bitconvert (_.LdFrag addr:$src))), - _.ImmAllZerosV)))]>, - EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; + defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", + (_.VT (X86expand (_.VT (bitconvert + (_.LdFrag addr:$src1)))))>, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, @@ -6175,3 +6291,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1), OpcodeStr##_.Suffix, + "$src1", "$src1", + (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase; + + let mayLoad = 1 in + defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.MemOp:$src1), OpcodeStr##_.Suffix, + "$src1", "$src1", + (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> : + avx512_unary_rm<opc, OpcodeStr, OpNode, _> { + let mayLoad = 1 in + defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix, + "${src1}"##_.BroadcastStr, + "${src1}"##_.BroadcastStr, + (_.VT (OpNode (X86VBroadcast + (_.ScalarLdFrag addr:$src1))))>, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>, + EVEX_V256; + defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>, + EVEX_V128; + } +} + +multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>, + EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>, + EVEX_V256; + defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>, + EVEX_V128; + } +} + +multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info, + prd>, VEX_W; + defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>; +} + +multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>; + defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>; +} + +multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode> { + defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, + HasAVX512>, + avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, + HasBWI>; +} + +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; + +def : Pat<(xor + (bc_v16i32 (v16i1sextv16i32)), + (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), + (VPABSDZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v8i1sextv8i64)), + (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), + (VPABSQZrr VR512:$src)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index eb4dc48a7a65..2056056d23a5 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0); } -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 912a0fb356ed..7f850d6830e1 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -869,6 +869,7 @@ def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; +def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>; def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>; def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), @@ -879,6 +880,8 @@ def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)), (ADD32ri GR32:$src1, tglobaladdr:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)), (ADD32ri GR32:$src1, texternalsym:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)), + (ADD32ri GR32:$src1, mcsym:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)), (ADD32ri GR32:$src1, tblockaddress:$src2)>; @@ -886,6 +889,8 @@ def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV32mi addr:$dst, tglobaladdr:$src)>; def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV32mi addr:$dst, texternalsym:$src)>; +def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst), + (MOV32mi addr:$dst, mcsym:$src)>; def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV32mi addr:$dst, tblockaddress:$src)>; @@ -900,6 +905,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper mcsym:$dst)), + (MOV64ri mcsym:$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; @@ -914,6 +921,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; +def : Pat<(i64 (X86Wrapper mcsym:$dst)), + (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; @@ -932,12 +941,15 @@ def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, Requires<[NearData, IsStatic]>; +def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst), + (MOV64mi32 addr:$dst, mcsym:$src)>, + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV64mi32 addr:$dst, tblockaddress:$src)>, Requires<[NearData, IsStatic]>; -def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; -def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>; +def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>; +def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>; // Calls diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 0dd05d8befd6..49068e9c37d3 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -633,16 +633,16 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>; def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>; def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; -def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), - "fxsave\t$dst", [], IIC_FXSAVE>, TB; -def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), - "fxsave64\t$dst", [], IIC_FXSAVE>, TB, - Requires<[In64BitMode]>; +def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; +def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], + IIC_FXSAVE>, TB, Requires<[In64BitMode]>; def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", [], IIC_FXRSTOR>, TB; + "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor64\t$src", [], IIC_FXRSTOR>, TB, - Requires<[In64BitMode]>; + "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], + IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 16ae77dd81a3..fe245c3a7e38 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; +def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; @@ -310,6 +311,7 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; @@ -347,12 +349,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; -def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, - SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; -def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 3>, - SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; +def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; +def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; @@ -561,6 +561,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), return false; }]>; +def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_gather node:$src1, node:$src2, node:$src3) , [{ + if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) + return (Mgt->getIndex().getValueType() == MVT::v4i32 || + Mgt->getBasePtr().getValueType() == MVT::v4i32); + return false; +}]>; + def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) @@ -569,6 +577,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), return false; }]>; +def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_gather node:$src1, node:$src2, node:$src3) , [{ + if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) + return (Mgt->getIndex().getValueType() == MVT::v2i64 || + Mgt->getBasePtr().getValueType() == MVT::v2i64); + return false; +}]>; +def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_gather node:$src1, node:$src2, node:$src3) , [{ + if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) + return (Mgt->getIndex().getValueType() == MVT::v4i64 || + Mgt->getBasePtr().getValueType() == MVT::v4i64); + return false; +}]>; def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) @@ -584,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), return false; }]>; +def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v2i64 || + Sc->getBasePtr().getValueType() == MVT::v2i64); + return false; +}]>; + +def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v4i32 || + Sc->getBasePtr().getValueType() == MVT::v4i32); + return false; +}]>; + +def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v4i64 || + Sc->getBasePtr().getValueType() == MVT::v4i64); + return false; +}]>; + def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_scatter node:$src1, node:$src2, node:$src3) , [{ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4aa0ae6f1959..b92ba99fb100 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1577,38 +1577,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPXORYrr, X86::VPXORYrm, 0 }, // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 }, - { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 }, - { X86::VFMADDPS4rr, X86::VFMADDPS4mr, 0 }, - { X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 }, - { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, 0 }, - { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, 0 }, - { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 }, - { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 }, - { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, 0 }, - { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 }, - { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, 0 }, - { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, 0 }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 }, - { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, 0 }, - { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, 0 }, - { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, 0 }, - { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, 0 }, - { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 }, - { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 }, - { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, 0 }, - { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, 0 }, - { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, 0 }, - { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, 0 }, - { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, 0 }, - { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, 0 }, - { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, 0 }, - { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, 0 }, - { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, 0 }, - { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, 0 }, - { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, 0 }, - { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, 0 }, + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE }, // XOP foldable instructions { X86::VPCMOVrr, X86::VPCMOVmr, 0 }, @@ -1852,38 +1852,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE }, // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 }, - { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 }, - { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, - { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, - { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, - { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, - { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 }, - { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 }, - { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, - { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, - { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, - { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 }, - { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, - { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, - { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 }, - { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, - { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 }, - { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 }, - { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, - { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, - { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, - { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 }, - { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 }, - { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 }, - { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 }, - { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 }, - { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 }, - { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, - { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, - { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE }, // XOP foldable instructions { X86::VPCMOVrr, X86::VPCMOVrm, 0 }, @@ -5295,21 +5295,57 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Size, Alignment, /*AllowCommute=*/true); } -static bool isPartialRegisterLoad(const MachineInstr &LoadMI, - const MachineFunction &MF) { +/// Check if \p LoadMI is a partial register load that we can't fold into \p MI +/// because the latter uses contents that wouldn't be defined in the folded +/// version. For instance, this transformation isn't legal: +/// movss (%rdi), %xmm0 +/// addps %xmm0, %xmm0 +/// -> +/// addps (%rdi), %xmm0 +/// +/// But this one is: +/// movss (%rdi), %xmm0 +/// addss %xmm0, %xmm0 +/// -> +/// addss (%rdi), %xmm0 +/// +static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, + const MachineInstr &UserMI, + const MachineFunction &MF) { unsigned Opc = LoadMI.getOpcode(); + unsigned UserOpc = UserMI.getOpcode(); unsigned RegSize = MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); - if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) + if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) { // These instructions only load 32 bits, we can't fold them if the - // destination register is wider than 32 bits (4 bytes). - return true; + // destination register is wider than 32 bits (4 bytes), and its user + // instruction isn't scalar (SS). + switch (UserOpc) { + case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: + case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: + case X86::MULSSrr_Int: case X86::VMULSSrr_Int: + case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: + return false; + default: + return true; + } + } - if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) + if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) { // These instructions only load 64 bits, we can't fold them if the - // destination register is wider than 64 bits (8 bytes). - return true; + // destination register is wider than 64 bits (8 bytes), and its user + // instruction isn't scalar (SD). + switch (UserOpc) { + case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: + case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: + case X86::MULSDrr_Int: case X86::VMULSDrr_Int: + case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: + return false; + default: + return true; + } + } return false; } @@ -5321,7 +5357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( unsigned NumOps = LoadMI->getDesc().getNumOperands(); int FrameIndex; if (isLoadFromStackSlot(LoadMI, FrameIndex)) { - if (isPartialRegisterLoad(*LoadMI, MF)) + if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF)) return nullptr; return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex); } @@ -5434,7 +5470,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( break; } default: { - if (isPartialRegisterLoad(*LoadMI, MF)) + if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF)) return nullptr; // Folding a normal load. Just copy the load's address operands. @@ -6334,22 +6370,11 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel, return isHighLatencyDef(DefMI->getOpcode()); } -/// If the input instruction is part of a chain of dependent ops that are -/// suitable for reassociation, return the earlier instruction in the sequence -/// that defines its first operand, otherwise return a nullptr. -/// If the instruction's operands must be commuted to be considered a -/// reassociation candidate, Commuted will be set to true. -static MachineInstr *isReassocCandidate(const MachineInstr &Inst, - unsigned AssocOpcode, - bool checkPrevOneUse, - bool &Commuted) { - if (Inst.getOpcode() != AssocOpcode) - return nullptr; - - MachineOperand Op1 = Inst.getOperand(1); - MachineOperand Op2 = Inst.getOperand(2); - - const MachineBasicBlock *MBB = Inst.getParent(); +static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, + const MachineBasicBlock *MBB) { + assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators"); + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); // We need virtual register definitions. @@ -6359,80 +6384,99 @@ static MachineInstr *isReassocCandidate(const MachineInstr &Inst, MI1 = MRI.getUniqueVRegDef(Op1.getReg()); if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) MI2 = MRI.getUniqueVRegDef(Op2.getReg()); - + // And they need to be in the trace (otherwise, they won't have a depth). - if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB) - return nullptr; - - Commuted = false; - if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) { + if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) + return true; + + return false; +} + +static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) std::swap(MI1, MI2); - Commuted = true; - } - // Avoid reassociating operands when it won't provide any benefit. If both - // operands are produced by instructions of this type, we may already - // have the optimal sequence. - if (MI2->getOpcode() == AssocOpcode) - return nullptr; - - // The instruction must only be used by the other instruction that we - // reassociate with. - if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return nullptr; - - // We must match a simple chain of dependent ops. - // TODO: This check is not necessary for the earliest instruction in the - // sequence. Instead of a sequence of 3 dependent instructions with the same - // opcode, we only need to find a sequence of 2 dependent instructions with - // the same opcode plus 1 other instruction that adds to the height of the - // trace. - if (MI1->getOpcode() != AssocOpcode) - return nullptr; + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + if (MI1->getOpcode() == AssocOpcode && + hasVirtualRegDefsInBasicBlock(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) + return true; - return MI1; + return false; } -/// Select a pattern based on how the operands of each associative operation -/// need to be commuted. -static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev, - bool CommuteRoot) { - if (CommutePrev) { - if (CommuteRoot) - return MachineCombinerPattern::MC_REASSOC_XA_YB; - return MachineCombinerPattern::MC_REASSOC_XA_BY; - } else { - if (CommuteRoot) - return MachineCombinerPattern::MC_REASSOC_AX_YB; - return MachineCombinerPattern::MC_REASSOC_AX_BY; - } +/// Return true if the input instruction is part of a chain of dependent ops +/// that are suitable for reassociation, otherwise return false. +/// If the instruction's operands must be commuted to have a previous +/// instruction of the same type define the first source operand, Commuted will +/// be set to true. +static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode, + bool &Commuted) { + // 1. The instruction must have the correct type. + // 2. The instruction must have virtual register definitions for its + // operands in the same basic block. + // 3. The instruction must have a reassociatable sibling. + if (Inst.getOpcode() == AssocOpcode && + hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && + hasReassocSibling(Inst, Commuted)) + return true; + + return false; } +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const { if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) return false; + // TODO: There is nothing x86-specific here except the instruction type. + // This logic could be hoisted into the machine combiner pass itself. + + // Look for this reassociation pattern: + // B = A op X (Prev) + // C = B op Y (Root) + // TODO: There are many more associative instruction types to match: // 1. Other forms of scalar FP add (non-AVX) // 2. Other data types (double, integer, vectors) // 3. Other math / logic operations (mul, and, or) unsigned AssocOpcode = X86::VADDSSrr; - // TODO: There is nothing x86-specific here except the instruction type. - // This logic could be hoisted into the machine combiner pass itself. - bool CommuteRoot; - if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true, - CommuteRoot)) { - bool CommutePrev; - if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. - Patterns.push_back(getPattern(CommutePrev, CommuteRoot)); - return true; + bool Commute = false; + if (isReassocCandidate(Root, AssocOpcode, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); } + return true; } - + return false; } @@ -6525,14 +6569,16 @@ void X86InstrInfo::genAlternativeCodeSequence( // Select the previous instruction in the sequence based on the input pattern. MachineInstr *Prev = nullptr; - if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY || - Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY) - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB || - Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB) - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - else - llvm_unreachable("Unknown pattern for machine combiner"); + switch (Pattern) { + case MachineCombinerPattern::MC_REASSOC_AX_BY: + case MachineCombinerPattern::MC_REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::MC_REASSOC_AX_YB: + case MachineCombinerPattern::MC_REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + } + assert(Prev && "Unknown pattern for machine combiner"); reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); return; @@ -6604,7 +6650,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} char CGBR::ID = 0; FunctionPass* @@ -6716,7 +6762,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} // namespace +} char LDTLSCleanup::ID = 0; FunctionPass* diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4912951140d9..bf63336c7005 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -90,7 +90,7 @@ namespace X86 { /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(CondCode CC); -} // namespace X86 +} // end namespace X86; /// isGlobalStubReference - Return true if the specified TargetFlag operand is @@ -512,6 +512,6 @@ private: int &FrameIndex) const; }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index e936b4bc466e..6f38cb8eaf33 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in { def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; } def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; } def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; } + def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; } + def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; } + def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; } + def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; } } def X86AbsMemAsmOperand : AsmOperandClass { @@ -332,7 +336,11 @@ def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>; def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>; def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>; def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>; -def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>; + +def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>; +def vx64xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX64XOperand>; +def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>; +def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>; def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>; def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 95629184f2cf..2a896dfe8aa8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7860,10 +7860,11 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, int_x86_avx2_vbroadcast_sd_pd_256, WriteFShuffle256>, VEX_L; -let Predicates = [HasAVX2] in -def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256, - i128mem, v4i64, loadv2i64, - WriteLoad>, VEX_L; +let mayLoad = 1, Predicates = [HasAVX2] in +def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), + (ins i128mem:$src), + "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, + Sched<[WriteLoad]>, VEX, VEX_L; let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 2b829301e327..61a33484b8bf 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,8 +21,9 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, - INTR_TYPE_3OP_MASK, FMA_OP_MASK, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, + INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, + VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND }; @@ -55,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0), X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0), + X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0), + X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0), X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0), @@ -129,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0), - - X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, - X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, - X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm), - X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, - X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, - X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm), + X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm, + X86::VSCATTERPF1DPDm), + X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm, + X86::VSCATTERPF1DPSm), + X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm, + X86::VSCATTERPF1QPDm), + X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm, + X86::VSCATTERPF1QPSm), + X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), @@ -251,6 +283,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), + + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB, + X86ISD::FMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB, + X86ISD::FMSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD, + X86ISD::FMSUBADD_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD, + X86ISD::FMSUBADD_RND), + + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD, @@ -382,9 +460,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM, X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, X86ISD::FGETEXP_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), @@ -393,7 +471,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, - X86ISD::FMAX_RND), + X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX, X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX, @@ -405,7 +483,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, - X86ISD::FMIN_RND), + X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN, @@ -428,6 +506,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), @@ -581,6 +671,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), @@ -633,6 +729,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, @@ -667,12 +775,181 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), + + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD, + X86ISD::FNMADD_RND), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD, + X86ISD::FNMADD_RND), + + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, + X86ISD::FNMSUB_RND), + + + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK, + X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK, + X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86ISD::FMADD_RND), + + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ, + X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), @@ -696,54 +973,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, - X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD, - X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB, - X86ISD::FMADDSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB, - X86ISD::FMADDSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB, - X86ISD::FMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB, - X86ISD::FMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD, - X86ISD::FMSUBADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD, - X86ISD::FMSUBADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD, - X86ISD::FNMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD, - X86ISD::FNMADD_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB, - X86ISD::FNMSUB_RND), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), - X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, - X86ISD::FNMSUB_RND), X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0), diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 64135e0f53e5..3415cedc6fea 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -112,7 +112,7 @@ namespace llvm { OutStreamer->EmitInstruction(Inst, getSubtargetInfo()); SMShadowTracker.count(Inst, getSubtargetInfo()); } -} // namespace llvm +} // end llvm namespace X86MCInstLower::X86MCInstLower(const MachineFunction &mf, X86AsmPrinter &asmprinter) @@ -159,10 +159,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { const GlobalValue *GV = MO.getGlobal(); AsmPrinter.getNameWithPrefix(Name, GV); } else if (MO.isSymbol()) { - if (MO.getTargetFlags() == X86II::MO_NOPREFIX) - Name += MO.getSymbolName(); - else - getMang()->getNameWithPrefix(Name, MO.getSymbolName()); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); } else if (MO.isMBB()) { assert(Suffix.empty()); Sym = MO.getMBB()->getSymbol(); @@ -241,7 +238,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: - case X86II::MO_NOPREFIX: break; case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; @@ -423,6 +419,8 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); + case MachineOperand::MO_MCSymbol: + return LowerSymbolOperand(MO, MO.getMCSymbol()); case MachineOperand::MO_JumpTableIndex: return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); case MachineOperand::MO_ConstantPoolIndex: diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 342d26ab1fbb..d598b55aae3e 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -179,6 +179,6 @@ public: } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 33aa78ffdf8a..143e70bda9e7 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -84,7 +84,7 @@ namespace { }; char PadShortFunc::ID = 0; -} // namespace +} FunctionPass *llvm::createX86PadShortFunctions() { return new PadShortFunc(); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 00e213423974..0033b5058187 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -598,10 +598,10 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { } namespace llvm { -unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, - bool High) { +unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT, + bool High) { switch (VT) { - default: llvm_unreachable("Unexpected VT"); + default: return 0; case MVT::i8: if (High) { switch (Reg) { @@ -625,7 +625,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, } } else { switch (Reg) { - default: llvm_unreachable("Unexpected register"); + default: return 0; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AL; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -662,7 +662,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, } case MVT::i16: switch (Reg) { - default: llvm_unreachable("Unexpected register"); + default: return 0; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -698,7 +698,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, } case MVT::i32: switch (Reg) { - default: llvm_unreachable("Unexpected register"); + default: return 0; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::EAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -734,7 +734,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, } case MVT::i64: switch (Reg) { - default: llvm_unreachable("Unexpected register"); + default: return 0; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::RAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -771,6 +771,14 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, } } +unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, + bool High) { + unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High); + if (Res == 0) + llvm_unreachable("Unexpected register or VT"); + return Res; +} + unsigned get512BitSuperRegister(unsigned Reg) { if (Reg >= X86::XMM0 && Reg <= X86::XMM31) return X86::ZMM0 + (Reg - X86::XMM0); @@ -781,4 +789,4 @@ unsigned get512BitSuperRegister(unsigned Reg) { llvm_unreachable("Unexpected SIMD register"); } -} // namespace llvm +} diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 459ecf7fff72..8de1d0bf8ec8 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -128,14 +128,19 @@ public: unsigned getSlotSize() const { return SlotSize; } }; -// getX86SubSuperRegister - X86 utility function. It returns the sub or super -// register of a specific X86 register. -// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX +/// Returns the sub or super register of a specific X86 register. +/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX. +/// Aborts on error. unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false); +/// Returns the sub or super register of a specific X86 register. +/// Like getX86SubSuperRegister() but returns 0 on error. +unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType, + bool High = false); + //get512BitRegister - X86 utility - returns 512-bit super register unsigned get512BitSuperRegister(unsigned Reg); -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 25606d3f5df3..eb7e0ed9de6c 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -48,6 +48,6 @@ public: MachinePointerInfo SrcPtrInfo) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 6934061c6922..d420abbe1433 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -490,6 +490,6 @@ public: } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 3d6eb4f7ce02..fb9cb4ba4c86 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -110,12 +110,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, if (Subtarget.isTargetWin64()) this->Options.TrapUnreachable = true; - // TODO: By default, all reciprocal estimate operations are off because - // that matches the behavior before TargetRecip was added (except for btver2 - // which used subtarget features to enable this type of codegen). - // We should change this to match GCC behavior where everything but - // scalar division estimates are turned on by default with -ffast-math. - this->Options.Reciprocals.setDefaults("all", false, 1); + // By default (and when -ffast-math is on), enable estimate codegen for + // everything except scalar division. By default, use 1 refinement step for + // all operations. Defaults may be overridden by using command-line options. + // Scalar division estimates are disabled because they break too much + // real-world code. These defaults match GCC behavior. + this->Options.Reciprocals.setDefaults("sqrtf", true, 1); + this->Options.Reciprocals.setDefaults("divf", false, 1); + this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1); + this->Options.Reciprocals.setDefaults("vec-divf", true, 1); initAsmInfo(); } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index be56888b75f4..262955698e44 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -44,6 +44,6 @@ public: } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index f9f62904b64b..6f900ea351ef 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -131,52 +131,44 @@ static std::string APIntToHexString(const APInt &AI) { return HexString; } - static std::string scalarConstantToHexString(const Constant *C) { Type *Ty = C->getType(); - APInt AI; if (isa<UndefValue>(C)) { - AI = APInt(Ty->getPrimitiveSizeInBits(), /*val=*/0); - } else if (Ty->isFloatTy() || Ty->isDoubleTy()) { - const auto *CFP = cast<ConstantFP>(C); - AI = CFP->getValueAPF().bitcastToAPInt(); - } else if (Ty->isIntegerTy()) { - const auto *CI = cast<ConstantInt>(C); - AI = CI->getValue(); + return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); + } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) { + return APIntToHexString(CFP->getValueAPF().bitcastToAPInt()); + } else if (const auto *CI = dyn_cast<ConstantInt>(C)) { + return APIntToHexString(CI->getValue()); } else { - llvm_unreachable("unexpected constant pool element type!"); + unsigned NumElements; + if (isa<VectorType>(Ty)) + NumElements = Ty->getVectorNumElements(); + else + NumElements = Ty->getArrayNumElements(); + std::string HexString; + for (int I = NumElements - 1, E = -1; I != E; --I) + HexString += scalarConstantToHexString(C->getAggregateElement(I)); + return HexString; } - return APIntToHexString(AI); } MCSection * X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind, const Constant *C) const { - if (Kind.isReadOnly()) { - if (C) { - Type *Ty = C->getType(); - SmallString<32> COMDATSymName; - if (Ty->isFloatTy() || Ty->isDoubleTy()) { - COMDATSymName = "__real@"; - COMDATSymName += scalarConstantToHexString(C); - } else if (const auto *VTy = dyn_cast<VectorType>(Ty)) { - uint64_t NumBits = VTy->getBitWidth(); - if (NumBits == 128 || NumBits == 256) { - COMDATSymName = NumBits == 128 ? "__xmm@" : "__ymm@"; - for (int I = VTy->getNumElements() - 1, E = -1; I != E; --I) - COMDATSymName += - scalarConstantToHexString(C->getAggregateElement(I)); - } - } - if (!COMDATSymName.empty()) { - unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_LNK_COMDAT; - return getContext().getCOFFSection(".rdata", Characteristics, Kind, - COMDATSymName, - COFF::IMAGE_COMDAT_SELECT_ANY); - } - } + if (Kind.isMergeableConst() && C) { + const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_LNK_COMDAT; + std::string COMDATSymName; + if (Kind.isMergeableConst4() || Kind.isMergeableConst8()) + COMDATSymName = "__real@" + scalarConstantToHexString(C); + else if (Kind.isMergeableConst16()) + COMDATSymName = "__xmm@" + scalarConstantToHexString(C); + + if (!COMDATSymName.empty()) + return getContext().getCOFFSection(".rdata", Characteristics, Kind, + COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ANY); } return TargetLoweringObjectFile::getSectionForConstant(Kind, C); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 13384fab5985..0c82a700952b 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1130,3 +1130,18 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) { return isLegalMaskedLoad(DataType, Consecutive); } +bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + // Work this as a subsetting of subtarget features. + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // FIXME: This is likely too limiting as it will include subtarget features + // that we might not care about for inlining, but it is conservatively + // correct. + return (CallerBits & CalleeBits) == CalleeBits; +} diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index e570bb55710a..a83158440193 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -103,6 +103,8 @@ public: Type *Ty); bool isLegalMaskedLoad(Type *DataType, int Consecutive); bool isLegalMaskedStore(Type *DataType, int Consecutive); + bool hasCompatibleFunctionAttributes(const Function *Caller, + const Function *Callee) const; /// @} }; diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 71ce45b0bc2e..6925b272b4a5 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -86,7 +86,7 @@ namespace { }; char VZeroUpperInserter::ID = 0; -} // namespace +} FunctionPass *llvm::createX86IssueVZeroUpperPass() { return new VZeroUpperInserter(); diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index c9e80945549b..90357257b9ef 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -105,7 +105,7 @@ private: /// The linked list node subobject inside of RegNode. Value *Link = nullptr; }; -} // namespace +} FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); } @@ -398,6 +398,7 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { // Set up RegNodeEscapeIndex int RegNodeEscapeIndex = escapeRegNode(F); + FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; // Only insert stores in catch handlers. Constant *FI8 = @@ -480,8 +481,8 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); // Remember and return the index that we used. We save it in WinEHFuncInfo so - // that we can lower llvm.x86.seh.exceptioninfo later in filter functions - // without too much trouble. + // that we can lower llvm.x86.seh.recoverfp later in filter functions without + // too much trouble. int RegNodeEscapeIndex = escapeRegNode(F); FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; @@ -528,14 +529,12 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { } } - // Insert llvm.stackrestore into each __except block. - Function *StackRestore = - Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore); + // Insert llvm.x86.seh.restoreframe() into each __except block. + Function *RestoreFrame = + Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe); for (BasicBlock *ExceptBB : ExceptBlocks) { IRBuilder<> Builder(ExceptBB->begin()); - Value *SP = - Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); - Builder.CreateCall(StackRestore, {SP}); + Builder.CreateCall(RestoreFrame, {}); } } diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index e1baeacc3e57..2e44ac949b2c 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -40,7 +40,7 @@ public: raw_ostream &VStream, raw_ostream &CStream) const override; }; -} // namespace +} static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address, uint64_t &Size, uint16_t &Insn) { diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 8699ce84006c..ac954d0a8fa4 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -123,7 +123,7 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) { void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) { OS << "\t.cc_bottom " << Name << ".function\n"; } -} // namespace +} static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index eb8b5ec0b112..ba6ca843671e 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -32,6 +32,6 @@ namespace llvm { CodeGenOpt::Level OptLevel); ModulePass *createXCoreLowerThreadLocalPass(); -} // namespace llvm +} // end namespace llvm; #endif diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 116e89a60ee4..607c77248952 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -58,6 +58,6 @@ namespace llvm { return 4; } }; -} // namespace llvm +} #endif diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp index 8d96105a2ebc..77292c4f8f52 100644 --- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp +++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp @@ -34,7 +34,7 @@ namespace { } }; char XCoreFTAOElim::ID = 0; -} // namespace +} /// createXCoreFrameToArgsOffsetEliminationPass - returns an instance of the /// Frame to args offset elimination pass diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 9c49a8d0dbaa..97f0494b6fe3 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -85,7 +85,7 @@ namespace llvm { // Memory barrier. MEMBARRIER }; - } // namespace XCoreISD + } //===--------------------------------------------------------------------===// // TargetLowering Implementation @@ -215,6 +215,6 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, LLVMContext &Context) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index a6e974e2e622..ee30344dcc25 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -41,7 +41,7 @@ namespace XCore { COND_INVALID }; } -} // namespace llvm +} // Pin the vtable to this file. void XCoreInstrInfo::anchor() {} @@ -196,15 +196,10 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } + if (!isUnpredicatedTerminator(I)) return false; @@ -312,14 +307,10 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, unsigned XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode())) return 0; diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 70beb4179118..b958c361f5a2 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -88,6 +88,6 @@ public: unsigned Reg, uint64_t Value) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index f866ab063396..996c6f59346d 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -50,7 +50,7 @@ namespace { bool runOnModule(Module &M) override; }; -} // namespace +} char XCoreLowerThreadLocal::ID = 0; diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h index 74a7f20570e8..569147872f23 100644 --- a/lib/Target/XCore/XCoreMCInstLower.h +++ b/lib/Target/XCore/XCoreMCInstLower.h @@ -37,6 +37,6 @@ private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; }; -} // namespace llvm +} #endif diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 8cce75fd0a73..078ffde18fb9 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -101,6 +101,6 @@ public: return SpillLabels; } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index 622484374a42..cfd80b3f3172 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -35,6 +35,6 @@ public: MachinePointerInfo SrcPtrInfo) const override; }; -} // namespace llvm +} #endif diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h index 74ee594e9c5a..f01fb6714d86 100644 --- a/lib/Target/XCore/XCoreSubtarget.h +++ b/lib/Target/XCore/XCoreSubtarget.h @@ -61,6 +61,6 @@ public: return &InstrInfo.getRegisterInfo(); } }; -} // namespace llvm +} // End llvm namespace #endif diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h index a82702fc99fc..3563dbc5cb7b 100644 --- a/lib/Target/XCore/XCoreTargetStreamer.h +++ b/lib/Target/XCore/XCoreTargetStreamer.h @@ -22,6 +22,6 @@ public: virtual void emitCCBottomData(StringRef Name) = 0; virtual void emitCCBottomFunction(StringRef Name) = 0; }; -} // namespace llvm +} #endif |