Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 109
1 file changed, 99 insertions, 10 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c890fdd1e519..036f5d2610e4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   switch (VT.getSimpleVT().SimpleTy) {
   default: return false;
   case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return false;
+    LLVM_FALLTHROUGH;
   case MVT::i8:
     Opc = X86::MOV8rm;
     RC = &X86::GR8RegClass;
@@ -524,6 +528,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                    X86AddressMode &AM,
                                    MachineMemOperand *MMO, bool Aligned) {
+  bool HasSSE1 = Subtarget->hasSSE1();
   bool HasSSE2 = Subtarget->hasSSE2();
   bool HasSSE4A = Subtarget->hasSSE4A();
   bool HasAVX = Subtarget->hasAVX();
@@ -537,6 +542,16 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
   case MVT::f80: // No f80 support yet.
   default: return false;
   case MVT::i1: {
+    // In case ValReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
+      unsigned KValReg = ValReg;
+      ValReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ValReg)
+          .addReg(KValReg);
+      ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+                                          X86::sub_8bit);
+    }
     // Mask out all but lowest bit.
     unsigned AndResult = createResultReg(&X86::GR8RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -574,6 +589,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
     } else
       Opc = X86::ST_Fp64m;
     break;
+  case MVT::x86mmx:
+    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
+    break;
   case MVT::v4f32:
     if (Aligned) {
       if (IsNonTemporal)
@@ -1268,6 +1286,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
       if (SrcVT == MVT::i1) {
         if (Outs[0].Flags.isSExt())
           return false;
+        // In case SrcReg is a K register, COPY to a GPR
+        if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
+          unsigned KSrcReg = SrcReg;
+          SrcReg = createResultReg(&X86::GR32RegClass);
+          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(TargetOpcode::COPY), SrcReg)
+              .addReg(KSrcReg);
+          SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+                                              X86::sub_8bit);
+        }
         SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
         SrcVT = MVT::i8;
       }
@@ -1559,6 +1587,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
   // Handle zero-extension from i1 to i8, which is common.
   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
   if (SrcVT == MVT::i1) {
+    // In case ResultReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
+      unsigned KResultReg = ResultReg;
+      ResultReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(KResultReg);
+      ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+                                             X86::sub_8bit);
+    }
+
     // Set the high bits to zero.
     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
     SrcVT = MVT::i8;
@@ -1740,10 +1779,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
   // In case OpReg is a K register, COPY to a GPR
   if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
     unsigned KOpReg = OpReg;
-    OpReg = createResultReg(&X86::GR8RegClass);
+    OpReg = createResultReg(&X86::GR32RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), OpReg)
         .addReg(KOpReg);
+    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+                                       X86::sub_8bit);
   }
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
       .addReg(OpReg)
@@ -2084,10 +2125,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
     // In case OpReg is a K register, COPY to a GPR
     if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
       unsigned KCondReg = CondReg;
-      CondReg = createResultReg(&X86::GR8RegClass);
+      CondReg = createResultReg(&X86::GR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::COPY), CondReg)
           .addReg(KCondReg, getKillRegState(CondIsKill));
+      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                           X86::sub_8bit);
     }
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
         .addReg(CondReg, getKillRegState(CondIsKill))
@@ -2297,10 +2340,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
     // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
       unsigned KCondReg = CondReg;
-      CondReg = createResultReg(&X86::GR8RegClass);
+      CondReg = createResultReg(&X86::GR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::COPY), CondReg)
           .addReg(KCondReg, getKillRegState(CondIsKill));
+      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                           X86::sub_8bit);
     }
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
         .addReg(CondReg, getKillRegState(CondIsKill))
@@ -2423,12 +2468,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
   if (OpReg == 0)
     return false;
 
+  unsigned ImplicitDefReg;
+  if (Subtarget->hasAVX()) {
+    ImplicitDefReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+  }
+
   unsigned ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB;
   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
                 ResultReg);
+  if (Subtarget->hasAVX())
-  MIB.addReg(OpReg);
+    MIB.addReg(ImplicitDefReg);
+  MIB.addReg(OpReg);
   updateValueMap(I, ResultReg);
   return true;
@@ -2461,7 +2516,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
   EVT DstVT = TLI.getValueType(DL, I->getType());
 
   // This code only handles truncation to byte.
-  if (DstVT != MVT::i8 && DstVT != MVT::i1)
+  // TODO: Support truncate to i1 with AVX512.
+  if (DstVT != MVT::i8 && (DstVT != MVT::i1 || Subtarget->hasAVX512()))
     return false;
   if (!TLI.isTypeLegal(SrcVT))
     return false;
@@ -3105,8 +3161,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
     return 0;
 
   if (CS)
-    if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
-        CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+    if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
+        CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
       return 0;
 
   return 4;
@@ -3266,6 +3322,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
     // Handle zero-extension from i1 to i8, which is common.
     if (ArgVT == MVT::i1) {
+      // In case SrcReg is a K register, COPY to a GPR
+      if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+        unsigned KArgReg = ArgReg;
+        ArgReg = createResultReg(&X86::GR32RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(TargetOpcode::COPY), ArgReg)
+            .addReg(KArgReg);
+        ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+                                            X86::sub_8bit);
+      }
       // Set the high bits to zero.
       ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
       ArgVT = MVT::i8;
@@ -3463,6 +3529,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     CCValAssign &VA = RVLocs[i];
     EVT CopyVT = VA.getValVT();
     unsigned CopyReg = ResultReg + i;
+    unsigned SrcReg = VA.getLocReg();
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3470,9 +3537,19 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       report_fatal_error("SSE register return with SSE disabled");
     }
 
+    // If the return value is an i1 and AVX-512 is enabled, we need
+    // to do a fixup to make the copy legal.
+    if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+      // Need to copy to a GR32 first.
+      // TODO: MOVZX isn't great here. We don't care about the upper bits.
+      SrcReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+    }
+
     // If we prefer to use the value in xmm registers, copy it out as f80 and
     // use a truncate to move it from fp stack reg to xmm reg.
-    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+    if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
         isScalarFPTypeInSSEReg(VA.getValVT())) {
       CopyVT = MVT::f80;
       CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3480,7 +3557,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     }
     // Copy out the result.
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+            TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
     InRegs.push_back(VA.getLocReg());
 
     // Round the f80 to the right size, which also moves it to the appropriate
@@ -3601,6 +3678,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
     switch (VT.SimpleTy) {
     default: llvm_unreachable("Unexpected value type");
     case MVT::i1:
+      if (Subtarget->hasAVX512()) {
+        // Need to copy to a VK1 register.
+        unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+        return ResultReg;
+      }
     case MVT::i8:
       return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
                                         X86::sub_8bit);
@@ -3622,7 +3706,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
   unsigned Opc = 0;
   switch (VT.SimpleTy) {
   default: llvm_unreachable("Unexpected value type");
-  case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+  case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return 0;
+    VT = MVT::i8;
+    LLVM_FALLTHROUGH;
   case MVT::i8: Opc = X86::MOV8ri; break;
   case MVT::i16: Opc = X86::MOV16ri; break;
   case MVT::i32: Opc = X86::MOV32ri; break;
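Note: one fixup recurs throughout this diff. When an i1 value lives in an AVX-512 mask (VK1) register, the patch first COPYs it into a GR32 and then extracts the low byte with sub_8bit, so the existing i1-as-GR8 paths can keep working; it goes through GR32 rather than GR8 because mask-to-GPR moves target at least a 32-bit register. Purely as a reading aid, the repeated sequence could be collected into a helper like the sketch below. This helper is hypothetical: the name fixupKRegToGR8, its parameter Reg, and the in-class declaration it would need are not part of this commit, which instead inlines the sequence at each call site (X86FastEmitStore, X86SelectRet, X86SelectZExt, X86SelectBranch, the two select emitters, and fastLowerCall).

// Hypothetical helper, not in this patch: if Reg is an i1 held in a VK1 mask
// register, move it into a GR8 by copying through a GR32, mirroring the
// inlined sequence the commit adds at each use site.
unsigned X86FastISel::fixupKRegToGR8(unsigned Reg) {
  if (MRI.getRegClass(Reg) != &X86::VK1RegClass)
    return Reg; // Already a GPR value; nothing to fix up.

  unsigned KReg = Reg;
  // COPY the mask register into a 32-bit GPR first.
  Reg = createResultReg(&X86::GR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), Reg)
      .addReg(KReg);
  // Then narrow to the low byte so callers can keep treating the value as i8.
  return fastEmitInst_extractsubreg(MVT::i8, Reg, /*Kill=*/true,
                                    X86::sub_8bit);
}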