aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r--lib/Target/X86/X86FastISel.cpp109
1 files changed, 99 insertions, 10 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c890fdd1e519..036f5d2610e4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return false;
+ LLVM_FALLTHROUGH;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -524,6 +528,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE1 = Subtarget->hasSSE1();
bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
@@ -537,6 +542,16 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::f80: // No f80 support yet.
default: return false;
case MVT::i1: {
+ // In case ValReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
+ unsigned KValReg = ValReg;
+ ValReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ValReg)
+ .addReg(KValReg);
+ ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -574,6 +589,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
} else
Opc = X86::ST_Fp64m;
break;
+ case MVT::x86mmx:
+ Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
+ break;
case MVT::v4f32:
if (Aligned) {
if (IsNonTemporal)
@@ -1268,6 +1286,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
+ unsigned KSrcReg = SrcReg;
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg)
+ .addReg(KSrcReg);
+ SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
}
@@ -1559,6 +1587,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) {
+ // In case ResultReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
+ unsigned KResultReg = ResultReg;
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(KResultReg);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
+
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1740,10 +1779,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
unsigned KOpReg = OpReg;
- OpReg = createResultReg(&X86::GR8RegClass);
+ OpReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), OpReg)
.addReg(KOpReg);
+ OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg)
@@ -2084,10 +2125,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2297,10 +2340,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2423,12 +2468,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
if (OpReg == 0)
return false;
+ unsigned ImplicitDefReg;
+ if (Subtarget->hasAVX()) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ }
+
unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
ResultReg);
+
if (Subtarget->hasAVX())
- MIB.addReg(OpReg);
+ MIB.addReg(ImplicitDefReg);
+
MIB.addReg(OpReg);
updateValueMap(I, ResultReg);
return true;
@@ -2461,7 +2516,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
- if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ // TODO: Support truncate to i1 with AVX512.
+ if (DstVT != MVT::i8 && (DstVT != MVT::i1 || Subtarget->hasAVX512()))
return false;
if (!TLI.isTypeLegal(SrcVT))
return false;
@@ -3105,8 +3161,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
return 0;
if (CS)
- if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
- CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
+ CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
return 0;
return 4;
@@ -3266,6 +3322,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Handle zero-extension from i1 to i8, which is common.
if (ArgVT == MVT::i1) {
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+ unsigned KArgReg = ArgReg;
+ ArgReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg)
+ .addReg(KArgReg);
+ ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3463,6 +3529,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
+ unsigned SrcReg = VA.getLocReg();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3470,9 +3537,19 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
report_fatal_error("SSE register return with SSE disabled");
}
+ // If the return value is an i1 and AVX-512 is enabled, we need
+ // to do a fixup to make the copy legal.
+ if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+ // Need to copy to a GR32 first.
+ // TODO: MOVZX isn't great here. We don't care about the upper bits.
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+ }
+
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3480,7 +3557,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Copy out the result.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
InRegs.push_back(VA.getLocReg());
// Round the f80 to the right size, which also moves it to the appropriate
@@ -3601,6 +3678,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
+ if (Subtarget->hasAVX512()) {
+ // Need to copy to a VK1 register.
+ unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+ return ResultReg;
+ }
case MVT::i8:
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
X86::sub_8bit);
@@ -3622,7 +3706,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+ case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return 0;
+ VT = MVT::i8;
+ LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;