summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp12
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp52
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp4
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td28
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.cpp4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp11
-rw-r--r--lib/Target/X86/X86InstrSSE.td3
7 files changed, 85 insertions, 29 deletions
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 160107cd7e2b..d52cd84246a1 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -946,6 +946,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::CMP_SWAP_128:
return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
+ case AArch64::AESMCrrTied:
+ case AArch64::AESIMCrrTied: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
+ AArch64::AESIMCrr))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1));
+ transferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
}
return false;
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4907d082eda0..7c6a99990406 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- auto CSStackSize = AFI->getCalleeSavedStackSize();
+ bool IsWin64 =
+ Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+
+ auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes - CSStackSize);
+ AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
NumBytes = 0;
- } else if (CSStackSize != 0) {
+ } else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
- -CSStackSize);
- NumBytes -= CSStackSize;
+ -PrologueSaveSize);
+ NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
}
if (HasFP) {
- // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
- int FPOffset = CSStackSize - 16;
+ // Only set up FP if we actually need to. Frame pointer is fp =
+ // sp - fixedobject - 16.
+ int FPOffset = AFI->getCalleeSavedStackSize() - 16;
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, Reg, 2 * StackGrowth - FixedObject));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
- auto CSStackSize = AFI->getCalleeSavedStackSize();
+ bool IsWin64 =
+ Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+
+ auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
- if (!CombineSPBump && CSStackSize != 0)
+ if (!CombineSPBump && PrologueSaveSize != 0)
convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
+ MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
@@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
return;
}
- NumBytes -= CSStackSize;
+ NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (!hasFP(MF)) {
@@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (RedZone && ArgumentPopSize == 0)
return;
- bool NoCalleeSaveRestore = CSStackSize == 0;
+ bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += ArgumentPopSize;
@@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be able to save any instructions.
if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
+ -AFI->getCalleeSavedStackSize() + 16, TII,
+ MachineInstr::FrameDestroy);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
MachineInstr::FrameDestroy);
@@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- int FPOffset = MFI.getObjectOffset(FI) + 16;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ bool IsWin64 =
+ Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
bool isFixed = MFI.isFixedObjectIndex(FI);
@@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPairs(
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
- unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
- if (IsWin64)
- Offset -= alignTo(GPRSaveSize, 16);
-
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c30c4410c09..9d879886d39d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9586,8 +9586,8 @@ static bool performTBISimplification(SDValue Addr,
SelectionDAG &DAG) {
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
- TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
- DCI.isBeforeLegalizeOps());
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 0dcf07f98412..5049a39814f1 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -37,6 +37,9 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
+def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
+ AssemblerPredicate<"FeatureFuseAES",
+ "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
@@ -5304,6 +5307,31 @@ def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>;
def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
+// for AES fusion on some CPUs.
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+}
+
+// Only use constrained versions of AES(I)MC instructions if they are paired with
+// AESE/AESD.
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc
+ (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
+
+def : Pat<(v16i8 (int_aarch64_crypto_aesimc
+ (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
+
def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index ccc9d2ad1b48..963cfadc54fd 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -118,11 +118,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
// Fuse AES crypto operations.
switch(SecondOpcode) {
// AES encode.
- case AArch64::AESMCrr :
+ case AArch64::AESMCrr:
+ case AArch64::AESMCrrTied:
return FirstOpcode == AArch64::AESErr ||
FirstOpcode == AArch64::INSTRUCTION_LIST_END;
// AES decode.
case AArch64::AESIMCrr:
+ case AArch64::AESIMCrrTied:
return FirstOpcode == AArch64::AESDrr ||
FirstOpcode == AArch64::INSTRUCTION_LIST_END;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ba8eb8656585..7563bffd8f87 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3984,6 +3984,13 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if (Offset != MFI.getObjectOffset(FI))
return false;
+ // If this is not byval, check that the argument stack object is immutable.
+ // inalloca and argument copy elision can create mutable argument stack
+ // objects. Byval objects can be mutated, but a byval call intends to pass the
+ // mutated memory.
+ if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
+ return false;
+
if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
@@ -30605,8 +30612,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask(APInt::getSignMask(BitWidth));
KnownBits Known;
- TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
- DCI.isBeforeLegalizeOps());
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
// If we changed the computation somewhere in the DAG, this change will
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index fe87bbd99473..650e4fc8716c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3697,8 +3697,7 @@ let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
- "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
- OBXS, Requires<[HasSSE2]>;
+ "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, OBXS;
}
let SchedRW = [WriteFence] in {