diff options
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 29 |
1 files changed, 17 insertions, 12 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 87ff5a719fca..0c5b8a79dd62 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7650,17 +7650,22 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. if (!isAfterLegalize && VT.isVector()) { - SmallVector<int, 4> ClearMask(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - if (ZeroMask[i]) - ClearMask[i] = i + NumElems; - else if (LoadMask[i]) - ClearMask[i] = i; + unsigned NumMaskElts = VT.getVectorNumElements(); + if ((NumMaskElts % NumElems) == 0) { + unsigned Scale = NumMaskElts / NumElems; + SmallVector<int, 4> ClearMask(NumMaskElts, -1); + for (unsigned i = 0; i < NumElems; ++i) { + if (UndefMask[i]) + continue; + int Offset = ZeroMask[i] ? NumMaskElts : 0; + for (unsigned j = 0; j != Scale; ++j) + ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset; + } + SDValue V = CreateLoad(VT, LDBase); + SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT); + return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } - SDValue V = CreateLoad(VT, LDBase); - SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) - : DAG.getConstantFP(0.0, DL, VT); - return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } } @@ -31664,8 +31669,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask, if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector<int, 4> RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { - ArrayRef<int> LoMask(Mask.data() + 0, 4); - ArrayRef<int> HiMask(Mask.data() + 4, 4); + ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4); + ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4); // PSHUFLW: permute lower 4 elements only. if (isUndefOrInRange(LoMask, 0, 4) && |