Diffstat (limited to 'lib/Target/AMDGPU/R600ISelLowering.cpp')
 lib/Target/AMDGPU/R600ISelLowering.cpp | 154 ++++++++++++++--------
 1 file changed, 98 insertions(+), 56 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 113d6249fa60..e2a0f05d2b34 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
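
The AMDGPUASI.* -> AMDGPUAS::* substitutions that recur throughout this diff replace fields of a per-target address-space struct with plain enumerators. A minimal sketch of the resulting pattern; the numeric values below are placeholders for illustration only, except CONSTANT_BUFFER_0 == 8, which the addrspace(8) comment later in this diff corroborates:

    namespace AMDGPUAS {
    enum : unsigned {
      GLOBAL_ADDRESS    = 1, // placeholder value, for illustration only
      LOCAL_ADDRESS     = 3, // placeholder
      CONSTANT_ADDRESS  = 4, // placeholder
      PRIVATE_ADDRESS   = 5, // placeholder
      PARAM_I_ADDRESS   = 7, // placeholder; implicit kernel params on R600
      CONSTANT_BUFFER_0 = 8  // matches the addrspace(8) comment below
    };
    } // namespace AMDGPUAS

Call sites can then spell the space as AMDGPUAS::PRIVATE_ADDRESS directly instead of carrying an AMDGPUASI struct instance around.
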
@@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16 bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
Chain = Dst.getValue(1);
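
The function being patched here emulates sub-dword private stores with a dword-sized read-modify-write: load the containing dword, mask out the destination lane, then OR in the shifted value. A self-contained sketch of that masking arithmetic (standalone C++, names invented for this illustration):

    #include <cstdint>
    #include <cstdio>

    // Emulate a truncating 8- or 16-bit store into a 32-bit word, mirroring
    // the mask/shift dance in lowerPrivateTruncStore above.
    static uint32_t storeSubDword(uint32_t Dst, uint32_t Value,
                                  unsigned ByteOffset, unsigned Bits) {
      uint32_t Mask = (Bits == 8) ? 0xffu : 0xffffu;
      unsigned Shift = ByteOffset * 8;        // bit offset inside the dword
      Dst &= ~(Mask << Shift);                // clear the destination lane
      return Dst | ((Value & Mask) << Shift); // insert the truncated value
    }

    int main() {
      // Storing byte 0xAB at byte offset 2 of 0x11223344 yields 0x11AB3344.
      std::printf("%08x\n", storeSubDword(0x11223344u, 0xABu, 2, 8));
    }
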
@@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
@@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of in the combiner to avoid
// the artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUASI.PRIVATE_ADDRESS)
+ if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
// Get offset within the register.
@@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1451,45 +1451,29 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
+ // This is still used for explicit loads from addrspace(8)
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
- isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+ if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
isa<ConstantSDNode>(Ptr)) {
- SDValue Slots[4];
- for (unsigned i = 0; i < 4; i++) {
- // We want Const position encoded with the following formula :
- // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
- // const_index is Ptr computed by llvm using an alignment of 16.
- // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
- // then div by 4 at the ISel step
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
- Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
- }
- EVT NewVT = MVT::v4i32;
- unsigned NumElements = 4;
- if (VT.isVector()) {
- NewVT = VT;
- NumElements = VT.getVectorNumElements();
- }
- Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
} else {
+ // TODO: Does this even work?
// non-constant ptr can't be folded, keep it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
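
For the non-constant-pointer fallback kept above: the SRL by 4 selects the 16-byte constant slot (constants are laid out with 16-byte alignment), and the second operand to CONST_ADDRESS is the kc_bank index, i.e. the address space relative to CONSTANT_BUFFER_0. A tiny worked sketch, assuming CONSTANT_BUFFER_0 == 8 as the addrspace(8) comment above suggests:

    #include <cstdio>

    int main() {
      unsigned ConstantBuffer0 = 8; // assumed enumerator value, addrspace(8)
      unsigned AS = 9;              // hypothetical load from constant buffer 1
      unsigned BytePtr = 48;        // hypothetical byte offset of the load
      // prints kc_bank=1 slot=3
      std::printf("kc_bank=%u slot=%u\n", AS - ConstantBuffer0, BytePtr >> 4);
    }
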
@@ -1525,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1622,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUAS::PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1646,17 +1630,17 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
ISD::UNINDEXED, Ext, VT, DL, Chain,
DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
PtrInfo,
- MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MemVT, Alignment, MachineMemOperand::MONonTemporal |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
- // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
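
Replacing the hard-coded alignment of 4 with MinAlign(VT.getStoreSize(), PartOffset) records the largest power of two known to divide both the part's size and its offset. llvm::MinAlign computes this as the lowest set bit of (A | B); a self-contained restatement with a few spot checks:

    #include <cassert>
    #include <cstdint>

    // Same computation as llvm::MinAlign: the greatest power of two dividing
    // both A and B, i.e. the lowest set bit of (A | B).
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      assert(minAlign(4, 0) == 4); // dword part at offset 0: align 4
      assert(minAlign(4, 6) == 2); // offset 6 only guarantees align 2
      assert(minAlign(8, 4) == 4); // i64 part split at offset 4: align 4
    }
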
@@ -1672,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
// Local and Private addresses do not handle vectors. Limit to i32
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
}
return true;
@@ -1701,14 +1685,15 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].isUndef())
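
Dropping the BUILD_VECTOR assert and reading lanes through EXTRACT_VECTOR_ELT lets this helper accept any vector-valued node. When the input really is a BUILD_VECTOR, the DAG folds the extract back to the corresponding operand, so the old behavior is preserved; a hypothetical simplification of that fold (a sketch, not the in-tree SelectionDAG::getNode code):

    // Sketch only; assumes the usual SelectionDAG headers and types.
    static SDValue getLane(SelectionDAG &DAG, const SDLoc &DL, EVT EltTy,
                           SDValue Vec, unsigned Idx) {
      if (Vec.getOpcode() == ISD::BUILD_VECTOR)
        return Vec.getOperand(Idx); // what the removed code read directly
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Vec,
                         DAG.getIntPtrConstant(Idx, DL));
    }
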
@@ -1743,15 +1728,17 @@ static SDValue CompactSwizzlableVector(
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
- bool isUnmovable[4] = { false, false, false, false };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ bool isUnmovable[4] = {false, false, false, false};
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
+
for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
@@ -1782,7 +1769,6 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
SelectionDAG &DAG,
const SDLoc &DL) const {
- assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
@@ -1804,6 +1790,52 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
return BuildVector;
}
+SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
+ SelectionDAG &DAG) const {
+ SDLoc DL(LoadNode);
+ EVT VT = LoadNode->getValueType(0);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = LoadNode->getBasePtr();
+ assert(isa<ConstantSDNode>(Ptr));
+
+ // TODO: Support smaller loads
+ if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
+ return SDValue();
+
+ if (LoadNode->getAlignment() < 4)
+ return SDValue();
+
+ int ConstantBlock = ConstantAddressBlock(Block);
+
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want the Const position encoded with the following formula:
+ //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // const_index is Ptr computed by LLVM using an alignment of 16.
+ // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
+ // then divide by 4 at the ISel step
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ EVT NewVT = MVT::v4i32;
+ unsigned NumElements = 4;
+ if (VT.isVector()) {
+ NewVT = VT;
+ NumElements = VT.getVectorNumElements();
+ }
+ SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, DL);
+}
+
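
A worked instance of the position encoding documented in constBufferLoad above, assuming kc_bank = 0 (CONSTANT_BUFFER_0) and const_index = 2, i.e. the third 16-byte constant slot; the four channels land at consecutive encoded positions:

    #include <cstdio>

    int main() {
      unsigned KCBank = 0, ConstIndex = 2;
      for (unsigned Chan = 0; Chan < 4; ++Chan) {
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        unsigned Pos = ((512 + (KCBank << 12) + ConstIndex) << 2) + Chan;
        std::printf("chan %u -> position %u\n", Chan, Pos); // 2056 .. 2059
      }
    }
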
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
@@ -2022,6 +2054,16 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
+
+ case ISD::LOAD: {
+ LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+ SDValue Ptr = LoadNode->getBasePtr();
+ if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
+ isa<ConstantSDNode>(Ptr))
+ return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
+ break;
+ }
+
default: break;
}