[Mesa-dev] [PATCH 3/5] AMDGPU: Promote floating-point load/store to integer load/store

Tom Stellard tom at stellard.net
Fri Dec 7 14:25:14 PST 2012


From: Tom Stellard <thomas.stellard at amd.com>

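Promote f32 and v4f32 loads and stores to the equivalent i32 and v4i32
operations, so that only the integer types need instruction patterns.
This will reduce the number of tablegen patterns we need.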
---
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp           | 14 +++++
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp      | 11 ++--
 lib/Target/AMDGPU/R600ISelLowering.cpp             |  2 -
 lib/Target/AMDGPU/R600Instructions.td              | 70 ++++++----------------
 4 files changed, 37 insertions(+), 60 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1f31c2a..473dac4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -41,6 +41,20 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
 
+  // Lower floating point store/load to integer store/load to reduce the number
+  // of patterns in tablegen.
+  setOperationAction(ISD::STORE, MVT::f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+  setOperationAction(ISD::LOAD, MVT::f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
   setOperationAction(ISD::UDIV, MVT::i32, Expand);
   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 756f945..154f05b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -159,13 +159,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       break;
     }
     case AMDGPU::CONSTANT_LOAD_eg:
-    case AMDGPU::VTX_READ_PARAM_i32_eg:
-    case AMDGPU::VTX_READ_PARAM_f32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_i8_eg:
-    case AMDGPU::VTX_READ_GLOBAL_i32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_f32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: {
+    case AMDGPU::VTX_READ_PARAM_32_eg:
+    case AMDGPU::VTX_READ_GLOBAL_8_eg:
+    case AMDGPU::VTX_READ_GLOBAL_32_eg:
+    case AMDGPU::VTX_READ_GLOBAL_128_eg: {
       uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
       uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
 
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index f7ae3c9..33cd2cc 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -67,9 +67,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT, MVT::f32, Custom);
 
   setOperationAction(ISD::STORE, MVT::i32, Custom);
-  setOperationAction(ISD::STORE, MVT::f32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-  setOperationAction(ISD::STORE, MVT::v4f32, Custom);
 
   setTargetDAGCombine(ISD::FP_ROUND);
 
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 31639a3..092dd8c 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1119,8 +1119,10 @@ let Predicates = [isEGorCayman] in {
 //===----------------------------------------------------------------------===//
 let usesCustomInserter = 1 in {
 
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT <
-  0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []> {
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+                              list<dag> pattern>
+    : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
+                 !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
   let RIM         = 0;
   // XXX: Have a separate instruction for non-indexed writes.
   let TYPE        = 1;
@@ -1139,38 +1141,16 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_R
 
 // 32-bit store
 def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
-  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
-  0x1, "RAT_WRITE_CACHELESS_32_eg"
->;
-
-// i32 global_store
-def : Pat <
-  (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
->;
-
-// Floating point global_store
-def : Pat <
-  (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+  0x1, "RAT_WRITE_CACHELESS_32_eg",
+  [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
 >;
 
 //128-bit store
 def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
-  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
-  0xf, "RAT_WRITE_CACHELESS_128"
->;
-
-// v4f32 global store
-def : Pat <
-  (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0)
->;
-
-// v4i32 global store
-def : Pat <
-  (global_store (v4i32 R600_Reg128:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0)
+  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+  0xf, "RAT_WRITE_CACHELESS_128",
+  [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
 >;
 
 class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
@@ -1280,7 +1260,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
   // to be caused by ALU instructions in the next instruction group that wrote
   // to the $ptr registers of the VTX_READ.  
   // e.g.
-  // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24
+  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
   // %T2_X<def> = MOV %ZERO
   //Adding this constraint prevents this from happening.
   let Constraints = "$ptr.ptr = $dst";
@@ -1306,48 +1286,36 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
 // VTX Read from parameter memory space
 //===----------------------------------------------------------------------===//
 
-class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0,
-  [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+  [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
 >;
 
-def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>;
-def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>;
-
-
 //===----------------------------------------------------------------------===//
 // VTX Read from global memory space
 //===----------------------------------------------------------------------===//
 
 // 8-bit reads
-def VTX_READ_GLOBAL_i8_eg : VTX_READ_8_eg <1,
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
   [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
 >;
 
 // 32-bit reads
-
-class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1,
-  [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+  [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
 >;
 
-def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>;
-def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>;
-
 // 128-bit reads
-
-class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1,
-  [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+  [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
 >;
 
-def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>;
-def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>;
-
 //===----------------------------------------------------------------------===//
 // Constant Loads
 // XXX: We are currently storing all constants in the global address space.
 //===----------------------------------------------------------------------===//
 
 def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
-  [(set (f32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+  [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
 >;
 
 }
-- 
1.7.11.4



More information about the mesa-dev mailing list