[Mesa-dev] [PATCH 11/12] radeon/llvm: add support for CUBE ALU instruction

Mon May 7 10:08:53 PDT 2012

Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
---
 src/gallium/drivers/radeon/AMDGPUIntrinsics.td |    1 +
 src/gallium/drivers/radeon/AMDGPUUtil.cpp      |   11 +++++
 src/gallium/drivers/radeon/AMDGPUUtil.h        |    1 +
 src/gallium/drivers/radeon/R600CodeEmitter.cpp |   58 ++++++++++++++++--------
 src/gallium/drivers/radeon/R600Instructions.td |   13 +++++-
 5 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
index d8ea452..089d3b6 100644
--- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
+++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
@@ -54,6 +54,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
   def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
   def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
   def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>;
 }
 
 let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
index f4e60aa..8563b9d 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
@@ -98,6 +98,17 @@ bool llvm::isReductionOp(unsigned opcode)
   }
 }
 
+bool llvm::isCubeOp(unsigned opcode)
+{
+	  switch(opcode) {
+	    default: return false;
+	    case AMDIL::CUBE_r600:
+	    case AMDIL::CUBE_eg:
+	      return true;
+	  }
+}
+
+
 bool llvm::isFCOp(unsigned opcode)
 {
   switch(opcode) {
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h
index 299146e..38a7ebc 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.h
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.h
@@ -29,6 +29,7 @@ bool isPlaceHolderOpcode(unsigned opcode);
 bool isTransOp(unsigned opcode);
 bool isTexOp(unsigned opcode);
 bool isReductionOp(unsigned opcode);
+bool isCubeOp(unsigned opcode);
 bool isFCOp(unsigned opcode);
 
 /* XXX: Move these to AMDGPUInstrInfo.h */
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
index e0bc95b..eed53a4 100644
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -44,8 +44,9 @@ namespace {
   const R600RegisterInfo * TRI;
   bool evergreenEncoding;
 
+  bool isCube;
   bool isReduction;
-  unsigned reductionElement;
+  unsigned currentElement;
   bool isLast;
 
   unsigned section_start;
@@ -53,7 +54,7 @@ namespace {
   public:
 
   R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
-      _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+      _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false),
       isLast(true) { }
 
   const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -65,7 +66,7 @@ namespace {
   private:
 
   void emitALUInstr(MachineInstr  &MI);
-  void emitSrc(const MachineOperand & MO);
+  void emitSrc(const MachineOperand & MO, int chan_override  = -1);
   void emitDst(const MachineOperand & MO);
   void emitALU(MachineInstr &MI, unsigned numSrc);
   void emitTexInstr(MachineInstr &MI);
@@ -176,11 +177,19 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
           } else if (isReductionOp(MI.getOpcode())) {
             isReduction = true;
             isLast = false;
-            for (reductionElement = 0; reductionElement < 4; reductionElement++) {
-              isLast = (reductionElement == 3);
+            for (currentElement = 0; currentElement < 4; currentElement++) {
+              isLast = (currentElement == 3);
               emitALUInstr(MI);
             }
             isReduction = false;
+          } else if (isCubeOp(MI.getOpcode())) {
+              isCube = true;
+              isLast = false;
+              for (currentElement = 0; currentElement < 4; currentElement++) {
+                isLast = (currentElement == 3);
+                emitALUInstr(MI);
+              }
+              isCube = false;
           } else if (MI.getOpcode() == AMDIL::RETURN ||
                      MI.getOpcode() == AMDIL::BUNDLE ||
                      MI.getOpcode() == AMDIL::KILL) {
@@ -307,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
   /* Emit instruction type */
   emitByte(0);
 
-  unsigned int opIndex;
-  for (opIndex = 1; opIndex < numOperands; opIndex++) {
-    /* Literal constants are always stored as the last operand. */
-    if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
-      break;
+  if (isCube) {
+    static const int cube_src_swz[] = {2, 2, 0, 1};
+    emitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
+    emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
+    emitNullBytes(SRC_BYTE_COUNT);
+  } else {
+    unsigned int opIndex;
+    for (opIndex = 1; opIndex < numOperands; opIndex++) {
+      /* Literal constants are always stored as the last operand. */
+      if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+        break;
+      }
+      emitSrc(MI.getOperand(opIndex));
     }
-    emitSrc(MI.getOperand(opIndex));
-  }
 
     /* Emit zeros for unused sources */
-  for ( ; opIndex < 4; opIndex++) {
-    emitNullBytes(SRC_BYTE_COUNT);
+    for ( ; opIndex < 4; opIndex++) {
+      emitNullBytes(SRC_BYTE_COUNT);
+    }
   }
 
   emitDst(dstOp);
@@ -326,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
   emitALU(MI, numOperands - 1);
 }
 
-void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */)
 {
   uint32_t value = 0;
   /* Emit the source select (2 bytes).  For GPRs, this is the register index.
@@ -352,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO)
   }
 
   /* Emit the source channel (1 byte) */
-  if (isReduction) {
-    emitByte(reductionElement);
+  if (chan_override != -1) {
+    emitByte(chan_override);
+  } else if (isReduction) {
+    emitByte(currentElement);
   } else if (MO.isReg()) {
     emitByte(TRI->getHWRegChan(MO.getReg()));
   } else {
@@ -395,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
     emitByte(getHWReg(MO.getReg()));
 
     /* Emit the element of the destination register (1 byte)*/
-    if (isReduction) {
-      emitByte(reductionElement);
+    if (isReduction || isCube) {
+      emitByte(currentElement);
     } else {
       emitByte(TRI->getHWRegChan(MO.getReg()));
     }
@@ -409,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
     }
 
     /* Emit writemask (1 byte).  */
-    if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+    if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg()))
          || MO.getTargetFlags() & MO_FLAG_MASK) {
       emitByte(0);
     } else {
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index edbade7..381ad71 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
   }
 
 class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
-                      InstrItinClass itin = AnyALU> :
+                      InstrItinClass itin = VecALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
           ins,
@@ -537,6 +537,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
   [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
 >;
 
+class CUBE_Common <bits<32> inst> : InstR600 <
+  inst,
+  (outs R600_Reg128:$dst),
+  (ins R600_Reg128:$src),
+  "CUBE $dst $src",
+  [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+  VecALU
+>;
+
 class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
   inst, "EXP_IEEE",
   []> {
@@ -681,6 +690,7 @@ let Gen = AMDGPUGen.R600 in {
   def CNDGT_r600 : CNDGT_Common<0x19>;
   def CNDGE_r600 : CNDGE_Common<0x1A>;
   def DOT4_r600 : DOT4_Common<0x50>;
+  def CUBE_r600 : CUBE_Common<0x52>;
   def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
   def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
   def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
@@ -887,6 +897,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in {
   def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
   def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
   def DOT4_eg : DOT4_Common<0xBE>;
+  def CUBE_eg : CUBE_Common<0xC0>;
 
 } // End AMDGPUGen.EG_CAYMAN
 
-- 
1.7.10.1