[Mesa-dev] [PATCH 11/12] radeon/llvm: add support for CUBE ALU instruction
Vadim Girlin
vadimgirlin at gmail.com
Mon May 7 10:08:53 PDT 2012
Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
---
src/gallium/drivers/radeon/AMDGPUIntrinsics.td | 1 +
src/gallium/drivers/radeon/AMDGPUUtil.cpp | 11 +++++
src/gallium/drivers/radeon/AMDGPUUtil.h | 1 +
src/gallium/drivers/radeon/R600CodeEmitter.cpp | 58 ++++++++++++++++--------
src/gallium/drivers/radeon/R600Instructions.td | 13 +++++-
5 files changed, 63 insertions(+), 21 deletions(-)
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
index d8ea452..089d3b6 100644
--- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
+++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
@@ -54,6 +54,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
index f4e60aa..8563b9d 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
@@ -98,6 +98,17 @@ bool llvm::isReductionOp(unsigned opcode)
}
}
+bool llvm::isCubeOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::CUBE_r600:
+ case AMDIL::CUBE_eg:
+ return true;
+ }
+}
+
+
bool llvm::isFCOp(unsigned opcode)
{
switch(opcode) {
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h
index 299146e..38a7ebc 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.h
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.h
@@ -29,6 +29,7 @@ bool isPlaceHolderOpcode(unsigned opcode);
bool isTransOp(unsigned opcode);
bool isTexOp(unsigned opcode);
bool isReductionOp(unsigned opcode);
+bool isCubeOp(unsigned opcode);
bool isFCOp(unsigned opcode);
/* XXX: Move these to AMDGPUInstrInfo.h */
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
index e0bc95b..eed53a4 100644
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -44,8 +44,9 @@ namespace {
const R600RegisterInfo * TRI;
bool evergreenEncoding;
+ bool isCube;
bool isReduction;
- unsigned reductionElement;
+ unsigned currentElement;
bool isLast;
unsigned section_start;
@@ -53,7 +54,7 @@ namespace {
public:
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
- _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+ _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false),
isLast(true) { }
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -65,7 +66,7 @@ namespace {
private:
void emitALUInstr(MachineInstr &MI);
- void emitSrc(const MachineOperand & MO);
+ void emitSrc(const MachineOperand & MO, int chan_override = -1);
void emitDst(const MachineOperand & MO);
void emitALU(MachineInstr &MI, unsigned numSrc);
void emitTexInstr(MachineInstr &MI);
@@ -176,11 +177,19 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
} else if (isReductionOp(MI.getOpcode())) {
isReduction = true;
isLast = false;
- for (reductionElement = 0; reductionElement < 4; reductionElement++) {
- isLast = (reductionElement == 3);
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
emitALUInstr(MI);
}
isReduction = false;
+ } else if (isCubeOp(MI.getOpcode())) {
+ isCube = true;
+ isLast = false;
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
+ emitALUInstr(MI);
+ }
+ isCube = false;
} else if (MI.getOpcode() == AMDIL::RETURN ||
MI.getOpcode() == AMDIL::BUNDLE ||
MI.getOpcode() == AMDIL::KILL) {
@@ -307,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
/* Emit instruction type */
emitByte(0);
- unsigned int opIndex;
- for (opIndex = 1; opIndex < numOperands; opIndex++) {
- /* Literal constants are always stored as the last operand. */
- if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
- break;
+ if (isCube) {
+ static const int cube_src_swz[] = {2, 2, 0, 1};
+ emitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
+ emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
+ emitNullBytes(SRC_BYTE_COUNT);
+ } else {
+ unsigned int opIndex;
+ for (opIndex = 1; opIndex < numOperands; opIndex++) {
+ /* Literal constants are always stored as the last operand. */
+ if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+ break;
+ }
+ emitSrc(MI.getOperand(opIndex));
}
- emitSrc(MI.getOperand(opIndex));
- }
/* Emit zeros for unused sources */
- for ( ; opIndex < 4; opIndex++) {
- emitNullBytes(SRC_BYTE_COUNT);
+ for ( ; opIndex < 4; opIndex++) {
+ emitNullBytes(SRC_BYTE_COUNT);
+ }
}
emitDst(dstOp);
@@ -326,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
emitALU(MI, numOperands - 1);
}
-void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */)
{
uint32_t value = 0;
/* Emit the source select (2 bytes). For GPRs, this is the register index.
@@ -352,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO)
}
/* Emit the source channel (1 byte) */
- if (isReduction) {
- emitByte(reductionElement);
+ if (chan_override != -1) {
+ emitByte(chan_override);
+ } else if (isReduction) {
+ emitByte(currentElement);
} else if (MO.isReg()) {
emitByte(TRI->getHWRegChan(MO.getReg()));
} else {
@@ -395,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
emitByte(getHWReg(MO.getReg()));
/* Emit the element of the destination register (1 byte)*/
- if (isReduction) {
- emitByte(reductionElement);
+ if (isReduction || isCube) {
+ emitByte(currentElement);
} else {
emitByte(TRI->getHWRegChan(MO.getReg()));
}
@@ -409,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
}
/* Emit writemask (1 byte). */
- if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+ if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg()))
|| MO.getTargetFlags() & MO_FLAG_MASK) {
emitByte(0);
} else {
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index edbade7..381ad71 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
}
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
+ InstrItinClass itin = VecALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
ins,
@@ -537,6 +537,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;
+class CUBE_Common <bits<32> inst> : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+>;
+
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
[]> {
@@ -681,6 +690,7 @@ let Gen = AMDGPUGen.R600 in {
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
+ def CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
@@ -887,6 +897,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in {
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
def DOT4_eg : DOT4_Common<0xBE>;
+ def CUBE_eg : CUBE_Common<0xC0>;
} // End AMDGPUGen.EG_CAYMAN
--
1.7.10.1
More information about the mesa-dev mailing list