[PATCH] r600g: rework case switches into table lookups
Christian König
deathsimple at vodafone.de
Wed Mar 9 10:13:12 PST 2011
---
src/gallium/drivers/r600/eg_asm.c | 102 +++++++
src/gallium/drivers/r600/r600_asm.c | 507 ++++++++++++-----------------------
src/gallium/drivers/r600/r600_asm.h | 45 ++--
3 files changed, 300 insertions(+), 354 deletions(-)
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 8190df7..cd6cafd 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -29,6 +29,108 @@
#include "r600_opcodes.h"
#include "evergreend.h"
+/* Per-opcode properties of the Evergreen OP2 ALU instructions, indexed by
+ * the EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_* opcode value.
+ *
+ * Columns, in positional order:
+ *   { num_operands, is_once, is_reduction, is_cube, is_mova, is_trans }
+ * NOTE(review): the order is inferred from the values and from how the
+ * fields are read in r600_asm.c; struct r600_bc_alu_inst_info itself is
+ * declared in r600_asm.h - confirm against it.
+ *
+ * Every opcode must be listed exactly once: a repeated designator is legal
+ * C, but the later initializer silently replaces the earlier one.
+ * Opcodes that are not listed get an all-zero entry.
+ *
+ * Note that FLT_TO_INT* instructions are vector instructions
+ * on Evergreen, despite what the documentation says. */
+const struct r600_bc_alu_inst_info eg_bc_alu_op2_inst_info[] = {
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP] = { 0, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4] = { 2, 0, 1, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE] = { 2, 0, 1, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE] = { 2, 0, 1, 1, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4] = { 1, 0, 1, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW] = { 2, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT] = { 1, 0, 0, 0, 1, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR] = { 1, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT] = { 2, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT] = { 1, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT] = { 1, 0, 0, 0, 0, 1 }
+};
+
+/* Per-opcode properties of the Evergreen OP3 ALU instructions, indexed by
+ * the EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_* opcode value.
+ *
+ * Columns, in positional order:
+ *   { num_operands, is_once, is_reduction, is_cube, is_mova, is_trans }
+ * NOTE(review): order inferred from the values and from field usage in
+ * r600_asm.c; confirm against struct r600_bc_alu_inst_info in r600_asm.h.
+ *
+ * MUL_LIT is the only entry flagged is_trans. */
+const struct r600_bc_alu_inst_info eg_bc_alu_op3_inst_info[] = {
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT] = { 3, 0, 0, 0, 0, 1 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT] = { 3, 0, 0, 0, 0, 0 },
+ [EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT] = { 3, 0, 0, 0, 0, 0 }
+};
+
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
unsigned id = cf->id;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1504ef6..2caac5a 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -35,120 +35,148 @@
#define NUM_OF_CYCLES 3
#define NUM_OF_COMPONENTS 4
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
-{
- if(alu->is_op3)
- return 3;
-
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- switch (alu->inst) {
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- return 2;
-
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- }
- break;
- case CHIPREV_EVERGREEN:
- switch (alu->inst) {
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
- return 2;
-
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- }
- break;
- }
+/* Forward declarations: the dispatch table below takes the address of
+ * these file-local builders before their definitions appear. */
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf);
+static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+static void r600_cf_vtx(struct r600_vertex_element *ve);
+
+/* Per-chip-revision constants and builder callbacks, indexed by
+ * bc->chiprev (CHIPREV_R600 / CHIPREV_R700 / CHIPREV_EVERGREEN).
+ * NOTE(review): "chipref" in the name looks like a typo for "chiprev";
+ * renaming would have to touch every user of the table. */
+static const struct {
+ /* number of fetch instructions (ndw / 4) in the current CF at which
+  * r600_bc_add_vtx()/r600_bc_add_tex() set bc->force_add_cf */
+ unsigned tex_and_vtx_inst;
+ /* NOTE(review): not read anywhere in the visible part of this patch;
+  * presumably the first resource id used for vertex fetches - confirm */
+ unsigned fetch_resource_start;
+
+ /* builder hooks selected per chip revision */
+ int (*cf_build)(struct r600_bc *bc, struct r600_bc_cf *cf);
+ void (*cf_vtx_build)(uint32_t *bytecode, const struct r600_bc_cf *cf);
+ int (*alu_build)(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+ void (*fetch_shader_build)(struct r600_vertex_element *ve);
+
+} r600_bc_chipref_info[] = {
+ [CHIPREV_R600] = {
+ .tex_and_vtx_inst = 8,
+ .fetch_resource_start = 160,
+ .cf_build = r600_bc_cf_build,
+ .cf_vtx_build = r600_bc_cf_vtx_build,
+ .alu_build = r600_bc_alu_build,
+ .fetch_shader_build = r600_cf_vtx
+ },
+ [CHIPREV_R700] = {
+ .tex_and_vtx_inst = 16,
+ .fetch_resource_start = 160,
+ .cf_build = r600_bc_cf_build,
+ .cf_vtx_build = r700_bc_cf_vtx_build,
+ .alu_build = r700_bc_alu_build,
+ .fetch_shader_build = r600_cf_vtx
+ },
+ /* .cf_vtx_build is left NULL here. The only visible call through it is
+  * inside r600_bc_cf_build(), which is the cf_build hook for R600/R700
+  * only. NOTE(review): confirm eg_bc_cf_build() never dispatches
+  * through cf_vtx_build. */
+ [CHIPREV_EVERGREEN] = {
+ .tex_and_vtx_inst = 64,
+ .fetch_resource_start = 0,
+ .cf_build = eg_bc_cf_build,
+ .alu_build = r700_bc_alu_build,
+ .fetch_shader_build = eg_cf_vtx
+ },
+};
- return 3;
-}
+/* Per-opcode properties of the R600/R700 OP2 ALU instructions, indexed by
+ * the V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_* opcode value. r600_bc_init()
+ * installs this table as bc->op2_inst_info for both CHIPREV_R600 and
+ * CHIPREV_R700.
+ *
+ * Columns, in positional order:
+ *   { num_operands, is_once, is_reduction, is_cube, is_mova, is_trans }
+ * NOTE(review): order inferred from the values and from field usage in
+ * this file; confirm against struct r600_bc_alu_inst_info in r600_asm.h.
+ *
+ * Opcodes that are not listed get an all-zero entry. */
+static const struct r600_bc_alu_inst_info r600_bc_alu_op2_inst_info[] = {
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP] = { 0, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE] = { 2, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT] = { 2, 1, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4] = { 2, 0, 1, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE] = { 2, 0, 1, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE] = { 2, 0, 1, 1, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4] = { 1, 0, 1, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV] = { 1, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA] = { 1, 0, 0, 0, 1, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR] = { 1, 0, 0, 0, 1, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT] = { 1, 0, 0, 0, 1, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT] = { 1, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR] = { 1, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC] = { 1, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS] = { 1, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT] = { 2, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE] = { 1, 0, 0, 0, 0, 1 }
+};
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+/* Per-opcode properties of the R600/R700 OP3 ALU instructions, indexed by
+ * the V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_* opcode value. r600_bc_init()
+ * installs this table as bc->op3_inst_info for both CHIPREV_R600 and
+ * CHIPREV_R700.
+ *
+ * Columns, in positional order:
+ *   { num_operands, is_once, is_reduction, is_cube, is_mova, is_trans }
+ * NOTE(review): order inferred from the values and from field usage in
+ * this file; confirm against struct r600_bc_alu_inst_info in r600_asm.h.
+ *
+ * Only the MUL_LIT variants are flagged is_trans. */
+static const struct r600_bc_alu_inst_info r600_bc_alu_op3_inst_info[] = {
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT] = { 3, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2] = { 3, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2] = { 3, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4] = { 3, 0, 0, 0, 0, 1 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT] = { 3, 0, 0, 0, 0, 0 },
+ [V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT] = { 3, 0, 0, 0, 0, 0 }
+};
static struct r600_bc_cf *r600_bc_cf(void)
{
@@ -207,12 +235,16 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_RS780:
case CHIP_RS880:
bc->chiprev = CHIPREV_R600;
+ bc->op2_inst_info = r600_bc_alu_op2_inst_info;
+ bc->op3_inst_info = r600_bc_alu_op3_inst_info;
break;
case CHIP_RV770:
case CHIP_RV730:
case CHIP_RV710:
case CHIP_RV740:
bc->chiprev = CHIPREV_R700;
+ bc->op2_inst_info = r600_bc_alu_op2_inst_info;
+ bc->op3_inst_info = r600_bc_alu_op3_inst_info;
break;
case CHIP_CEDAR:
case CHIP_REDWOOD:
@@ -224,6 +256,8 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_TURKS:
case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
+ bc->op2_inst_info = eg_bc_alu_op2_inst_info;
+ bc->op3_inst_info = eg_bc_alu_op3_inst_info;
break;
default:
R600_ERR("unknown family %d\n", bc->family);
@@ -291,135 +325,33 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
+/* Number of source operands read by an ALU instruction.
+ *
+ * OP2 instructions are looked up in the per-chip table installed by
+ * r600_bc_init(); OP3 instructions always read three sources.
+ *
+ * NOTE(review): the switch this replaces reported opcodes it did not know
+ * via R600_ERR. The lookup does not: an opcode missing from the table
+ * yields its zero-initialized entry (0 operands), and an opcode past the
+ * end of the table is read out of bounds. */
+static int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+	if (!alu->is_op3)
+		return bc->op2_inst_info[alu->inst].num_operands;
+
+	return 3;
+}
+
/* alu instructions that can ony exits once per group */
static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- return !alu->is_op3 && (
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
- case CHIPREV_EVERGREEN:
- default:
- return !alu->is_op3 && (
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
- }
+ return !alu->is_op3 && bc->op2_inst_info[alu->inst].is_once;
}
static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- return !alu->is_op3 && (
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
- case CHIPREV_EVERGREEN:
- default:
- return !alu->is_op3 && (
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
- }
+ return !alu->is_op3 && bc->op2_inst_info[alu->inst].is_reduction;
}
static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- return !alu->is_op3 &&
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
- case CHIPREV_EVERGREEN:
- default:
- return !alu->is_op3 &&
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
- }
+ return !alu->is_op3 && bc->op2_inst_info[alu->inst].is_cube;
}
static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- return !alu->is_op3 && (
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
- case CHIPREV_EVERGREEN:
- default:
- return !alu->is_op3 && (
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
- }
+ return !alu->is_op3 && bc->op2_inst_info[alu->inst].is_mova;
}
/* alu instructions that can only execute on the vector unit */
@@ -432,70 +364,8 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
/* alu instructions that can only execute on the trans unit */
static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- case CHIPREV_R700:
- if (!alu->is_op3)
- return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
- alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
- else
- return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
- alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
- alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
- alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
- case CHIPREV_EVERGREEN:
- default:
- if (!alu->is_op3)
- /* Note that FLT_TO_INT* instructions are vector instructions
- * on Evergreen, despite what the documentation says. */
- return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
- else
- return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
- }
+ return (!alu->is_op3 && bc->op2_inst_info[alu->inst].is_trans) ||
+ (alu->is_op3 && bc->op3_inst_info[alu->inst].is_trans);
}
/* alu instructions that can execute on any unit */
@@ -1247,24 +1117,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
-static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
-{
- switch (bc->chiprev) {
- case CHIPREV_R600:
- return 8;
-
- case CHIPREV_R700:
- return 16;
-
- case CHIPREV_EVERGREEN:
- return 64;
-
- default:
- R600_ERR("Unknown chiprev %d.\n", bc->chiprev);
- return 8;
- }
-}
-
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
{
struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -1290,7 +1142,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+ if ((bc->cf_last->ndw / 4) >= r600_bc_chipref_info[bc->chiprev].tex_and_vtx_inst)
bc->force_add_cf = 1;
return 0;
}
@@ -1337,7 +1189,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+ if ((bc->cf_last->ndw / 4) >= r600_bc_chipref_info[bc->chiprev].tex_and_vtx_inst)
bc->force_add_cf = 1;
return 0;
}
@@ -1484,10 +1336,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- if (bc->chiprev == CHIPREV_R700)
- r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
- else
- r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
+ r600_bc_chipref_info[bc->chiprev].cf_vtx_build(&bc->bytecode[id], cf);
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -1590,10 +1439,7 @@ int r600_bc_build(struct r600_bc *bc)
return -ENOMEM;
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
addr = cf->addr;
- if (bc->chiprev == CHIPREV_EVERGREEN)
- r = eg_bc_cf_build(bc, cf);
- else
- r = r600_bc_cf_build(bc, cf);
+ r = r600_bc_chipref_info[bc->chiprev].cf_build(bc, cf);
if (r)
return r;
switch (cf->inst) {
@@ -1608,18 +1454,7 @@ int r600_bc_build(struct r600_bc *bc)
if (r)
return r;
r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
- switch(bc->chiprev) {
- case CHIPREV_R600:
- r = r600_bc_alu_build(bc, alu, addr);
- break;
- case CHIPREV_R700:
- case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */
- r = r700_bc_alu_build(bc, alu, addr);
- break;
- default:
- R600_ERR("unknown family %d\n", bc->family);
- return -EINVAL;
- }
+ r = r600_bc_chipref_info[bc->chiprev].alu_build(bc, alu, addr);
if (r)
return r;
addr += 2;
@@ -2072,7 +1907,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
struct r600_bc_vtx vtx;
struct pipe_vertex_element *elements = ve->elements;
const struct util_format_description *desc;
- unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
unsigned format, num_format, format_comp;
u32 *bytecode;
int i, r;
@@ -2090,7 +1924,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
}
memset(&bc, 0, sizeof(bc));
- r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
+ r = r600_bc_init(&bc, rctx->family);
if (r)
return r;
@@ -2131,7 +1965,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
/* see above for vbuffer_need_offset explanation */
vbuffer_index = elements[i].vertex_buffer_index;
memset(&vtx, 0, sizeof(vtx));
- vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
+ vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) +
+ r600_bc_chipref_info[bc.chiprev].fetch_resource_start;
vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
@@ -2190,10 +2025,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
r600_bo_unmap(rctx->radeon, ve->fetch_shader);
r600_bc_clear(&bc);
- if (rctx->family >= CHIP_CEDAR)
- eg_cf_vtx(ve);
- else
- r600_cf_vtx(ve);
-
+ r600_bc_chipref_info[bc.chiprev].fetch_shader_build(ve);
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f9f4d03..0a0825a 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -28,6 +28,15 @@
struct r600_vertex_element;
struct r600_pipe_context;
+struct r600_bc_alu_inst_info {
+ unsigned num_operands;
+ bool is_once;
+ bool is_reduction;
+ bool is_cube;
+ bool is_mova;
+ bool is_trans;
+};
+
struct r600_bc_alu_src {
unsigned sel;
unsigned chan;
@@ -170,25 +179,29 @@ struct r600_cf_callstack {
};
struct r600_bc {
- enum radeon_family family;
- int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
- int type;
- struct list_head cf;
- struct r600_bc_cf *cf_last;
- unsigned ndw;
- unsigned ncf;
- unsigned ngpr;
- unsigned nstack;
- unsigned nresource;
- unsigned force_add_cf;
- u32 *bytecode;
- u32 fc_sp;
- struct r600_cf_stack_entry fc_stack[32];
- unsigned call_sp;
- struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
+ enum radeon_family family;
+ int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
+ const struct r600_bc_alu_inst_info *op2_inst_info;
+ const struct r600_bc_alu_inst_info *op3_inst_info;
+ int type;
+ struct list_head cf;
+ struct r600_bc_cf *cf_last;
+ unsigned ndw;
+ unsigned ncf;
+ unsigned ngpr;
+ unsigned nstack;
+ unsigned nresource;
+ unsigned force_add_cf;
+ u32 *bytecode;
+ u32 fc_sp;
+ struct r600_cf_stack_entry fc_stack[32];
+ unsigned call_sp;
+ struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
};
/* eg_asm.c */
+extern const struct r600_bc_alu_inst_info eg_bc_alu_op2_inst_info[];
+extern const struct r600_bc_alu_inst_info eg_bc_alu_op3_inst_info[];
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
void eg_cf_vtx(struct r600_vertex_element *ve);
--
1.7.1
--=-PYR7oJAhCGiO8cKcKm3+--
More information about the mesa-dev mailing list