[Mesa-dev] [PATCH 12/53] r600: add support for LDS instruction encoding.

Dave Airlie airlied at gmail.com
Sun Nov 29 22:20:21 PST 2015


From: Dave Airlie <airlied at redhat.com>

LDS instructions are used in tessellation shaders to read/write values
passed between the VS, TCS and TES stages.

This splits the Evergreen/Cayman ALU assembler out of the R700 one
(which they previously shared) so that it can encode these instructions.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/eg_asm.c   | 75 +++++++++++++++++++++++++++++++++++++
 src/gallium/drivers/r600/eg_sq.h    | 39 +++++++++++++++++++
 src/gallium/drivers/r600/r600_asm.c | 28 +++++++++++++-
 src/gallium/drivers/r600/r600_asm.h |  4 ++
 4 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index f555649..46683c1 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -216,3 +216,78 @@ int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gd
 		S_SQ_MEM_GDS_WORD2_DST_SEL_W(gds->dst_sel_w);
 	return 0;
 }
+/* Encode one ALU instruction as two words at bc->bytecode[id] and bc->bytecode[id + 1]; always returns 0. */
+int eg_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
+{
+	if (alu->is_lds_idx_op) { /* first word, LDS idx form */
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); /* no abs fields are emitted in this form */
+		assert(!alu->src[0].neg && !alu->src[1].neg && !alu->src[2].neg); /* no neg fields either; eg_sq.h notes the LDS idx offset redefines the neg bits */
+		bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+			S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+			S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+			S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_4(alu->lds_idx >> 4) | /* bit 4 of lds_idx (macro keeps one bit) */
+			S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+			S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+			S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+			S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_5(alu->lds_idx >> 5) | /* bit 5 of lds_idx */
+			S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | /* index_mode is only encoded in this branch */
+			S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
+			S_SQ_ALU_WORD0_LAST(alu->last);
+	} else { /* first word, regular ALU form: src0/src1 with their neg modifiers */
+		bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+			S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+			S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+			S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+			S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+			S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+			S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+			S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+			S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
+			S_SQ_ALU_WORD0_LAST(alu->last);
+	}
+
+	/* second word; don't replace gpr by pv or ps for destination register */
+	if (alu->is_lds_idx_op) {
+		unsigned lds_op = r600_isa_alu_opcode(bc->isa->hw_class, alu->op); /* split below: low byte -> ALU_INST, next byte -> LDS_OP */
+		bc->bytecode[id++] =
+			S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_1(alu->lds_idx >> 1) | /* bit 1 of lds_idx */
+
+			S_SQ_ALU_WORD1_OP3_ALU_INST(lds_op & 0xff) | /* low byte of the opcode value */
+			S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_LDS_OP((lds_op >> 8) & 0xff) | /* next byte; the macro itself keeps only 6 bits (0x3f) */
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_0(alu->lds_idx) | /* bit 0 of lds_idx */
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_2(alu->lds_idx >> 2) | /* bit 2 of lds_idx */
+			S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | /* only the dst channel is encoded; no DST_GPR/DST_REL/CLAMP here */
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_3(alu->lds_idx >> 3); /* bit 3 of lds_idx */
+
+	} else if (alu->is_op3) { /* OP3 form: src2 is encoded, no abs/write mask/omod fields */
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); /* this form emits no abs fields */
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+			                S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
+			                S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
+					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
+					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
+					S_SQ_ALU_WORD1_OP3_ALU_INST(r600_isa_alu_opcode(bc->isa->hw_class, alu->op)) |
+					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
+	} else { /* OP2 form: adds abs modifiers, write mask, omod and execute-mask/predicate updates */
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+			                S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
+			                S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
+					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
+					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
+					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
+					S_SQ_ALU_WORD1_OP2_ALU_INST(r600_isa_alu_opcode(bc->isa->hw_class, alu->op)) |
+					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
+			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
+			                S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
+	}
+	return 0; /* no failure path in this function */
+}
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index 3074cfe..c118d3a 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -535,6 +535,45 @@
 #define   S_SQ_MEM_GDS_WORD2_DST_SEL_Z(x)                            (((x) & 0x7) << 6)
 #define   S_SQ_MEM_GDS_WORD2_DST_SEL_W(x)                            (((x) & 0x7) << 9)
 
+/* LDS IDX ops redefine the op3 neg bits: in WORD0, bits 12 and 25 carry index offset bits 4 and 5 */
+#define S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x)                    (((x) & 0x1) << 12)
+#define S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x)                    (((x) & 0x1) << 25)
+
+/* this was src2 neg: now index offset bit 1 (WORD1 bit 12) */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x)                    (((x) & 0x1) << 12)
+
+/* this was dst gpr: now a 6-bit LDS opcode (bits 21-26) followed by index offset bit 0 (bit 27) */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_LDS_OP(x)                    (((x) & 0x3f) << 21)
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x)                    (((x) & 0x1) << 27)
+
+/* this was dst rel: now index offset bit 2 (bit 28) */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x)                    (((x) & 0x1) << 28)
+/* this was clamp: now index offset bit 3 (bit 31) */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x)                    (((x) & 0x1) << 31)
+/* LDS instruction opcodes; presumably the values carried in the LDS_OP field above -- TODO confirm against the ISA doc */
+#define V_SQ_LDS_INST_ADD                               0x00
+#define V_SQ_LDS_INST_SUB                               0x01
+#define V_SQ_LDS_INST_RSUB                              0x02
+
+#define V_SQ_LDS_INST_INC                               0x03
+#define V_SQ_LDS_INST_DEC                               0x04
+#define V_SQ_LDS_INST_MIN_INT                           0x05
+#define V_SQ_LDS_INST_MAX_INT                           0x06
+#define V_SQ_LDS_INST_MIN_UINT                          0x07
+#define V_SQ_LDS_INST_MAX_UINT                          0x08
+#define V_SQ_LDS_INST_AND                               0x09
+#define V_SQ_LDS_INST_OR                                0x0a
+#define V_SQ_LDS_INST_XOR                               0x0b
+#define V_SQ_LDS_INST_MSKOR                             0x0c
+#define V_SQ_LDS_INST_WRITE                             0x0d
+#define V_SQ_LDS_INST_WRITE_REL                         0x0e
+#define V_SQ_LDS_INST_WRITE2                            0x0f
+/* the *_RET variants below start at 0x32; values 0x10-0x31 are not defined in this header chunk */
+#define V_SQ_LDS_INST_READ_RET                          0x32
+#define V_SQ_LDS_INST_READ_REL_RET                      0x33
+#define V_SQ_LDS_INST_READ2_RET                         0x34
+#define V_SQ_LDS_INST_READWRITE_RET                     0x35
+
 #define V_SQ_CF_COND_ACTIVE                             0x00
 #define V_SQ_CF_COND_FALSE                              0x01
 #define V_SQ_CF_COND_BOOL                               0x02
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 41e9c19..29515f2 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1714,10 +1714,12 @@ int r600_bytecode_build(struct r600_bytecode *bc)
 					r = r600_bytecode_alu_build(bc, alu, addr);
 					break;
 				case R700:
-				case EVERGREEN: /* eg alu is same encoding as r700 */
-				case CAYMAN:
 					r = r700_bytecode_alu_build(bc, alu, addr);
 					break;
+				case EVERGREEN:
+				case CAYMAN:
+					r = eg_bytecode_alu_build(bc, alu, addr);
+					break;
 				default:
 					R600_ERR("unknown chip class %d.\n", bc->chip_class);
 					return -EINVAL;
@@ -1904,6 +1906,28 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
 		need_sel = 0;
 		need_chan = 0;
 		switch (sel) {
+		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
+			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
+			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
+			o += fprintf(stderr, "LDS_OQ_A");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
+			o += fprintf(stderr, "LDS_OQ_B");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
+			o += fprintf(stderr, "LDS_OQ_A_POP");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
+			o += fprintf(stderr, "LDS_OQ_B_POP");
+			need_chan = 1;
+			break;
 		case V_SQ_ALU_SRC_PS:
 			o += fprintf(stderr, "PS");
 			break;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f786bab..0b78290 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -52,6 +52,7 @@ struct r600_bytecode_alu {
 	unsigned			op;
 	unsigned			last;
 	unsigned			is_op3;
+	unsigned			is_lds_idx_op;
 	unsigned			execute_mask;
 	unsigned			update_pred;
 	unsigned			pred_sel;
@@ -59,6 +60,7 @@ struct r600_bytecode_alu {
 	unsigned			bank_swizzle_force;
 	unsigned			omod;
 	unsigned                        index_mode;
+	unsigned                        lds_idx;
 };
 
 struct r600_bytecode_tex {
@@ -253,6 +255,8 @@ struct r600_bytecode {
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_clause);
 int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
+int eg_bytecode_alu_build(struct r600_bytecode *bc,
+			  struct r600_bytecode_alu *alu, unsigned id);
 /* r600_asm.c */
 void r600_bytecode_init(struct r600_bytecode *bc,
 			enum chip_class chip_class,
-- 
2.5.0



More information about the mesa-dev mailing list