[Mesa-dev] [PATCH 08/21] r600/shader: add local memory support to shader assembler.
Dave Airlie
airlied at gmail.com
Wed Nov 29 04:36:17 UTC 2017
From: Dave Airlie <airlied at redhat.com>
This is needed for compute shaders.
v1.1: make this work for vectors, fix missing LDS ops.
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
src/gallium/drivers/r600/r600_shader.c | 165 +++++++++++++++++++++++++++++++++
1 file changed, 165 insertions(+)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e72215f..83b70b0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -970,6 +970,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_ADDRESS:
case TGSI_FILE_BUFFER:
case TGSI_FILE_IMAGE:
+ case TGSI_FILE_MEMORY:
break;
case TGSI_FILE_HW_ATOMIC:
@@ -8032,6 +8033,30 @@ static int tgsi_load_rat(struct r600_shader_ctx *ctx)
return 0;
}
+/*
+ * Lower a TGSI LOAD whose resource register lives in TGSI_FILE_MEMORY.
+ *
+ * A MOV first copies ctx->src[1] (selector argument 0) into a scratch
+ * register; that register, the destination register number and the
+ * destination write mask are then handed to do_lds_fetch_values().
+ *
+ * Returns 0 on success, otherwise the non-zero code of the helper that
+ * failed.
+ */
+static int tgsi_load_lds(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const int addr_reg = r600_get_temp(ctx);
+	struct r600_bytecode_alu mov;
+	int ret;
+
+	/* Stage the operand in the scratch register. */
+	memset(&mov, 0, sizeof(mov));
+	mov.op = ALU_OP1_MOV;
+	mov.last = 1;
+	mov.dst.sel = addr_reg;
+	mov.dst.write = 1;
+	r600_bytecode_src(&mov.src[0], &ctx->src[1], 0);
+
+	ret = r600_bytecode_add_alu(ctx->bc, &mov);
+	if (ret != 0)
+		return ret;
+
+	/* The fetch helper's status is this function's status. */
+	return do_lds_fetch_values(ctx, addr_reg,
+				   ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index,
+				   inst->Dst[0].Register.WriteMask);
+}
+
static int tgsi_load(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8041,6 +8066,8 @@ static int tgsi_load(struct r600_shader_ctx *ctx)
return tgsi_load_gds(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
return tgsi_load_buffer(ctx);
+ if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+ return tgsi_load_lds(ctx);
return 0;
}
@@ -8188,11 +8215,82 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx)
return 0;
}
+static int tgsi_store_lds(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r, i, lasti;
+ int write_mask = inst->Dst[0].Register.WriteMask;
+ int temp_reg = r600_get_temp(ctx);
+ /*
+  * Store path for TGSI_FILE_MEMORY destinations (reached from
+  * tgsi_store()).  Three groups of instructions are emitted, in order:
+  *   1. a MOV from ctx->src[0] into temp_reg; dst.chan is left at the 0
+  *      set by the memset, so the result lands in channel 0;
+  *   2. for every further channel i enabled in the write mask, an ADD_INT
+  *      that is handed temp_reg/i, temp_reg/0 and the literal 4 * i
+  *      (presumably temp_reg.i = temp_reg.0 + 4 * i -- confirm against
+  *      single_alu_op2());
+  *   3. per enabled channel an LDS_WRITE, or a single LDS_WRITE_REL for a
+  *      fully enabled channel pair (0,1) or (2,3).
+  * NOTE(review): step 2 also fills the second channel of a pair that
+  * step 3 covers with LDS_WRITE_REL; step 3 never reads that channel.
+  * Returns 0, or the first non-zero code returned by a helper.
+  */
+ /* LDS write: stage ctx->src[0] in temp_reg first */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ lasti = tgsi_last_instruction(write_mask);
+ for (i = 1; i <= lasti; i++) { /* channel 0 already holds the MOV result */
+ if (!(write_mask & (1 << i)))
+ continue;
+ r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+ temp_reg, i,
+ temp_reg, 0,
+ V_SQ_ALU_SRC_LITERAL, 4 * i);
+ if (r)
+ return r;
+ }
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+ /* Both channels of pair (0,1) or (2,3) enabled: one LDS_WRITE_REL covers them. */
+ if ((i == 0 && ((write_mask & 3) == 3)) ||
+ (i == 2 && ((write_mask & 0xc) == 0xc))) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = LDS_OP3_LDS_WRITE_REL;
+ /* src[0]: temp_reg channel i; src[1] and src[2]: selectors i and i + 1 of ctx->src[1] */
+ alu.src[0].sel = temp_reg;
+ alu.src[0].chan = i;
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
+ r600_bytecode_src(&alu.src[2], &ctx->src[1], i + 1);
+ alu.last = 1;
+ alu.is_lds_idx_op = true;
+ alu.lds_idx = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ i += 1; /* together with the loop's i++ this skips channel i + 1 */
+ continue;
+ }
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = LDS_OP2_LDS_WRITE;
+ /* Unpaired channel: src[0] is temp_reg channel i, src[1] is selector i of ctx->src[1] */
+ alu.src[0].sel = temp_reg;
+ alu.src[0].chan = i;
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
+
+ alu.last = 1;
+ alu.is_lds_idx_op = true;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_store(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
return tgsi_store_buffer_rat(ctx);
+ else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) /* dispatch on Dst[0]'s register file: BUFFER above, MEMORY here, anything else goes to tgsi_store_rat() */
+ return tgsi_store_lds(ctx);
else
return tgsi_store_rat(ctx);
}
@@ -8410,6 +8508,71 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
return 0;
}
+/*
+ * Translate a TGSI atomic opcode into the matching LDS "_RET" operation.
+ *
+ * Returns the LDS opcode, or -1 when the TGSI opcode has no entry here.
+ */
+static int get_lds_op(int opcode)
+{
+	int lds_op = -1;
+
+	switch (opcode) {
+	case TGSI_OPCODE_ATOMUADD: lds_op = LDS_OP2_LDS_ADD_RET;      break;
+	case TGSI_OPCODE_ATOMAND:  lds_op = LDS_OP2_LDS_AND_RET;      break;
+	case TGSI_OPCODE_ATOMOR:   lds_op = LDS_OP2_LDS_OR_RET;       break;
+	case TGSI_OPCODE_ATOMXOR:  lds_op = LDS_OP2_LDS_XOR_RET;      break;
+	case TGSI_OPCODE_ATOMUMIN: lds_op = LDS_OP2_LDS_MIN_UINT_RET; break;
+	case TGSI_OPCODE_ATOMUMAX: lds_op = LDS_OP2_LDS_MAX_UINT_RET; break;
+	case TGSI_OPCODE_ATOMIMIN: lds_op = LDS_OP2_LDS_MIN_INT_RET;  break;
+	case TGSI_OPCODE_ATOMIMAX: lds_op = LDS_OP2_LDS_MAX_INT_RET;  break;
+	case TGSI_OPCODE_ATOMXCHG: lds_op = LDS_OP2_LDS_XCHG_RET;     break;
+	case TGSI_OPCODE_ATOMCAS:  lds_op = LDS_OP3_LDS_CMP_XCHG_RET; break;
+	default:
+		/* Unmapped opcode: keep the -1 sentinel. */
+		break;
+	}
+
+	return lds_op;
+}
+
+/*
+ * Lower a TGSI atomic opcode on TGSI_FILE_MEMORY into two instructions:
+ * the LDS "_RET" operation chosen by get_lds_op(), then a MOV that reads
+ * EG_V_SQ_ALU_SRC_LDS_OQ_A_POP into Dst[0].
+ *
+ * The LDS op takes src[0] from ctx->src[1] and src[1] from ctx->src[2].
+ * LDS_OP3_LDS_CMP_XCHG_RET additionally takes src[2] from ctx->src[3];
+ * every other op gets V_SQ_ALU_SRC_0 in that slot.
+ *
+ * Returns 0 on success, -1 when the opcode has no LDS mapping, or the
+ * non-zero code returned by r600_bytecode_add_alu().
+ */
+static int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int lds_op = get_lds_op(inst->Instruction.Opcode);
+	struct r600_bytecode_alu alu;
+	int r;
+
+	/*
+	 * get_lds_op() returns -1 for opcodes it does not map.  Bail out
+	 * before anything is emitted instead of storing -1 into alu.op.
+	 */
+	if (lds_op < 0)
+		return -1;
+
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+	alu.op = lds_op;
+	alu.is_lds_idx_op = true;
+	alu.last = 1;
+	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
+	r600_bytecode_src(&alu.src[1], &ctx->src[2], 0);
+	if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET)
+		r600_bytecode_src(&alu.src[2], &ctx->src[3], 0);
+	else
+		alu.src[2].sel = V_SQ_ALU_SRC_0;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+
+	/* then read from LDS_OQ_A_POP */
+	memset(&alu, 0, sizeof(alu));
+
+	alu.op = ALU_OP1_MOV;
+	alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+	alu.src[0].chan = 0;
+	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+	alu.dst.write = 1;
+	alu.last = 1;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+
+	return 0;
+}
+
static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8419,6 +8582,8 @@ static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
return tgsi_atomic_op_gds(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
return tgsi_atomic_op_rat(ctx);
+ if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+ return tgsi_atomic_op_lds(ctx);
return 0;
}
--
2.9.5
More information about the mesa-dev
mailing list