[Mesa-dev] [PATCH 07/11] r600/shader: add support for load/store/atomic ops on images.
Dave Airlie
airlied at gmail.com
Wed Nov 15 00:11:08 UTC 2017
From: Dave Airlie <airlied at redhat.com>
This adds support to the shader assembler for load/store/atomic
ops on images, which are handled via RAT operations.
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
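A note for reviewers (not part of the commit message): the channel packing
that load_index_src() builds into the RAT index GPR can be summarised with
the standalone sketch below. The enum values and the zeroed_channel_mask()
helper are invented here purely for illustration; what they mirror is the
zero/copy pattern per coordinate channel in the patch's switch statement.

/* Standalone sketch: which coordinate channels are forced to zero for each
 * image target. The remaining channels are copied from the TGSI source,
 * with the 1D array layer moved from .y into .z. */
#include <stdio.h>

enum img_target { IMG_BUFFER, IMG_1D, IMG_1D_ARRAY, IMG_2D, IMG_OTHER };

static unsigned zeroed_channel_mask(enum img_target t)
{
        switch (t) {
        case IMG_BUFFER:
        case IMG_1D:
                return 0xe; /* .yzw zeroed, .x carries the coordinate */
        case IMG_1D_ARRAY:
                return 0xa; /* .yw zeroed, the layer is copied into .z */
        case IMG_2D:
                return 0xc; /* .zw zeroed */
        default:
                return 0x8; /* only .w zeroed (2D array, 3D, ...) */
        }
}

int main(void)
{
        printf("1D array zeroed mask: 0x%x\n", zeroed_channel_mask(IMG_1D_ARRAY));
        return 0;
}
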
src/gallium/drivers/r600/r600_shader.c | 316 ++++++++++++++++++++++++++++++++-
1 file changed, 312 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 73baa07..d7cb52e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -969,6 +969,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SAMPLER:
case TGSI_FILE_SAMPLER_VIEW:
case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_IMAGE:
break;
case TGSI_FILE_HW_ATOMIC:
@@ -3074,6 +3075,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
case PIPE_SHADER_FRAGMENT:
shader->two_side = key.ps.color_two_side;
shader->atomic_base = key.ps.first_atomic_counter;
+ shader->rat_base = key.ps.nr_cbufs;
break;
default:
break;
@@ -7779,14 +7781,318 @@ static int tgsi_load_gds(struct r600_shader_ctx *ctx)
return 0;
}
+/* Build the RAT index GPR from the TGSI coordinate source, zeroing unused
+ * channels and moving the 1D array layer from .y into .z. */
+static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r, i;
+ struct r600_bytecode_alu alu;
+ int temp_reg = r600_get_temp(ctx);
+
+ for (i = 0; i < 4; i++) {
+ bool def_val = true, write_zero = false;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = temp_reg;
+ alu.dst.chan = i;
+
+ switch (inst->Memory.Texture) {
+ case TGSI_TEXTURE_BUFFER:
+ case TGSI_TEXTURE_1D:
+ if (i == 1 || i == 2 || i == 3) {
+ write_zero = true;
+ }
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ if (i == 1 || i == 3)
+ write_zero = true;
+ else if (i == 2) {
+ r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
+ def_val = false;
+ }
+ break;
+ case TGSI_TEXTURE_2D:
+ if (i == 2 || i == 3)
+ write_zero = true;
+ break;
+ default:
+ if (i == 3)
+ write_zero = true;
+ break;
+ }
+
+ if (write_zero) {
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ } else if (def_val) {
+ r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
+ }
+
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ *idx_gpr = temp_reg;
+ return 0;
+}
+
+static int tgsi_load_rat(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ /* have to work out the offset into the RAT immediate return buffer */
+ struct r600_bytecode_vtx vtx;
+ struct r600_bytecode_cf *cf;
+ int r;
+ int idx_gpr;
+ unsigned format, num_format, format_comp, endian;
+ const struct util_format_description *desc;
+ unsigned rat_index_mode;
+ unsigned immed_base;
+
+ rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+ immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
+ r = load_index_src(ctx, 1, &idx_gpr);
+ if (r)
+ return r;
+
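+ /* RAT read: MEM_RAT NOP_RTN reads the element addressed by idx_gpr and
+ * returns it through the immediate return buffer. */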
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+ cf = ctx->bc->cf_last;
+
+ cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
+ cf->rat.inst = V_RAT_INST_NOP_RTN;
+ cf->rat.index_mode = rat_index_mode;
+ cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
+ cf->output.gpr = ctx->thread_id_gpr;
+ cf->output.index_gpr = idx_gpr;
+ cf->output.comp_mask = 0xf;
+ cf->output.burst_count = 1;
+ cf->vpm = 1;
+ cf->barrier = 1;
+ cf->mark = 1;
+ cf->output.elem_size = 0;
+
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
+ cf = ctx->bc->cf_last;
+ cf->barrier = 1;
+
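+ /* After the ack, fetch the returned data out of the immediate return
+ * buffer into the destination GPR, swizzled according to the image format. */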
+ desc = util_format_description(inst->Memory.Format);
+ r600_vertex_data_type(inst->Memory.Format,
+ &format, &num_format, &format_comp, &endian);
+ memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+ vtx.op = FETCH_OP_VFETCH;
+ vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
+ vtx.buffer_index_mode = rat_index_mode;
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+ vtx.src_gpr = ctx->thread_id_gpr;
+ vtx.src_sel_x = 1;
+ vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+ vtx.dst_sel_x = desc->swizzle[0];
+ vtx.dst_sel_y = desc->swizzle[1];
+ vtx.dst_sel_z = desc->swizzle[2];
+ vtx.dst_sel_w = desc->swizzle[3];
+ vtx.srf_mode_all = 1;
+ vtx.data_format = format;
+ vtx.num_format_all = num_format;
+ vtx.format_comp_all = format_comp;
+ vtx.endian = endian;
+ vtx.offset = 0;
+ vtx.mega_fetch_count = 3;
+ r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
+ if (r)
+ return r;
+ cf = ctx->bc->cf_last;
+ cf->barrier = 1;
+ return 0;
+}
+
static int tgsi_load(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ return tgsi_load_rat(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
return tgsi_load_gds(ctx);
return 0;
}
+static int tgsi_store_rat(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_cf *cf;
+ bool src_requires_loading = false;
+ int val_gpr, idx_gpr;
+ int r, i;
+ unsigned rat_index_mode;
+
+ rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+ r = load_index_src(ctx, 0, &idx_gpr);
+ if (r)
+ return r;
+
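+ /* Copy the value being stored into temp_reg unless it already lives in a
+ * temporary (i.e. a GPR we can export from directly). */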
+ if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY)
+ src_requires_loading = true;
+
+ if (src_requires_loading) {
+ struct r600_bytecode_alu alu;
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ val_gpr = ctx->temp_reg;
+ } else
+ val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
+ if (rat_index_mode)
+ egcm_load_index_reg(ctx->bc, 1, false);
+
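+ /* STORE_TYPED writes the four components of val_gpr to the RAT element
+ * addressed by idx_gpr; no return data is expected. */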
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+ cf = ctx->bc->cf_last;
+
+ cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
+ cf->rat.inst = V_RAT_INST_STORE_TYPED;
+ cf->rat.index_mode = rat_index_mode;
+ cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+ cf->output.gpr = val_gpr;
+ cf->output.index_gpr = idx_gpr;
+ cf->output.comp_mask = 0xf;
+ cf->output.burst_count = 1;
+ cf->vpm = 1;
+ cf->barrier = 1;
+ cf->output.elem_size = 0;
+ return 0;
+}
+
+static int tgsi_store(struct r600_shader_ctx *ctx)
+{
+ return tgsi_store_rat(ctx);
+}
+
+static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ /* have to work out the offset into the RAT immediate return buffer */
+ struct r600_bytecode_alu alu;
+ struct r600_bytecode_vtx vtx;
+ struct r600_bytecode_cf *cf;
+ int r;
+ int idx_gpr;
+ unsigned format, num_format, format_comp, endian;
+ const struct util_format_description *desc;
+ unsigned rat_index_mode;
+ unsigned immed_base;
+
+ immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
+
+ assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE);
+ rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+ r = load_index_src(ctx, 1, &idx_gpr);
+ if (r)
+ return r;
+
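+ /* Pass the atomic operands through thread_id_gpr: the source value in .x
+ * and, for CMPXCHG, the compare value in .w. */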
+ if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->thread_id_gpr;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->thread_id_gpr;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->thread_id_gpr;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (rat_index_mode)
+ egcm_load_index_reg(ctx->bc, 1, false);
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+ cf = ctx->bc->cf_last;
+
+ cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
+ cf->rat.inst = ctx->inst_info->op;
+ cf->rat.index_mode = rat_index_mode;
+ cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
+ cf->output.gpr = ctx->thread_id_gpr;
+ cf->output.index_gpr = idx_gpr;
+ cf->output.comp_mask = 0xf;
+ cf->output.burst_count = 1;
+ cf->vpm = 1;
+ cf->barrier = 1;
+ cf->mark = 1;
+ cf->output.elem_size = 0;
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
+ cf = ctx->bc->cf_last;
+ cf->barrier = 1;
+ cf->cf_addr = 1;
+
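+ /* Read the pre-operation value back from the immediate return buffer;
+ * only .x of the destination is written. */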
+ desc = util_format_description(inst->Memory.Format);
+ r600_vertex_data_type(inst->Memory.Format,
+ &format, &num_format, &format_comp, &endian);
+ memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+ vtx.op = FETCH_OP_VFETCH;
+ vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
+ vtx.buffer_index_mode = rat_index_mode;
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+ vtx.src_gpr = ctx->thread_id_gpr;
+ vtx.src_sel_x = 1;
+ vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+ vtx.dst_sel_x = desc->swizzle[0];
+ vtx.dst_sel_y = 7;
+ vtx.dst_sel_z = 7;
+ vtx.dst_sel_w = 7;
+ vtx.use_const_fields = 0;
+ vtx.srf_mode_all = 1;
+ vtx.data_format = format;
+ vtx.num_format_all = num_format;
+ vtx.format_comp_all = format_comp;
+ vtx.endian = endian;
+ vtx.offset = 0;
+ vtx.mega_fetch_count = 0xf;
+ r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
+ if (r)
+ return r;
+ cf = ctx->bc->cf_last;
+ cf->vpm = 1;
+ cf->barrier = 1;
+ return 0;
+}
+
static int get_gds_op(int opcode)
{
switch (opcode) {
@@ -7885,6 +8191,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ return tgsi_atomic_op_rat(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
return tgsi_atomic_op_gds(ctx);
return 0;
@@ -9547,7 +9855,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
[115] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9597,7 +9905,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
- [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
[163] = { ALU_OP0_NOP, tgsi_unsupported},
[164] = { ALU_OP0_NOP, tgsi_unsupported},
[165] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9770,7 +10078,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
[115] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9820,7 +10128,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
- [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
[163] = { ALU_OP0_NOP, tgsi_unsupported},
[164] = { ALU_OP0_NOP, tgsi_unsupported},
[165] = { ALU_OP0_NOP, tgsi_unsupported},
--
2.9.5