[Mesa-dev] [PATCH] [rfc] r600: set vpm bit for loop start clause (v2)
Dave Airlie
airlied at gmail.com
Tue Aug 21 19:50:08 UTC 2018
From: Dave Airlie <airlied at redhat.com>
This fixes some hangs with the arb_shader_image_load_store-atomicity tests
on evergreen/cayman GPUs.
I'm not 100% sure why this fixes the hangs (VPM hurts my brain). I'm running
some piglit runs to see if it has any bad side effects.
v2: only set the vpm flags when an atomic operation is done.
---
src/gallium/drivers/r600/r600_asm.h | 1 +
src/gallium/drivers/r600/r600_shader.c | 19 ++++++++++++++++++-
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 5841044bf81..366530573de 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -224,6 +224,7 @@ struct r600_cf_stack_entry {
struct r600_bytecode_cf *start;
struct r600_bytecode_cf **mid; /* used to store the else point */
int num_mid;
+ bool need_vpm;
};
#define SQ_MAX_CALL_DEPTH 0x00000020
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2229dc8fab3..e93dbd3970c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -379,6 +379,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[],
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
@@ -9125,6 +9126,8 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
unsigned immed_base;
unsigned rat_base;
+ fc_level_needs_vpm(ctx);
+
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
rat_base = ctx->shader->rat_base;
@@ -9284,6 +9287,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
return -1;
}
+ fc_level_needs_vpm(ctx);
+
r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
if (r)
return r;
@@ -9405,6 +9410,8 @@ static int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
int lds_op = get_lds_op(inst->Instruction.Opcode);
int r;
+ fc_level_needs_vpm(ctx);
+
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = lds_op;
@@ -10433,9 +10440,16 @@ static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].need_vpm = false;
ctx->bc->fc_sp++;
}
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx)
+{
+ if (ctx->bc->fc_sp)
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm = true;
+}
+
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
@@ -10576,6 +10590,8 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
} else {
ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset;
}
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+ ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
fc_poplevel(ctx);
callstack_pop(ctx, FC_PUSH_VPM);
@@ -10587,7 +10603,6 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
* limited to 4096 iterations, like the other LOOP_* instructions. */
r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
-
fc_pushlevel(ctx, FC_LOOP);
/* check stack depth */
@@ -10612,6 +10627,8 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
BRK/CONT point to LOOP END CF
*/
ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+ ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
--
2.17.1
More information about the mesa-dev mailing list