[Mesa-dev] [PATCH] [rfc] r600: set vpm bit for loop start clause (v2)

Dave Airlie airlied at gmail.com
Tue Aug 21 19:50:08 UTC 2018


From: Dave Airlie <airlied at redhat.com>

This fixes some hangs with the arb_shader_image_load_store-atomicity tests
on evergreen/cayman GPUs.

I'm not 100% sure why (VPM hurts my brain); I'm running some piglit
runs to see if it has any bad side effects.

v2: only set the vpm bit on a flow-control level's start clause when an
atomic operation is emitted within that level.
---
 src/gallium/drivers/r600/r600_asm.h    |  1 +
 src/gallium/drivers/r600/r600_shader.c | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 5841044bf81..366530573de 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -224,6 +224,7 @@ struct r600_cf_stack_entry {
 	struct r600_bytecode_cf		*start;
 	struct r600_bytecode_cf		**mid; /* used to store the else point */
 	int				num_mid;
+	bool need_vpm;
 };
 
 #define SQ_MAX_CALL_DEPTH 0x00000020
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2229dc8fab3..e93dbd3970c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -379,6 +379,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[],
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
@@ -9125,6 +9126,8 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
 	unsigned immed_base;
 	unsigned rat_base;
 
+	fc_level_needs_vpm(ctx);
+
 	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
 	rat_base = ctx->shader->rat_base;
 
@@ -9284,6 +9287,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
 		return -1;
 	}
 
+	fc_level_needs_vpm(ctx);
+
 	r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
 	if (r)
 		return r;
@@ -9405,6 +9410,8 @@ static int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
 	int lds_op = get_lds_op(inst->Instruction.Opcode);
 	int r;
 
+	fc_level_needs_vpm(ctx);
+
 	struct r600_bytecode_alu alu;
 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.op = lds_op;
@@ -10433,9 +10440,16 @@ static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
 	assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
 	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
 	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].need_vpm = false;
 	ctx->bc->fc_sp++;
 }
 
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx)
+{
+	if (ctx->bc->fc_sp)
+		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm = true;
+}
+
 static void fc_poplevel(struct r600_shader_ctx *ctx)
 {
 	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
@@ -10576,6 +10590,8 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 	} else {
 		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset;
 	}
+	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+		ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
 	fc_poplevel(ctx);
 
 	callstack_pop(ctx, FC_PUSH_VPM);
@@ -10587,7 +10603,6 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
 	 * limited to 4096 iterations, like the other LOOP_* instructions. */
 	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
-
 	fc_pushlevel(ctx, FC_LOOP);
 
 	/* check stack depth */
@@ -10612,6 +10627,8 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
 	   BRK/CONT point to LOOP END CF
 	*/
 	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
+	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+		ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
 
 	ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
 
-- 
2.17.1



More information about the mesa-dev mailing list