[Mesa-dev] [PATCH v2 2/9] gallium/radeon: add r600_gfx_{write,wait}_fence

Nicolai Hähnle nhaehnle at gmail.com
Thu Sep 22 12:28:11 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Add helpers for writing and waiting on bottom-of-pipe fences inside the gfx command stream, and use them in the radeonsi perfcounter code.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 52 +++++++++++++++++++++++++++
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +++
 src/gallium/drivers/radeonsi/si_perfcounter.c | 41 ++-------------------
 3 files changed, 60 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index b0d9813..b681a94 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -73,20 +73,72 @@ void radeon_shader_binary_clean(struct radeon_shader_binary *b)
 	FREE(b->global_symbol_offsets);
 	FREE(b->relocs);
 	FREE(b->disasm_string);
 	FREE(b->llvm_ir_string);
 }
 
 /*
  * pipe_context
  */
 
+void r600_gfx_write_fence(struct r600_common_context *ctx,
+			  uint64_t va, uint32_t old_value, uint32_t new_value)
+{
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+	/* Emits an EOP event into the gfx CS that writes new_value at va. */
+	if (ctx->chip_class == CIK) {
+		/* CIK: two EOP events are required to make all engines go
+		 * idle (and optional cache flushes executed) before the
+		 * timestamp is written. The first writes old_value so that
+		 * a wait for new_value is only satisfied by the second. */
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
+				EVENT_INDEX(5));
+		radeon_emit(cs, va); /* address; presumably low dword only */
+		radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); /* address hi */
+		radeon_emit(cs, old_value); /* immediate data */
+		radeon_emit(cs, 0); /* unused */
+	}
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); /* 6 dwords total */
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
+			EVENT_INDEX(5));
+	radeon_emit(cs, va); /* address; presumably low dword only */
+	radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); /* address hi */
+	radeon_emit(cs, new_value); /* immediate data */
+	radeon_emit(cs, 0); /* unused */
+}
+
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
+{
+	unsigned dwords = 6; /* size of one EVENT_WRITE_EOP packet */
+	/* Returns the dwords r600_gfx_write_fence emits: doubled on CIK. */
+	if (screen->chip_class == CIK)
+		dwords *= 2;
+
+	return dwords;
+}
+
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+			 uint64_t va, uint32_t ref, uint32_t mask)
+{
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+	/* Emits WAIT_REG_MEM on va; presumably waits for (*va & mask) == ref. */
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); /* 7 dwords total */
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
+	radeon_emit(cs, va); /* address; presumably low dword only */
+	radeon_emit(cs, va >> 32); /* address hi */
+	radeon_emit(cs, ref); /* reference value */
+	radeon_emit(cs, mask); /* mask */
+	radeon_emit(cs, 4); /* poll interval */
+}
+
 void r600_draw_rectangle(struct blitter_context *blitter,
 			 int x1, int y1, int x2, int y2, float depth,
 			 enum blitter_attrib_type type,
 			 const union pipe_color_union *attrib)
 {
 	struct r600_common_context *rctx =
 		(struct r600_common_context*)util_blitter_get_pipe(blitter);
 	struct pipe_viewport_state viewport;
 	struct pipe_resource *buf = NULL;
 	unsigned offset = 0;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index dd33eab..96b23b2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -688,20 +688,25 @@ struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
 						  unsigned alignment);
 struct pipe_resource *
 r600_buffer_from_user_memory(struct pipe_screen *screen,
 			     const struct pipe_resource *templ,
 			     void *user_memory);
 void
 r600_invalidate_resource(struct pipe_context *ctx,
 			 struct pipe_resource *resource);
 
 /* r600_common_pipe.c */
+void r600_gfx_write_fence(struct r600_common_context *ctx,
+			  uint64_t va, uint32_t old_value, uint32_t new_value);
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+			 uint64_t va, uint32_t ref, uint32_t mask);
 void r600_draw_rectangle(struct blitter_context *blitter,
 			 int x1, int y1, int x2, int y2, float depth,
 			 enum blitter_attrib_type type,
 			 const union pipe_color_union *attrib);
 bool r600_common_screen_init(struct r600_common_screen *rscreen,
 			     struct radeon_winsys *ws);
 void r600_destroy_common_screen(struct r600_common_screen *rscreen);
 void r600_preflush_suspend_features(struct r600_common_context *ctx);
 void r600_postflush_resume_features(struct r600_common_context *ctx);
 bool r600_common_context_init(struct r600_common_context *rctx,
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0ced617..d0c5392 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -584,53 +584,22 @@ static void si_pc_emit_start(struct r600_common_context *ctx,
 			       S_036020_PERFMON_STATE(V_036020_START_COUNTING));
 }
 
 /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
  * do it again in here. */
 static void si_pc_emit_stop(struct r600_common_context *ctx,
 			    struct r600_resource *buffer, uint64_t va)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
-	if (ctx->screen->chip_class == CIK) {
-		/* Two EOP events are required to make all engines go idle
-		 * (and optional cache flushes executed) before the timestamp
-		 * is written.
-		 *
-		 * Write 1, because we need to wait for the second EOP event.
-		 */
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-				EVENT_INDEX(5));
-		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-		radeon_emit(cs, 1); /* immediate data */
-		radeon_emit(cs, 0); /* unused */
-	}
-
-	/* Write 0. */
-	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-			EVENT_INDEX(5));
-	radeon_emit(cs, va);
-	radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-	radeon_emit(cs, 0); /* immediate data */
-	radeon_emit(cs, 0); /* unused */
-
-	/* Wait until the memory location is 0. */
-	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, 0); /* reference value */
-	radeon_emit(cs, 0xffffffff); /* mask */
-	radeon_emit(cs, 4); /* poll interval */
+	r600_gfx_write_fence(ctx, va, 1, 0);
+	r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
 
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0));
 	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
 			       S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
 			       S_036020_PERFMON_SAMPLE_ENABLE(1));
 }
 
@@ -712,28 +681,24 @@ void si_init_perfcounters(struct si_screen *screen)
 		fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not "
 			"supported (inaccurate performance counters)\n",
 			screen->b.info.max_sh_per_se);
 	}
 
 	pc = CALLOC_STRUCT(r600_perfcounters);
 	if (!pc)
 		return;
 
 	pc->num_start_cs_dwords = 14;
-	pc->num_stop_cs_dwords = 20;
+	pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
 	pc->num_instance_cs_dwords = 3;
 	pc->num_shaders_cs_dwords = 4;
 
-	if (screen->b.chip_class == CIK) {
-		pc->num_stop_cs_dwords += 6;
-	}
-
 	pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
 	pc->shader_type_suffixes = si_pc_shader_type_suffixes;
 	pc->shader_type_bits = si_pc_shader_type_bits;
 
 	pc->get_size = si_pc_get_size;
 	pc->emit_instance = si_pc_emit_instance;
 	pc->emit_shaders = si_pc_emit_shaders;
 	pc->emit_select = si_pc_emit_select;
 	pc->emit_start = si_pc_emit_start;
 	pc->emit_stop = si_pc_emit_stop;
-- 
2.7.4



More information about the mesa-dev mailing list