Mesa (master): r600g: Implement scratch buffer state management (v2)

Dave Airlie airlied at kemper.freedesktop.org
Thu Feb 8 23:53:35 UTC 2018


Module: Mesa
Branch: master
Commit: 6b4303f358494e6c39476291c539af50d730910a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b4303f358494e6c39476291c539af50d730910a

Author: Glenn Kennard <glenn.kennard at gmail.com>
Date:   Sun Mar  5 18:26:51 2017 +0100

r600g: Implement scratch buffer state management (v2)

v2: add Glenn's fixes

Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
Reviewed-by: Dave Airlie <airlied at redhat.com>

---

 src/gallium/drivers/r600/evergreen_state.c   |  24 ++++++
 src/gallium/drivers/r600/r600_hw_context.c   |   4 +
 src/gallium/drivers/r600/r600_pipe.c         |   3 +
 src/gallium/drivers/r600/r600_pipe.h         |  16 +++-
 src/gallium/drivers/r600/r600_shader.h       |   1 +
 src/gallium/drivers/r600/r600_state_common.c | 105 +++++++++++++++++++++++++++
 6 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index fcd742c5f9..48934158bd 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2297,6 +2297,30 @@ static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struc
 					0);
 }
 
+/* Evergreen and later: set up the temp ring of every bound HW stage whose shader needs scratch space. */
+void evergreen_setup_scratch_buffers(struct r600_context *rctx) {
+	static const struct {
+		unsigned ring_base, item_size, ring_size;
+	} ring_regs[EG_NUM_HW_STAGES] = {
+		[R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
+		[R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
+		[R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
+		[R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE },
+		[EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE },
+		[EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE }
+	};
+
+	for (unsigned hw = 0; hw < EG_NUM_HW_STAGES; hw++) {
+		struct r600_pipe_shader *sh = rctx->hw_shader_stages[hw].shader;
+
+		if (!sh || !unlikely(sh->scratch_space_needed)) {
+			continue;
+		}
+		r600_setup_scratch_area_for_shader(rctx, sh, &rctx->scratch_buffers[hw],
+			ring_regs[hw].ring_base, ring_regs[hw].item_size, ring_regs[hw].ring_size);
+	}
+}
+
 static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
 					 unsigned resource_id_base, unsigned pkt_flags)
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 4b6d951af6..3ce1825104 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -415,6 +415,10 @@ void r600_begin_new_cs(struct r600_context *ctx)
 		r600_sampler_states_dirty(ctx, &samplers->states);
 	}
 
+	for (shader = 0; shader < ARRAY_SIZE(ctx->scratch_buffers); shader++) {
+		ctx->scratch_buffers[shader].dirty = true;
+	}
+
 	r600_postflush_resume_features(&ctx->b);
 
 	/* Re-emit the draw state. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 6c021e568d..cc35d86709 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -71,6 +71,9 @@ static void r600_destroy_context(struct pipe_context *context)
 
 	r600_sb_context_destroy(rctx->sb_context);
 
+	for (sh = 0; sh < (rctx->b.chip_class < EVERGREEN ? R600_NUM_HW_STAGES : EG_NUM_HW_STAGES); sh++) {
+		r600_resource_reference(&rctx->scratch_buffers[sh].buffer, NULL);
+	}
 	r600_resource_reference(&rctx->dummy_cmask, NULL);
 	r600_resource_reference(&rctx->dummy_fmask, NULL);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 740b50aec5..6d09093052 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -478,6 +478,14 @@ struct r600_image_state {
 	struct r600_image_view views[R600_MAX_IMAGES];
 };
 
+/* Per-HW-stage scratch buffer state; used to spill shader temps */
+struct r600_scratch_buffer {
+	struct r600_resource		*buffer;	/* backing buffer the temp ring registers are pointed at */
+	boolean					dirty;		/* set when a new CS begins; forces the ring registers to be re-emitted */
+	unsigned				size;		/* size that 'buffer' was created with */
+	unsigned				item_size;	/* scratch_space_needed of the shader the ring was last set up for */
+};
+
 struct r600_context {
 	struct r600_common_context	b;
 	struct r600_screen		*screen;
@@ -594,6 +602,8 @@ struct r600_context {
 	unsigned last_num_tcs_input_cp;
 	unsigned lds_alloc;
 
+	struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, EG_NUM_HW_STAGES)];
+
 	/* Debug state. */
 	bool			is_debug;
 	struct radeon_saved_cs	last_gfx;
@@ -703,7 +713,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
 					struct r600_surface *surf);
 void evergreen_update_db_shader_control(struct r600_context * rctx);
 bool evergreen_adjust_gprs(struct r600_context *rctx);
-
+void evergreen_setup_scratch_buffers(struct r600_context *rctx);
 uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
 				      unsigned nr_cbufs);
 /* r600_blit.c */
@@ -754,6 +764,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 				 unsigned sample_count,
 				 unsigned usage);
 void r600_update_db_shader_control(struct r600_context * rctx);
+void r600_setup_scratch_buffers(struct r600_context *rctx);
 
 /* r600_hw_context.c */
 void r600_context_gfx_flush(void *context, unsigned flags,
@@ -819,6 +830,9 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
 			       struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+	struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+	unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 4b23facf6f..b837d1309a 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -180,6 +180,7 @@ struct r600_pipe_shader {
 	unsigned		db_shader_control;
 	unsigned		ps_depth_export;
 	unsigned		enabled_stream_buffers_mask;
+	unsigned		scratch_space_needed; /* size of scratch space (if > 0) counted in vec4 */
 };
 
 /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b697f2e24e..9994f476cc 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1610,6 +1610,104 @@ void r600_update_compressed_resource_state(struct r600_context *rctx, bool compu
 	}
 }
 
+/*
+ * Update the MEM_SCRATCH ring of one HW stage if needed: grow the backing buffer
+ * and re-emit the ring registers. On allocation failure nothing is emitted.
+ */
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+	struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+	unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg)
+{
+	unsigned num_ses = rctx->screen->b.info.max_se;
+	unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
+	unsigned nthreads = 128;
+	unsigned itemsize = shader->scratch_space_needed * 4;
+	unsigned size = align(itemsize * nthreads * num_pipes * num_ses * 4, 256);
+
+	if (scratch->dirty ||
+		unlikely(shader->scratch_space_needed != scratch->item_size ||
+		size > scratch->size)) {
+		struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+		unsigned size_per_se = size / num_ses;
+
+		if (size > scratch->size) {
+			// Release prior one if any
+			pipe_resource_reference((struct pipe_resource**)&scratch->buffer, NULL);
+			scratch->buffer = (struct r600_resource *)pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
+				PIPE_USAGE_DEFAULT, size);
+			if (!scratch->buffer) {
+				// Allocation failed: emitting below would dereference NULL.
+				// Zero the size so the next call retries the allocation.
+				scratch->size = 0;
+				return;
+			}
+			scratch->size = size;
+		}
+
+		scratch->dirty = false;
+		scratch->item_size = shader->scratch_space_needed;
+
+		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+
+		// multi-SE chips need programming per SE
+		for (unsigned se = 0; se < num_ses; se++) {
+			// Direct to particular SE
+			if (num_ses > 1) {
+				radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+					S_0802C_INSTANCE_INDEX(0) |
+					S_0802C_SE_INDEX(se) |
+					S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+					S_0802C_SE_BROADCAST_WRITES(0));
+			}
+
+			radeon_set_config_reg(cs, ring_base_reg, (scratch->buffer->gpu_address + size_per_se * se) >> 8);
+			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+			radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, scratch->buffer,
+				RADEON_USAGE_READWRITE,
+				RADEON_PRIO_SCRATCH_BUFFER));
+			radeon_set_context_reg(cs, item_size_reg, itemsize);
+			radeon_set_config_reg(cs, ring_size_reg, size_per_se >> 8);
+		}
+
+		// Restore broadcast mode
+		if (num_ses > 1) {
+			radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+				S_0802C_INSTANCE_INDEX(0) |
+				S_0802C_SE_INDEX(0) |
+				S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+				S_0802C_SE_BROADCAST_WRITES(1));
+		}
+
+		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+	}
+}
+
+/* Pre-Evergreen chips: set up the temp ring of every bound HW stage whose shader needs scratch space. */
+void r600_setup_scratch_buffers(struct r600_context *rctx) {
+	static const struct {
+		unsigned ring_base, item_size, ring_size;
+	} ring_regs[R600_NUM_HW_STAGES] = {
+		[R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
+		[R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
+		[R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
+		[R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE }
+	};
+
+	for (unsigned hw = 0; hw < R600_NUM_HW_STAGES; hw++) {
+		struct r600_pipe_shader *sh = rctx->hw_shader_stages[hw].shader;
+
+		if (!sh || !unlikely(sh->scratch_space_needed)) {
+			continue;
+		}
+		r600_setup_scratch_area_for_shader(rctx, sh, &rctx->scratch_buffers[hw],
+			ring_regs[hw].ring_base, ring_regs[hw].item_size, ring_regs[hw].ring_size);
+	}
+}
+
 #define SELECT_SHADER_OR_FAIL(x) do {					\
 		r600_shader_select(ctx, rctx->x##_shader, &x##_dirty);	\
 		if (unlikely(!rctx->x##_shader->current))		\
@@ -1785,6 +1883,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		r600_update_db_shader_control(rctx);
 	}
 
+	/* For each shader stage that needs to spill, set up buffer for MEM_SCRATCH */
+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_setup_scratch_buffers(rctx);
+	} else {
+		r600_setup_scratch_buffers(rctx);
+	}
+
 	/* on R600 we stuff masks + txq info into one constant buffer */
 	/* on evergreen we only need a txq info one */
 	if (rctx->ps_shader) {




More information about the mesa-commit mailing list