[Mesa-dev] [PATCH 13/15] radeonsi: don't re-upload the sample position constant buffer repeatedly

Marek Olšák maraeo at gmail.com
Tue Oct 2 22:35:45 UTC 2018


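From: Marek Olšák <marek.olsak at amd.com>

Previously si_set_framebuffer_state passed the CPU-side sample location
arrays as a user_buffer every time the number of samples changed.

Group the 1x/2x/4x/8x/16x arrays into a single sample_positions struct,
write that struct into one pipe_resource (sample_pos_buffer) at context
creation, and bind the constant buffer by pointing at that resource with
the byte offset of the array matching the current sample count.
The buffer is released in si_destroy_context.

---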
 src/gallium/drivers/radeonsi/si_pipe.c       |  7 +++++++
 src/gallium/drivers/radeonsi/si_pipe.h       | 13 ++++++++-----
 src/gallium/drivers/radeonsi/si_state.c      | 19 +++++++++++++------
 src/gallium/drivers/radeonsi/si_state_msaa.c | 10 +++++-----
 4 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4da361c42ee..5ae9c298e77 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -153,20 +153,21 @@ static void si_destroy_context(struct pipe_context *context)
 	struct pipe_framebuffer_state fb = {};
 	if (context->set_framebuffer_state)
 		context->set_framebuffer_state(context, &fb);
 
 	si_release_all_descriptors(sctx);
 
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
 	pipe_resource_reference(&sctx->tess_rings, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
+	pipe_resource_reference(&sctx->sample_pos_buffer, NULL);
 	r600_resource_reference(&sctx->border_color_buffer, NULL);
 	free(sctx->border_color_table);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
 	r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
 	r600_resource_reference(&sctx->wait_mem_scratch, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
 	if (sctx->init_config_gs_rings)
 		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
 	for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
@@ -592,20 +593,26 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 						    _mesa_key_pointer_equal);
 	sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
 						    _mesa_key_pointer_equal);
 
 	util_dynarray_init(&sctx->resident_tex_handles, NULL);
 	util_dynarray_init(&sctx->resident_img_handles, NULL);
 	util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
 	util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
 	util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);
 
+	sctx->sample_pos_buffer =
+		pipe_buffer_create(sctx->b.screen, 0, PIPE_USAGE_DEFAULT,
+				   sizeof(sctx->sample_positions));
+	pipe_buffer_write(&sctx->b, sctx->sample_pos_buffer, 0,
+			  sizeof(sctx->sample_positions), &sctx->sample_positions);
+
 	/* this must be last */
 	si_begin_new_gfx_cs(sctx);
 	return &sctx->b;
 fail:
 	fprintf(stderr, "radeonsi: Failed to create a context.\n");
 	si_destroy_context(&sctx->b);
 	return NULL;
 }
 
 static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ff11eab0224..93082e262d6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -958,25 +958,28 @@ struct si_context {
 	struct util_dynarray	resident_img_needs_color_decompress;
 	struct util_dynarray	resident_tex_needs_depth_decompress;
 
 	/* Bindless state */
 	bool			uses_bindless_samplers;
 	bool			uses_bindless_images;
 
 	/* MSAA sample locations.
 	 * The first index is the sample index.
 	 * The second index is the coordinate: X, Y. */
-	float			sample_locations_1x[1][2];
-	float			sample_locations_2x[2][2];
-	float			sample_locations_4x[4][2];
-	float			sample_locations_8x[8][2];
-	float			sample_locations_16x[16][2];
+	struct {
+		float			x1[1][2];
+		float			x2[2][2];
+		float			x4[4][2];
+		float			x8[8][2];
+		float			x16[16][2];
+	} sample_positions;
+	struct pipe_resource *sample_pos_buffer;
 
 	/* Misc stats. */
 	unsigned			num_draw_calls;
 	unsigned			num_decompress_calls;
 	unsigned			num_mrt_draw_calls;
 	unsigned			num_prim_restart_calls;
 	unsigned			num_spill_draw_calls;
 	unsigned			num_compute_calls;
 	unsigned			num_spill_compute_calls;
 	unsigned			num_dma_calls;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 0cebd974d80..f4fc4fd69da 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2711,21 +2711,20 @@ static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *sta
 		tex = (struct si_texture*)surf->base.texture;
 
 		p_atomic_dec(&tex->framebuffers_bound);
 	}
 }
 
 static void si_set_framebuffer_state(struct pipe_context *ctx,
 				     const struct pipe_framebuffer_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct pipe_constant_buffer constbuf = {0};
 	struct si_surface *surf = NULL;
 	struct si_texture *tex;
 	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
 	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
 	unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
 	bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
 	bool old_has_stencil =
 		old_has_zsbuf &&
 		((struct si_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
 	bool unbound = false;
@@ -2935,39 +2934,47 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
 	if (sctx->screen->has_out_of_order_rast &&
 	    (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
 	     !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
 	     (zstex && zstex->surface.has_stencil != old_has_stencil)))
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
+		struct pipe_constant_buffer constbuf = {0};
+
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
+		constbuf.buffer = sctx->sample_pos_buffer;
+
 		/* Set sample locations as fragment shader constants. */
 		switch (sctx->framebuffer.nr_samples) {
 		case 1:
-			constbuf.user_buffer = sctx->sample_locations_1x;
+			constbuf.buffer_offset = 0;
 			break;
 		case 2:
-			constbuf.user_buffer = sctx->sample_locations_2x;
+			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x2 -
+						 (ubyte*)sctx->sample_positions.x1;
 			break;
 		case 4:
-			constbuf.user_buffer = sctx->sample_locations_4x;
+			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x4 -
+						 (ubyte*)sctx->sample_positions.x1;
 			break;
 		case 8:
-			constbuf.user_buffer = sctx->sample_locations_8x;
+			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x8 -
+						 (ubyte*)sctx->sample_positions.x1;
 			break;
 		case 16:
-			constbuf.user_buffer = sctx->sample_locations_16x;
+			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x16 -
+						 (ubyte*)sctx->sample_positions.x1;
 			break;
 		default:
 			PRINT_ERR("Requested an invalid number of samples %i.\n",
 				 sctx->framebuffer.nr_samples);
 			assert(0);
 		}
 		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
 		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
 
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c
index f9387e75ed1..b741bcadec8 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -191,21 +191,21 @@ void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
 		break;
 	}
 }
 
 void si_init_msaa_functions(struct si_context *sctx)
 {
 	int i;
 
 	sctx->b.get_sample_position = si_get_sample_position;
 
-	si_get_sample_position(&sctx->b, 1, 0, sctx->sample_locations_1x[0]);
+	si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]);
 
 	for (i = 0; i < 2; i++)
-		si_get_sample_position(&sctx->b, 2, i, sctx->sample_locations_2x[i]);
+		si_get_sample_position(&sctx->b, 2, i, sctx->sample_positions.x2[i]);
 	for (i = 0; i < 4; i++)
-		si_get_sample_position(&sctx->b, 4, i, sctx->sample_locations_4x[i]);
+		si_get_sample_position(&sctx->b, 4, i, sctx->sample_positions.x4[i]);
 	for (i = 0; i < 8; i++)
-		si_get_sample_position(&sctx->b, 8, i, sctx->sample_locations_8x[i]);
+		si_get_sample_position(&sctx->b, 8, i, sctx->sample_positions.x8[i]);
 	for (i = 0; i < 16; i++)
-		si_get_sample_position(&sctx->b, 16, i, sctx->sample_locations_16x[i]);
+		si_get_sample_position(&sctx->b, 16, i, sctx->sample_positions.x16[i]);
 }
-- 
2.17.1



More information about the mesa-dev mailing list