[Mesa-dev] [PATCH 36/42] radeonsi: rework uploading border colors

Marek Olšák maraeo at gmail.com
Sun Aug 30 12:12:06 PDT 2015


From: Marek Olšák <marek.olsak at amd.com>

Each unique border color is now uploaded only once, when the sampler state
that uses it is created, instead of every time sampler states are bound.

This brings truly immutable sampler descriptors, because they don't have
to be updated every time a sampler state is re-bound.

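It also moves the TA_BC_BASE_ADDR registers to init_config, removing one
more state (ta_bordercolor_base). The catch is that there is now a hard
limit: only 4096 unique border colors can be used by one context. Once the
table is full, any new border color falls back to transparent black and a
message is printed to stderr. I don't think that will be a problem.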
---
 src/gallium/drivers/radeonsi/si_descriptors.c |   9 +-
 src/gallium/drivers/radeonsi/si_pipe.c        |  23 ++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |   8 +-
 src/gallium/drivers/radeonsi/si_state.c       | 124 +++++++++-----------------
 src/gallium/drivers/radeonsi/si_state.h       |   3 -
 5 files changed, 75 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 3041da6..92a7068 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-				unsigned start, unsigned count, void **states)
+static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
+                                   unsigned start, unsigned count, void **states)
 {
+	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_sampler_states *samplers = &sctx->samplers[shader].states;
 	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
 	int i;
 
+	if (!count || shader >= SI_NUM_SHADERS)
+		return;
+
 	if (start == 0)
 		samplers->saved_states[0] = states[0];
 	if (start == 1)
@@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 			    4, SI_NUM_VERTEX_BUFFERS);
 
 	/* Set pipe_context functions. */
+	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_constant_buffer = si_set_constant_buffer;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index da77478..d68ea5f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context)
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
 	pipe_resource_reference(&sctx->tf_ring, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
-	r600_resource_reference(&sctx->border_color_table, NULL);
+	r600_resource_reference(&sctx->border_color_buffer, NULL);
+	free(sctx->border_color_table);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
 	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
@@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 						sscreen->b.trace_bo->cs_buf : NULL);
 	sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
+	/* Border colors. */
+	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
+					  sizeof(*sctx->border_color_table));
+	if (!sctx->border_color_table)
+		goto fail;
+
+	sctx->border_color_buffer = (struct r600_resource*)
+		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
+				   SI_MAX_BORDER_COLORS *
+				   sizeof(*sctx->border_color_table));
+	if (!sctx->border_color_buffer)
+		goto fail;
+
+	sctx->border_color_map =
+		ws->buffer_map(sctx->border_color_buffer->cs_buf,
+			       NULL, PIPE_TRANSFER_WRITE);
+	if (!sctx->border_color_map)
+		goto fail;
+
 	si_init_all_descriptors(sctx);
 	si_init_state_functions(sctx);
 	si_init_shader_functions(sctx);
@@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	return &sctx->b.b;
 fail:
+	fprintf(stderr, "radeonsi: Failed to create a context.\n");
 	si_destroy_context(&sctx->b.b);
 	return NULL;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 02d75f0..847853e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -79,6 +79,7 @@
 #define SI_GET_TRACE_POINT_ID(x)	((x) & 0xffff)
 
 #define SI_MAX_VIEWPORTS	16
+#define SI_MAX_BORDER_COLORS	4096
 
 struct si_compute;
 
@@ -103,7 +104,6 @@ struct si_sampler_view {
 
 struct si_sampler_state {
 	uint32_t			val[4];
-	uint32_t			border_color[4];
 };
 
 struct si_cs_shader_state {
@@ -219,8 +219,10 @@ struct si_context {
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
 	struct pipe_resource		*tf_ring;
-	struct r600_resource		*border_color_table;
-	unsigned			border_color_offset;
+	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
+	struct r600_resource		*border_color_buffer;
+	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
+	unsigned			border_color_count;
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 52fa8fe..e31895d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st
 static void *si_create_sampler_state(struct pipe_context *ctx,
 				     const struct pipe_sampler_state *state)
 {
+	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
 	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
-	unsigned border_color_type;
+	unsigned border_color_type, border_color_index = 0;
 
 	if (rstate == NULL) {
 		return NULL;
@@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 		 state->border_color.f[2] == 1 &&
 		 state->border_color.f[3] == 1)
 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
-	else
+	else {
+		int i;
+
 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
 
+		/* Check if the border has been uploaded already. */
+		for (i = 0; i < sctx->border_color_count; i++)
+			if (memcmp(&sctx->border_color_table[i], &state->border_color,
+				   sizeof(state->border_color)) == 0)
+				break;
+
+		if (i >= SI_MAX_BORDER_COLORS) {
+			/* Getting 4096 unique border colors is very unlikely. */
+			fprintf(stderr, "radeonsi: The border color table is full. "
+				"Any new border colors will be just black. "
+				"Please file a bug.\n");
+			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+		} else {
+			if (i == sctx->border_color_count) {
+				/* Upload a new border color. */
+				memcpy(&sctx->border_color_table[i], &state->border_color,
+				       sizeof(state->border_color));
+				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
+							&state->border_color,
+							sizeof(state->border_color));
+				sctx->border_color_count++;
+			}
+
+			border_color_index = i;
+		}
+	}
+
 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
 			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
@@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
 			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
-	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
-
-	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-		memcpy(rstate->border_color, state->border_color.ui,
-		       sizeof(rstate->border_color));
-	}
-
+	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
+			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
 	return rstate;
 }
 
-/* Upload border colors and update the pointers in resource descriptors.
- * There can only be 4096 border colors per context.
- *
- * XXX: This is broken if the buffer gets reallocated.
- */
-static void si_set_border_colors(struct si_context *sctx, unsigned count,
-				 void **states)
-{
-	struct si_sampler_state **rstates = (struct si_sampler_state **)states;
-	uint32_t *border_color_table = NULL;
-	int i, j;
-
-	for (i = 0; i < count; i++) {
-		if (rstates[i] &&
-		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
-		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-			if (!sctx->border_color_table ||
-			    ((sctx->border_color_offset + count - i) &
-			     C_008F3C_BORDER_COLOR_PTR)) {
-				r600_resource_reference(&sctx->border_color_table, NULL);
-				sctx->border_color_offset = 0;
-
-				sctx->border_color_table =
-					si_resource_create_custom(&sctx->screen->b.b,
-								  PIPE_USAGE_DYNAMIC,
-								  4096 * 4 * 4);
-			}
-
-			if (!border_color_table) {
-			        border_color_table =
-					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
-							     sctx->b.rings.gfx.cs,
-							     PIPE_TRANSFER_WRITE |
-							     PIPE_TRANSFER_UNSYNCHRONIZED);
-			}
-
-			for (j = 0; j < 4; j++) {
-				border_color_table[4 * sctx->border_color_offset + j] =
-					util_le32_to_cpu(rstates[i]->border_color[j]);
-			}
-
-			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
-			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
-		}
-	}
-
-	if (border_color_table) {
-		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
-		uint64_t va_offset = sctx->border_color_table->gpu_address;
-
-		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
-		if (sctx->b.chip_class >= CIK)
-			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
-		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
-			      RADEON_PRIO_SHADER_DATA);
-		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
-	}
-}
-
-static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
-                                   unsigned start, unsigned count,
-                                   void **states)
-{
-	struct si_context *sctx = (struct si_context *)ctx;
-
-	if (!count || shader >= SI_NUM_SHADERS)
-		return;
-
-	si_set_border_colors(sctx, count, states);
-	si_set_sampler_descriptors(sctx, shader, start, count, states);
-}
-
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx)
 	sctx->b.b.get_sample_position = cayman_get_sample_position;
 
 	sctx->b.b.create_sampler_state = si_create_sampler_state;
-	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
 
 	sctx->b.b.create_sampler_view = si_create_sampler_view;
@@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx)
 	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
+	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	int i;
 
@@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
 	}
 
+	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+	if (sctx->b.chip_class >= CIK)
+		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
+	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
+		      RADEON_PRIO_SHADER_DATA);
+
 	sctx->init_config = pm4;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 49f9f65..f5726f0 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -91,7 +91,6 @@ union si_state {
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*poly_offset;
-		struct si_pm4_state		*ta_bordercolor_base;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
@@ -246,8 +245,6 @@ struct si_buffer_resources {
 	} while(0)
 
 /* si_descriptors.c */
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-				unsigned start, unsigned count, void **states);
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
-- 
2.1.4



More information about the mesa-dev mailing list