[Mesa-dev] [PATCH 04/10] radeonsi: rework polygon stippling to use constant buffer instead of texture

Marek Olšák maraeo at gmail.com
Wed Apr 20 15:47:07 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

Add the stipple pattern to the RW_BUFFERS descriptor array as an internal constant buffer.

Now that the extra polygon stipple sampler slot is gone, the slot masks no longer need to be 64 bits wide.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 23 +++++++++
 src/gallium/drivers/radeonsi/si_pipe.c        |  2 -
 src/gallium/drivers/radeonsi/si_pipe.h        |  1 -
 src/gallium/drivers/radeonsi/si_shader.c      | 67 ++++++++++++---------------
 src/gallium/drivers/radeonsi/si_state.c       | 55 ----------------------
 src/gallium/drivers/radeonsi/si_state.h       |  8 ++--
 6 files changed, 55 insertions(+), 101 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b8f74f4..194b2eb 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1121,6 +1121,26 @@ static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
 		  S_008F04_BASE_ADDRESS_HI(va >> 32);
 }
 
+/* INTERNAL CONST BUFFERS */
+
+static void si_set_polygon_stipple(struct pipe_context *ctx,
+				   const struct pipe_poly_stipple *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct pipe_constant_buffer cb = {};
+	unsigned stipple[32]; /* one word per row of the 32x32 stipple pattern */
+	int i;
+
+	for (i = 0; i < 32; i++)
+		stipple[i] = util_bitreverse(state->stipple[i]); /* reverse the bit order of each row; the shader side of this patch picks the pixel's bit with a right shift by the x coordinate */
+
+	cb.user_buffer = stipple; /* NOTE(review): points at a stack array - presumably si_set_constant_buffer copies the data before returning; confirm */
+	cb.buffer_size = sizeof(stipple);
+
+	si_set_constant_buffer(sctx, &sctx->rw_buffers,
+			       SI_PS_CONST_POLY_STIPPLE, &cb); /* bind to the polygon stipple slot of the RW buffer list */
+}
+
 /* TEXTURE METADATA ENABLE/DISABLE */
 
 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
@@ -1401,6 +1421,8 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx,
 
 	if (sctx->rw_buffers.desc.pointer_dirty) {
 		si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc,
+				       R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc,
 				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
 		si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc,
 				       R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
@@ -1478,6 +1500,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_shader_images = si_set_shader_images;
 	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
+	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 17d59b6..2a5cf0a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -61,8 +61,6 @@ static void si_destroy_context(struct pipe_context *context)
 	for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
-	if (sctx->pstipple_sampler_state)
-		sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
 	if (sctx->fixed_func_tcs_shader.cso)
 		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
 	if (sctx->custom_dsa_flush)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 48095b0..85bf10f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -190,7 +190,6 @@ struct si_context {
 	void				*custom_blend_decompress;
 	void				*custom_blend_fastclear;
 	void				*custom_blend_dcc_decompress;
-	void				*pstipple_sampler_state;
 	struct si_screen		*screen;
 
 	struct radeon_winsys_cs		*ce_ib;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 231b2c3..cfea2db 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5291,15 +5291,14 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 }
 
 static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
-					 LLVMValueRef param_sampler_views,
+					 LLVMValueRef param_rw_buffers,
 					 unsigned param_pos_fixed_pt)
 {
 	struct lp_build_tgsi_context *bld_base =
 		&ctx->radeon_bld.soa.bld_base;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	struct lp_build_emit_data result = {};
-	struct tgsi_full_instruction inst = {};
-	LLVMValueRef desc, sampler_index, address[2], pix;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef slot, desc, offset, row, bit, address[2];
 
 	/* Use the fixed-point gl_FragCoord input.
 	 * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
@@ -5308,29 +5307,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 	address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
 	address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
 
-	/* Load the sampler view descriptor. */
-	sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER);
-	desc = get_sampler_desc_custom(ctx, param_sampler_views,
-				       sampler_index, DESC_IMAGE);
-
-	/* Load the texel. */
-	inst.Instruction.Opcode = TGSI_OPCODE_TXF;
-	inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */
-	result.inst = &inst;
-	set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF,
-			   inst.Texture.Texture,
-			   desc, NULL, address, ARRAY_SIZE(address), 0xf);
-	build_tex_intrinsic(&tex_action, bld_base, &result);
-
-	/* Kill the thread accordingly. */
-	pix = LLVMBuildExtractElement(gallivm->builder, result.output[0],
-				      lp_build_const_int32(gallivm, 3), "");
-	pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix);
-	pix = LLVMBuildFNeg(gallivm->builder, pix, "");
+	/* Load the buffer descriptor. */
+	slot = lp_build_const_int32(gallivm, SI_PS_CONST_POLY_STIPPLE);
+	desc = build_indexed_load_const(ctx, param_rw_buffers, slot);
 
-	lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-			   LLVMVoidTypeInContext(gallivm->context),
-			   &pix, 1, 0);
+	/* The stipple pattern is 32x32, each row has 32 bits. */
+	offset = LLVMBuildMul(builder, address[1],
+			      LLVMConstInt(ctx->i32, 4, 0), "");
+	row = buffer_load_const(builder, desc, offset, ctx->i32);
+	bit = LLVMBuildLShr(builder, row, address[0], "");
+	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
+
+	/* The intrinsic kills the thread if arg < 0. */
+	bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
+			      LLVMConstReal(ctx->f32, -1), "");
+	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
 }
 
 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
@@ -6038,9 +6029,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
 	if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
 	    shader->key.ps.prolog.poly_stipple) {
-		LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn,
-						  SI_PARAM_SAMPLERS);
-		si_llvm_emit_polygon_stipple(&ctx, views,
+		LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
+						 SI_PARAM_RW_BUFFERS);
+		si_llvm_emit_polygon_stipple(&ctx, list,
 					     SI_PARAM_POS_FIXED_PT);
 	}
 
@@ -6618,17 +6609,17 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
 		/* POS_FIXED_PT is always last. */
 		unsigned pos = key->ps_prolog.num_input_sgprs +
 			       key->ps_prolog.num_input_vgprs - 1;
-		LLVMValueRef ptr[2], views;
+		LLVMValueRef ptr[2], list;
 
-		/* Get the pointer to sampler views. */
-		ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS);
-		ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1);
-		views = lp_build_gather_values(gallivm, ptr, 2);
-		views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, "");
-		views = LLVMBuildIntToPtr(gallivm->builder, views,
-					  const_array(ctx.v8i32, SI_NUM_SAMPLERS), "");
+		/* Get the pointer to rw buffers. */
+		ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
+		ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
+		list = lp_build_gather_values(gallivm, ptr, 2);
+		list = LLVMBuildBitCast(gallivm->builder, list, ctx.i64, "");
+		list = LLVMBuildIntToPtr(gallivm->builder, list,
+					  const_array(ctx.v16i8, SI_NUM_RW_BUFFERS), "");
 
-		si_llvm_emit_polygon_stipple(&ctx, views, pos);
+		si_llvm_emit_polygon_stipple(&ctx, list, pos);
 	}
 
 	/* Interpolate colors. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 305a70b..1f3a5fa 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3374,60 +3374,6 @@ static void si_set_index_buffer(struct pipe_context *ctx,
 /*
  * Misc
  */
-static void si_set_polygon_stipple(struct pipe_context *ctx,
-				   const struct pipe_poly_stipple *state)
-{
-	struct si_context *sctx = (struct si_context *)ctx;
-	struct pipe_resource *tex;
-	struct pipe_sampler_view *view;
-	bool is_zero = true;
-	bool is_one = true;
-	int i;
-
-	/* The hardware obeys 0 and 1 swizzles in the descriptor even if
-	 * the resource is NULL/invalid. Take advantage of this fact and skip
-	 * texture allocation if the stipple pattern is constant.
-	 *
-	 * This is an optimization for the common case when stippling isn't
-	 * used but set_polygon_stipple is still called by st/mesa.
-	 */
-	for (i = 0; i < Elements(state->stipple); i++) {
-		is_zero = is_zero && state->stipple[i] == 0;
-		is_one = is_one && state->stipple[i] == 0xffffffff;
-	}
-
-	if (is_zero || is_one) {
-		struct pipe_sampler_view templ = {{0}};
-
-		templ.swizzle_r = PIPE_SWIZZLE_ZERO;
-		templ.swizzle_g = PIPE_SWIZZLE_ZERO;
-		templ.swizzle_b = PIPE_SWIZZLE_ZERO;
-		/* The pattern should be inverted in the texture. */
-		templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO;
-
-		view = ctx->create_sampler_view(ctx, NULL, &templ);
-	} else {
-		/* Create a new texture. */
-		tex = util_pstipple_create_stipple_texture(ctx, state->stipple);
-		if (!tex)
-			return;
-
-		view = util_pstipple_create_sampler_view(ctx, tex);
-		pipe_resource_reference(&tex, NULL);
-	}
-
-	ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT,
-			       SI_POLY_STIPPLE_SAMPLER, 1, &view);
-	pipe_sampler_view_reference(&view, NULL);
-
-	/* Bind the sampler state if needed. */
-	if (!sctx->pstipple_sampler_state) {
-		sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx);
-		ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT,
-					 SI_POLY_STIPPLE_SAMPLER, 1,
-					 &sctx->pstipple_sampler_state);
-	}
-}
 
 static void si_set_tess_state(struct pipe_context *ctx,
 			      const float default_outer_level[4],
@@ -3590,7 +3536,6 @@ void si_init_state_functions(struct si_context *sctx)
 
 	sctx->b.b.texture_barrier = si_texture_barrier;
 	sctx->b.b.memory_barrier = si_memory_barrier;
-	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_min_samples = si_set_min_samples;
 	sctx->b.b.set_tess_state = si_set_tess_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index e1e7ae0..9875606 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -146,12 +146,8 @@ struct si_shader_data {
 	uint32_t		sh_base[SI_NUM_SHADERS];
 };
 
-/* User sampler views:   0..31
- * Polygon stipple tex:  32
- */
 #define SI_NUM_USER_SAMPLERS            32 /* AKA OpenGL textures units per shader */
-#define SI_POLY_STIPPLE_SAMPLER         SI_NUM_USER_SAMPLERS
-#define SI_NUM_SAMPLERS                 (SI_POLY_STIPPLE_SAMPLER + 1)
+#define SI_NUM_SAMPLERS                 SI_NUM_USER_SAMPLERS
 
 /* User constant buffers:   0..15
  * Driver state constants:  16
@@ -182,6 +178,8 @@ enum {
 	SI_VS_STREAMOUT_BUF2,
 	SI_VS_STREAMOUT_BUF3,
 
+	SI_PS_CONST_POLY_STIPPLE,
+
 	SI_NUM_RW_BUFFERS,
 };
 
-- 
2.5.0



More information about the mesa-dev mailing list