[Mesa-dev] [PATCH 1/3] radeonsi: clamp depth comparison value only for fixed point formats

Nicolai Hähnle nhaehnle at gmail.com
Tue Sep 26 14:46:08 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

The hardware usually clamps the depth comparison value automatically.
However, we upgrade depth to Z32_FLOAT to enable TC-compatible HTILE,
which means the hardware no longer clamps the comparison value for us.

The only way to tell in the shader whether a clamp is required
seems to be to communicate an additional bit in the descriptor
table. While VI has some unused bits in the resource descriptor,
those bits are unfortunately all in use on GFX9. So we use
an unused bit in the sampler state instead.

Fixes dEQP-GLES3.functional.texture.shadow.2d.linear.equal_depth_component32f
and many other tests in dEQP-GLES3.functional.texture.shadow.*

Fixes: d4d9ec55c589 ("radeonsi: implement TC-compatible HTILE")
---
 src/amd/common/sid.h                              |  2 ++
 src/gallium/drivers/radeon/r600_pipe_common.h     |  1 +
 src/gallium/drivers/radeon/r600_texture.c         |  5 +++-
 src/gallium/drivers/radeonsi/si_descriptors.c     | 31 ++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_pipe.h            |  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++++++++++++-----
 src/gallium/drivers/radeonsi/si_state.c           |  4 +++
 7 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index a8c78c1b2bf..1016f674707 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -2446,20 +2446,22 @@
 #define   S_008F38_FILTER_PREC_FIX(x)                                 (((unsigned)(x) & 0x1) << 30)
 #define   G_008F38_FILTER_PREC_FIX(x)                                 (((x) >> 30) & 0x1)
 #define   C_008F38_FILTER_PREC_FIX                                    0xBFFFFFFF
 #define   S_008F38_ANISO_OVERRIDE(x)                                  (((unsigned)(x) & 0x1) << 31)
 #define   G_008F38_ANISO_OVERRIDE(x)                                  (((x) >> 31) & 0x1)
 #define   C_008F38_ANISO_OVERRIDE                                     0x7FFFFFFF
 #define R_008F3C_SQ_IMG_SAMP_WORD3                                      0x008F3C
 #define   S_008F3C_BORDER_COLOR_PTR(x)                                (((unsigned)(x) & 0xFFF) << 0)
 #define   G_008F3C_BORDER_COLOR_PTR(x)                                (((x) >> 0) & 0xFFF)
 #define   C_008F3C_BORDER_COLOR_PTR                                   0xFFFFF000
+/* The UPGRADED_DEPTH field is driver-specific and does not exist in hardware. */
+#define   S_008F3C_UPGRADED_DEPTH(x)                                  (((unsigned)(x) & 0x1) << 29)
 #define   S_008F3C_BORDER_COLOR_TYPE(x)                               (((unsigned)(x) & 0x03) << 30)
 #define   G_008F3C_BORDER_COLOR_TYPE(x)                               (((x) >> 30) & 0x03)
 #define   C_008F3C_BORDER_COLOR_TYPE                                  0x3FFFFFFF
 #define     V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK                0x00
 #define     V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK               0x01
 #define     V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE               0x02
 #define     V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER                   0x03
 #define R_0090DC_SPI_DYN_GPR_LOCK_EN                                    0x0090DC /* not on CIK */
 #define   S_0090DC_VS_LOW_THRESHOLD(x)                                (((unsigned)(x) & 0x0F) << 0)
 #define   G_0090DC_VS_LOW_THRESHOLD(x)                                (((x) >> 0) & 0x0F)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index bd0dc76ec2b..41d8cc6f0a7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -243,20 +243,21 @@ struct r600_texture {
 	unsigned			color_clear_value[2];
 	unsigned			last_msaa_resolve_target_micro_mode;
 
 	/* Depth buffer compression and fast clear. */
 	uint64_t			htile_offset;
 	bool				tc_compatible_htile;
 	bool				depth_cleared; /* if it was cleared at least once */
 	float				depth_clear_value;
 	bool				stencil_cleared; /* if it was cleared at least once */
 	uint8_t				stencil_clear_value;
+	bool				upgraded_depth; /* upgraded from unorm to Z32_FLOAT */
 
 	bool				non_disp_tiling; /* R600-Cayman only */
 
 	/* Whether the texture is a displayable back buffer and needs DCC
 	 * decompression, which is expensive. Therefore, it's enabled only
 	 * if statistics suggest that it will pay off and it's allocated
 	 * separately. It can't be bound as a sampler by apps. Limited to
 	 * target == 2D and last_level == 0. If enabled, dcc_offset contains
 	 * the absolute GPUVM address, not the relative one.
 	 */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index e9507c3f541..7b177e056a3 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1225,22 +1225,25 @@ r600_texture_create_object(struct pipe_screen *screen,
 				    (rtex->surface.flags &
 				     RADEON_SURF_TC_COMPATIBLE_HTILE);
 
 	/* TC-compatible HTILE:
 	 * - VI only supports Z32_FLOAT.
 	 * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
 	if (rtex->tc_compatible_htile) {
 		if (rscreen->chip_class >= GFX9 &&
 		    base->format == PIPE_FORMAT_Z16_UNORM)
 			rtex->db_render_format = base->format;
-		else
+		else {
 			rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+			rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
+					       base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
+		}
 	} else {
 		rtex->db_render_format = base->format;
 	}
 
 	/* Tiled depth textures utilize the non-displayable tile order.
 	 * This must be done after r600_setup_surface.
 	 * Applies to R600-Cayman. */
 	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
 	/* Applies to GCN. */
 	rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b968a5057ac..27239c25389 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -415,22 +415,27 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
 					       is_separate_stencil,
 					       desc);
 	}
 
 	if (!is_buffer && rtex->fmask.size) {
 		memcpy(desc + 8, sview->fmask_state, 8*4);
 	} else {
 		/* Disable FMASK and bind sampler state in [12:15]. */
 		memcpy(desc + 8, null_texture_descriptor, 4*4);
 
-		if (sstate)
-			memcpy(desc + 12, sstate->val, 4*4);
+		if (sstate) {
+			if (!is_buffer && rtex->upgraded_depth &&
+			    !sview->is_stencil_sampler)
+				memcpy(desc + 12, sstate->upgraded_depth_val, 4*4);
+			else
+				memcpy(desc + 12, sstate->val, 4*4);
+		}
 	}
 }
 
 static void si_set_sampler_view(struct si_context *sctx,
 				unsigned shader,
 				unsigned slot, struct pipe_sampler_view *view,
 				bool disallow_early_out)
 {
 	struct si_sampler_views *views = &sctx->samplers[shader].views;
 	struct si_sampler_view *rview = (struct si_sampler_view*)view;
@@ -838,27 +843,39 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
 			continue;
 
 #ifdef DEBUG
 		assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
 #endif
 		samplers->views.sampler_states[slot] = sstates[i];
 
 		/* If FMASK is bound, don't overwrite it.
 		 * The sampler state will be set after FMASK is unbound.
 		 */
-		if (samplers->views.views[slot] &&
-		    samplers->views.views[slot]->texture &&
-		    samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
-		    ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
+		struct si_sampler_view *sview =
+			(struct si_sampler_view *)samplers->views.views[slot];
+
+		struct r600_texture *tex = NULL;
+
+		if (sview && sview->base.texture &&
+		    sview->base.texture->target != PIPE_BUFFER)
+			tex = (struct r600_texture *)sview->base.texture;
+
+		if (tex && tex->fmask.size)
 			continue;
 
-		memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
+		if (tex && tex->upgraded_depth && !sview->is_stencil_sampler)
+			memcpy(desc->list + desc_slot * 16 + 12,
+			       sstates[i]->upgraded_depth_val, 4*4);
+		else
+			memcpy(desc->list + desc_slot * 16 + 12,
+			       sstates[i]->val, 4*4);
+
 		sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
 	}
 }
 
 /* BUFFER RESOURCES */
 
 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
 				     struct si_descriptors *descs,
 				     unsigned num_buffers,
 				     unsigned shader_userdata_index,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ce6aa3be96b..80f38ea29bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -160,20 +160,21 @@ struct si_sampler_view {
 	bool dcc_incompatible;
 };
 
 #define SI_SAMPLER_STATE_MAGIC 0x34f1c35a
 
 struct si_sampler_state {
 #ifdef DEBUG
 	unsigned			magic;
 #endif
 	uint32_t			val[4];
+	uint32_t			upgraded_depth_val[4];
 };
 
 struct si_cs_shader_state {
 	struct si_compute		*program;
 	struct si_compute		*emitted_program;
 	unsigned			offset;
 	bool				initialized;
 	bool				uses_scratch;
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 0f65984db07..be92044750c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1394,29 +1394,42 @@ static void tex_fetch_args(
 	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
 		LLVMValueRef z;
 
 		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
 		} else {
 			assert(ref_pos >= 0);
 			z = coords[ref_pos];
 		}
 
-		/* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+		/* Section 8.23.1 (Depth Texture Comparison Mode) of the
+		 * OpenGL 4.5 spec says:
+		 *
+		 *    "If the texture’s internal format indicates a fixed-point
+		 *     depth texture, then D_t and D_ref are clamped to the
+		 *     range [0, 1]; otherwise no clamping is performed."
+		 *
+		 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
 		 * so the depth comparison value isn't clamped for Z16 and
 		 * Z24 anymore. Do it manually here.
-		 *
-		 * It's unnecessary if the original texture format was
-		 * Z32_FLOAT, but we don't know that here.
 		 */
-		if (ctx->screen->b.chip_class >= VI)
-			z = ac_build_clamp(&ctx->ac, z);
+		if (ctx->screen->b.chip_class >= VI) {
+			LLVMValueRef upgraded;
+			LLVMValueRef clamped;
+			upgraded = LLVMBuildExtractElement(gallivm->builder, samp_ptr,
+							   LLVMConstInt(ctx->i32, 3, false), "");
+			upgraded = LLVMBuildLShr(gallivm->builder, upgraded,
+						 LLVMConstInt(ctx->i32, 29, false), "");
+			upgraded = LLVMBuildTrunc(gallivm->builder, upgraded, ctx->i1, "");
+			clamped = ac_build_clamp(&ctx->ac, z);
+			z = LLVMBuildSelect(gallivm->builder, upgraded, clamped, z, "");
+		}
 
 		address[count++] = z;
 	}
 
 	/* Pack user derivatives */
 	if (opcode == TGSI_OPCODE_TXD) {
 		int param, num_src_deriv_channels, num_dst_deriv_channels;
 
 		switch (target) {
 		case TGSI_TEXTURE_3D:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3135566cd63..551bb17503c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3990,20 +3990,24 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
 			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
 			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
 			  S_008F38_MIP_POINT_PRECLAMP(0) |
 			  S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
 			  S_008F38_FILTER_PREC_FIX(1) |
 			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
 	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
 			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
+
+	memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
+	rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
+
 	return rstate;
 }
 
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
 		return;
 
-- 
2.11.0



More information about the mesa-dev mailing list