[Mesa-dev] [PATCH 1/3] radeonsi: clamp depth comparison value only for fixed point formats
Nicolai Hähnle
nhaehnle at gmail.com
Tue Sep 26 14:46:08 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The hardware usually does this automatically. However, we upgrade
depth to Z32_FLOAT to enable TC-compatible HTILE, which means the
hardware no longer clamps the comparison value for us.
The only way to tell in the shader whether a clamp is required
seems to be to communicate an additional bit in the descriptor
table. While VI has some unused bits in the resource descriptor,
those bits have unfortunately all been used in gfx9. So we use
an unused bit in the sampler state instead.
Fixes dEQP-GLES3.functional.texture.shadow.2d.linear.equal_depth_component32f
and many other tests in dEQP-GLES3.functional.texture.shadow.*
Fixes: d4d9ec55c589 ("radeonsi: implement TC-compatible HTILE")
---
src/amd/common/sid.h | 2 ++
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeon/r600_texture.c | 5 +++-
src/gallium/drivers/radeonsi/si_descriptors.c | 31 ++++++++++++++++++-----
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++++++++++++-----
src/gallium/drivers/radeonsi/si_state.c | 4 +++
7 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index a8c78c1b2bf..1016f674707 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -2446,20 +2446,22 @@
#define S_008F38_FILTER_PREC_FIX(x) (((unsigned)(x) & 0x1) << 30)
#define G_008F38_FILTER_PREC_FIX(x) (((x) >> 30) & 0x1)
#define C_008F38_FILTER_PREC_FIX 0xBFFFFFFF
#define S_008F38_ANISO_OVERRIDE(x) (((unsigned)(x) & 0x1) << 31)
#define G_008F38_ANISO_OVERRIDE(x) (((x) >> 31) & 0x1)
#define C_008F38_ANISO_OVERRIDE 0x7FFFFFFF
#define R_008F3C_SQ_IMG_SAMP_WORD3 0x008F3C
#define S_008F3C_BORDER_COLOR_PTR(x) (((unsigned)(x) & 0xFFF) << 0)
#define G_008F3C_BORDER_COLOR_PTR(x) (((x) >> 0) & 0xFFF)
#define C_008F3C_BORDER_COLOR_PTR 0xFFFFF000
+/* The UPGRADED_DEPTH field is driver-specific and does not exist in hardware. */
+#define S_008F3C_UPGRADED_DEPTH(x) (((unsigned)(x) & 0x1) << 29)
#define S_008F3C_BORDER_COLOR_TYPE(x) (((unsigned)(x) & 0x03) << 30)
#define G_008F3C_BORDER_COLOR_TYPE(x) (((x) >> 30) & 0x03)
#define C_008F3C_BORDER_COLOR_TYPE 0x3FFFFFFF
#define V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK 0x00
#define V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK 0x01
#define V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE 0x02
#define V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER 0x03
#define R_0090DC_SPI_DYN_GPR_LOCK_EN 0x0090DC /* not on CIK */
#define S_0090DC_VS_LOW_THRESHOLD(x) (((unsigned)(x) & 0x0F) << 0)
#define G_0090DC_VS_LOW_THRESHOLD(x) (((x) >> 0) & 0x0F)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index bd0dc76ec2b..41d8cc6f0a7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -243,20 +243,21 @@ struct r600_texture {
unsigned color_clear_value[2];
unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
uint64_t htile_offset;
bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
uint8_t stencil_clear_value;
+ bool upgraded_depth; /* upgraded from unorm to Z32_FLOAT */
bool non_disp_tiling; /* R600-Cayman only */
/* Whether the texture is a displayable back buffer and needs DCC
* decompression, which is expensive. Therefore, it's enabled only
* if statistics suggest that it will pay off and it's allocated
* separately. It can't be bound as a sampler by apps. Limited to
* target == 2D and last_level == 0. If enabled, dcc_offset contains
* the absolute GPUVM address, not the relative one.
*/
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index e9507c3f541..7b177e056a3 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1225,22 +1225,25 @@ r600_texture_create_object(struct pipe_screen *screen,
(rtex->surface.flags &
RADEON_SURF_TC_COMPATIBLE_HTILE);
/* TC-compatible HTILE:
* - VI only supports Z32_FLOAT.
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
if (rtex->tc_compatible_htile) {
if (rscreen->chip_class >= GFX9 &&
base->format == PIPE_FORMAT_Z16_UNORM)
rtex->db_render_format = base->format;
- else
+ else {
rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+ rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
+ base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
+ }
} else {
rtex->db_render_format = base->format;
}
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b968a5057ac..27239c25389 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -415,22 +415,27 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
is_separate_stencil,
desc);
}
if (!is_buffer && rtex->fmask.size) {
memcpy(desc + 8, sview->fmask_state, 8*4);
} else {
/* Disable FMASK and bind sampler state in [12:15]. */
memcpy(desc + 8, null_texture_descriptor, 4*4);
- if (sstate)
- memcpy(desc + 12, sstate->val, 4*4);
+ if (sstate) {
+ if (!is_buffer && rtex->upgraded_depth &&
+ !sview->is_stencil_sampler)
+ memcpy(desc + 12, sstate->upgraded_depth_val, 4*4);
+ else
+ memcpy(desc + 12, sstate->val, 4*4);
+ }
}
}
static void si_set_sampler_view(struct si_context *sctx,
unsigned shader,
unsigned slot, struct pipe_sampler_view *view,
bool disallow_early_out)
{
struct si_sampler_views *views = &sctx->samplers[shader].views;
struct si_sampler_view *rview = (struct si_sampler_view*)view;
@@ -838,27 +843,39 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
continue;
#ifdef DEBUG
assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
#endif
samplers->views.sampler_states[slot] = sstates[i];
/* If FMASK is bound, don't overwrite it.
* The sampler state will be set after FMASK is unbound.
*/
- if (samplers->views.views[slot] &&
- samplers->views.views[slot]->texture &&
- samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
- ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
+ struct si_sampler_view *sview =
+ (struct si_sampler_view *)samplers->views.views[slot];
+
+ struct r600_texture *tex = NULL;
+
+ if (sview && sview->base.texture &&
+ sview->base.texture->target != PIPE_BUFFER)
+ tex = (struct r600_texture *)sview->base.texture;
+
+ if (tex && tex->fmask.size)
continue;
- memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
+ if (tex && tex->upgraded_depth && !sview->is_stencil_sampler)
+ memcpy(desc->list + desc_slot * 16 + 12,
+ sstates[i]->upgraded_depth_val, 4*4);
+ else
+ memcpy(desc->list + desc_slot * 16 + 12,
+ sstates[i]->val, 4*4);
+
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
}
/* BUFFER RESOURCES */
static void si_init_buffer_resources(struct si_buffer_resources *buffers,
struct si_descriptors *descs,
unsigned num_buffers,
unsigned shader_userdata_index,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ce6aa3be96b..80f38ea29bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -160,20 +160,21 @@ struct si_sampler_view {
bool dcc_incompatible;
};
#define SI_SAMPLER_STATE_MAGIC 0x34f1c35a
struct si_sampler_state {
#ifdef DEBUG
unsigned magic;
#endif
uint32_t val[4];
+ uint32_t upgraded_depth_val[4];
};
struct si_cs_shader_state {
struct si_compute *program;
struct si_compute *emitted_program;
unsigned offset;
bool initialized;
bool uses_scratch;
};
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 0f65984db07..be92044750c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1394,29 +1394,42 @@ static void tex_fetch_args(
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
LLVMValueRef z;
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
z = coords[ref_pos];
}
- /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ /* Section 8.23.1 (Depth Texture Comparison Mode) of the
+ * OpenGL 4.5 spec says:
+ *
+ * "If the texture’s internal format indicates a fixed-point
+ * depth texture, then D_t and D_ref are clamped to the
+ * range [0, 1]; otherwise no clamping is performed."
+ *
+ * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
* so the depth comparison value isn't clamped for Z16 and
* Z24 anymore. Do it manually here.
- *
- * It's unnecessary if the original texture format was
- * Z32_FLOAT, but we don't know that here.
*/
- if (ctx->screen->b.chip_class >= VI)
- z = ac_build_clamp(&ctx->ac, z);
+ if (ctx->screen->b.chip_class >= VI) {
+ LLVMValueRef upgraded;
+ LLVMValueRef clamped;
+ upgraded = LLVMBuildExtractElement(gallivm->builder, samp_ptr,
+ LLVMConstInt(ctx->i32, 3, false), "");
+ upgraded = LLVMBuildLShr(gallivm->builder, upgraded,
+ LLVMConstInt(ctx->i32, 29, false), "");
+ upgraded = LLVMBuildTrunc(gallivm->builder, upgraded, ctx->i1, "");
+ clamped = ac_build_clamp(&ctx->ac, z);
+ z = LLVMBuildSelect(gallivm->builder, upgraded, clamped, z, "");
+ }
address[count++] = z;
}
/* Pack user derivatives */
if (opcode == TGSI_OPCODE_TXD) {
int param, num_src_deriv_channels, num_dst_deriv_channels;
switch (target) {
case TGSI_TEXTURE_3D:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3135566cd63..551bb17503c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3990,20 +3990,24 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
S_008F38_MIP_POINT_PRECLAMP(0) |
S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
S_008F3C_BORDER_COLOR_TYPE(border_color_type);
+
+ memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
+ rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
+
return rstate;
}
static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
struct si_context *sctx = (struct si_context *)ctx;
if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
return;
--
2.11.0
More information about the mesa-dev
mailing list