[Mesa-dev] [PATCH] radv: rework the TC-compat HTILE hardware bug with COND_EXEC
Samuel Pitoiset
samuel.pitoiset at gmail.com
Tue Dec 4 15:56:02 UTC 2018
On 12/3/18 11:21 PM, Bas Nieuwenhuizen wrote:
> On Mon, Dec 3, 2018 at 11:13 PM Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>>
>> After investigating on this, it appears that COND_WRITE doesn't
>> work correctly in some situations. I don't know exactly why does
>> it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
>> also uses COND_EXEC I think there is a reason.
>>
>> Now the driver stores a new metadata value in order to reflect
>> the last fast depth clear state. If a TC-compat HTILE is fast cleared
>> with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
>> work around that hardware bug.
>>
>> This fixes rendering issues with The Forest and DXVK and doesn't
>> seem to introduce any regressions.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
>> Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug")
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>> src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++----------
>> src/amd/vulkan/radv_image.c | 10 +++-
>> src/amd/vulkan/radv_private.h | 2 +
>> 3 files changed, 75 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index ccf762ead46..23909a0f7dd 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -1067,7 +1067,7 @@ static void
>> radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>> struct radv_ds_buffer_info *ds,
>> struct radv_image *image, VkImageLayout layout,
>> - bool requires_cond_write)
>> + bool requires_cond_exec)
>> {
>> uint32_t db_z_info = ds->db_z_info;
>> uint32_t db_z_info_reg;
>> @@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>> }
>>
>> /* When we don't know the last fast clear value we need to emit a
>> - * conditional packet, otherwise we can update DB_Z_INFO directly.
>> + * conditional packet that will eventually skip the following
>> + * SET_CONTEXT_REG packet.
>> */
>> - if (requires_cond_write) {
>> - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
>> -
>> - const uint32_t write_space = 0 << 8; /* register */
>> - const uint32_t poll_space = 1 << 4; /* memory */
>> - const uint32_t function = 3 << 0; /* equal to the reference */
>> - const uint32_t options = write_space | poll_space | function;
>> - radeon_emit(cmd_buffer->cs, options);
>> -
>> - /* poll address - location of the depth clear value */
>> + if (requires_cond_exec) {
>> uint64_t va = radv_buffer_get_va(image->bo);
>> - va += image->offset + image->clear_value_offset;
>> -
>> - /* In presence of stencil format, we have to adjust the base
>> - * address because the first value is the stencil clear value.
>> - */
>> - if (vk_format_is_stencil(image->vk_format))
>> - va += 4;
>> + va += image->offset + image->tc_compat_zrange_offset;
>>
>> + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
>> radeon_emit(cmd_buffer->cs, va);
>> radeon_emit(cmd_buffer->cs, va >> 32);
>> -
>> - radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
>> - radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
>> - radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
>> - radeon_emit(cmd_buffer->cs, 0u); /* write address high */
>> - radeon_emit(cmd_buffer->cs, db_z_info);
>> - } else {
>> - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
>> + radeon_emit(cmd_buffer->cs, 0);
>> + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
>> }
>> +
>> + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
>> }
>>
>> static void
>> @@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>> radeon_emit(cs, fui(ds_clear_value.depth));
>> }
>>
>> +/**
>> + * Update the TC-compat metadata value for this image.
>> + */
>> +static void
>> +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
>> + struct radv_image *image,
>> + uint32_t value)
>> +{
>> + struct radeon_cmdbuf *cs = cmd_buffer->cs;
>> + uint64_t va = radv_buffer_get_va(image->bo);
>> + va += image->offset + image->tc_compat_zrange_offset;
>> +
>> + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
>> + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
>> + S_370_WR_CONFIRM(1) |
>> + S_370_ENGINE_SEL(V_370_PFP));
>> + radeon_emit(cs, va);
>> + radeon_emit(cs, va >> 32);
>> + radeon_emit(cs, value);
>> +}
>> +
>> +static void
>> +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
>> + struct radv_image *image,
>> + VkClearDepthStencilValue ds_clear_value)
>> +{
>> + struct radeon_cmdbuf *cs = cmd_buffer->cs;
>> + uint64_t va = radv_buffer_get_va(image->bo);
>> + va += image->offset + image->tc_compat_zrange_offset;
>> + uint32_t cond_val;
>> +
>> + /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
>> + * depth clear value is 0.0f.
>> + */
>> + cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
>> +
>> + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
>> +}
>> +
>> /**
>> * Update the clear depth/stencil values for this image.
>> */
>> @@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>>
>> radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
>>
>> + if (radv_image_is_tc_compat_htile(image) &&
>> + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
>> + radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
>> + ds_clear_value);
>> + }
>> +
>> radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
>> aspects);
>> }
>> @@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
>> aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
>>
>> radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
>> +
>> + if (radv_image_is_tc_compat_htile(image)) {
>> + /* Initialize the TC-compat metada value to 0 because by
>> + * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
>> + * need have to conditionally update its value when performing
>> + * a fast depth clear.
>> + */
>> + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
>> + }
>> }
>>
>> static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>> index f447166d80c..9fcbaa3184c 100644
>> --- a/src/amd/vulkan/radv_image.c
>> +++ b/src/amd/vulkan/radv_image.c
>> @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
>> /* + 8 for storing the clear values */
>> image->clear_value_offset = image->htile_offset + image->surface.htile_size;
>> image->size = image->clear_value_offset + 8;
>> + if (radv_image_is_tc_compat_htile(image)) {
>> + /* Metadata for the TC-compatible HTILE hardware bug which
>> + * have to be fixed by updating ZRANGE_PRECISION when doing
>> + * fast depth clears to 0.0f.
>> + */
>> + image->tc_compat_zrange_offset = image->clear_value_offset + 8;
>> + image->size = image->clear_value_offset + 16;
>> + }
>> image->alignment = align64(image->alignment, image->surface.htile_alignment);
>> }
>>
>> @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
>> /* Otherwise, try to enable HTILE for depth surfaces. */
>> if (radv_image_can_enable_htile(image) &&
>> !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
>> - radv_image_alloc_htile(image);
>> image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
>> + radv_image_alloc_htile(image);
>> } else {
>> image->surface.htile_size = 0;
>> }
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index ac756f2c247..1ef5b215db7 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -1503,6 +1503,8 @@ struct radv_image {
>> uint64_t clear_value_offset;
>> uint64_t fce_pred_offset;
>>
>> + uint64_t tc_compat_zrange_offset;
>
> Can we document what the values put into this memory location mean?
>
Sure, will do before pushing.
> Otherwise
>
> Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>
>> +
>> /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
>> VkDeviceMemory owned_memory;
>> };
>> --
>> 2.19.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list