[PATCH 1/2] drm/amdgpu: Add HDP flush to VCN v4.0.3

Lazar, Lijo lijo.lazar at amd.com
Tue Mar 5 09:40:19 UTC 2024



On 3/5/2024 2:44 PM, Christian König wrote:
> Am 05.03.24 um 10:01 schrieb Lazar, Lijo:
>> On 3/5/2024 2:22 PM, Christian König wrote:
>>> Am 05.03.24 um 07:40 schrieb Lijo Lazar:
>>>> VCN 4.0.3 cannot trigger HDP flush with RRMT enabled. Instead, trigger
>>>> HDP flush from host side before ringing doorbell.
>>> Well that won't work like that.
>>>
>>> The HDP flush is supposed to be emitted into the ring buffer of the
>>> engine. If you just "emulate" it by a register write than that write
>>> comes at the wrong time.
>>>
>>> This code here is seriously broken.
>>>
>> This approach was in fact suggested by VCN IP and FW team since RRMT is
>> not working from VCN side to do a remote flush of HDP (HDP is active
>> only in one of the AIDs). They mentioned that VCN doesn't need to flush
>> HDP at the time of processing.
> 
> In this case you don't need to implement anything here.
> 
> See amdgpu_device_flush_hdp(), we already have the CPU fallback for the
> engines which don't have access to the HDP:
> 
> #ifdef CONFIG_X86_64
>         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
>                 return;
> #endif
>         if (adev->gmc.xgmi.connected_to_cpu)
>                 return;
> 
>         if (ring && ring->funcs->emit_hdp_flush)
>                 amdgpu_ring_emit_hdp_flush(ring);
>         else
>                 amdgpu_asic_flush_hdp(adev, ring);
> 

The recommendation was to do a HDP flush at the time of packet
submission before write pointer update. Hence we added a dummy
emit_hdp_flush() to the ring which does nothing at this point and does a
flush only before it updates write pointer.

Thanks,
Lijo

> Regards,
> Christian.
> 
>>
>> Thanks,
>> Lijo
>>
>>> Regards,
>>> Christian.
>>>
>>>> Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 25
>>>> +++++++++++++++++++++++++
>>>>    1 file changed, 25 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
>>>> b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
>>>> index 810bbfccd6f2..7b5ad13b618e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
>>>> @@ -36,6 +36,7 @@
>>>>    #include "vcn/vcn_4_0_3_offset.h"
>>>>    #include "vcn/vcn_4_0_3_sh_mask.h"
>>>>    #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
>>>> +#include <uapi/linux/kfd_ioctl.h>
>>>>      #define mmUVD_DPG_LMA_CTL        regUVD_DPG_LMA_CTL
>>>>    #define mmUVD_DPG_LMA_CTL_BASE_IDX    regUVD_DPG_LMA_CTL_BASE_IDX
>>>> @@ -1380,6 +1381,26 @@ static uint64_t
>>>> vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
>>>>                        regUVD_RB_WPTR);
>>>>    }
>>>>    +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring
>>>> *ring)
>>>> +{
>>>> +    /* VCN engine access for HDP flush doesn't work when RRMT is
>>>> enabled.
>>>> +     * This is a workaround to avoid any HDP flush through VCN ring.
>>>> Instead
>>>> +     * HDP flush will be done by driver while submitting doorbell.
>>>> +     */
>>>> +}
>>>> +
>>>> +static void vcn_v4_0_3_flush_hdp(struct amdgpu_ring *ring)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +
>>>> +#ifdef CONFIG_X86_64
>>>> +    if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
>>>> +        return;
>>>> +#endif
>>>> +    if (ring->wptr)
>>>> +        WREG32_NO_KIQ((adev->rmmio_remap.reg_offset +
>>>> KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
>>>> +}
>>>> +
>>>>    /**
>>>>     * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
>>>>     *
>>>> @@ -1394,6 +1415,9 @@ static void
>>>> vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
>>>>        if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
>>>>            DRM_ERROR("wrong ring id is identified in %s", __func__);
>>>>    +    /* Flush HDP before ringing doorbell */
>>>> +    vcn_v4_0_3_flush_hdp(ring);
>>>> +
>>>>        if (ring->use_doorbell) {
>>>>            *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
>>>>            WDOORBELL32(ring->doorbell_index,
>>>> lower_32_bits(ring->wptr));
>>>> @@ -1420,6 +1444,7 @@ static const struct amdgpu_ring_funcs
>>>> vcn_v4_0_3_unified_ring_vm_funcs = {
>>>>        .emit_ib = vcn_v2_0_enc_ring_emit_ib,
>>>>        .emit_fence = vcn_v2_0_enc_ring_emit_fence,
>>>>        .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
>>>> +    .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
>>>>        .test_ring = amdgpu_vcn_enc_ring_test_ring,
>>>>        .test_ib = amdgpu_vcn_unified_ring_test_ib,
>>>>        .insert_nop = amdgpu_ring_insert_nop,
> 


More information about the amd-gfx mailing list