[PATCH] drm/amdgpu: Enable tunneling on high-priority compute queues

Fri Dec 8 11:43:53 UTC 2023

On 08.12.23 10:51, Christian König wrote:
> Well, long story short: Alex and I have been digging up the
> documentation for this, and as far as we can tell this isn't correct.
Huh. I initially talked to Marek about this, adding him in Cc.
>
> You need to do quite a bit more before you can turn on this feature.
> What userspace side do you refer to?
I was referring to the Mesa merge request I made
(https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462).
If/when you have more details about what else needs to be done, feel
free to let me know; I'm happy to expand this patch to add the rest of
what's needed as well.

Thanks,
Friedrich

>
> Regards,
> Christian.
>
> Am 08.12.23 um 09:19 schrieb Friedrich Vock:
>> Friendly ping on this one.
>> The userspace side got merged, so it would be great to land this patch too :)
>>
>> On 02.12.23 01:17, Friedrich Vock wrote:
>>> This improves latency if the GPU is already busy with other work.
>>> This is useful for VR compositors that submit highly latency-sensitive
>>> compositing work on high-priority compute queues while the GPU is busy
>>> rendering the next frame.
>>>
>>> Userspace merge request:
>>> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462
>>>
>>> Signed-off-by: Friedrich Vock <friedrich.vock at gmx.de>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 ++++++----
>>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  3 ++-
>>>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   |  3 ++-
>>>   4 files changed, 11 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 9505dc8f9d69..4b923a156c4e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -790,6 +790,7 @@ struct amdgpu_mqd_prop {
>>>       uint64_t eop_gpu_addr;
>>>       uint32_t hqd_pipe_priority;
>>>       uint32_t hqd_queue_priority;
>>> +    bool allow_tunneling;
>>>       bool hqd_active;
>>>   };
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> index 231d49132a56..4d98e8879be8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
>>> @@ -620,6 +620,10 @@ static void amdgpu_ring_to_mqd_prop(struct
>>> amdgpu_ring *ring,
>>>                       struct amdgpu_mqd_prop *prop)
>>>   {
>>>       struct amdgpu_device *adev = ring->adev;
>>> +    bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
>>> +        amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
>>> +    bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
>>> +        amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
>>>
>>>       memset(prop, 0, sizeof(*prop));
>>>
>>> @@ -637,10 +641,8 @@ static void amdgpu_ring_to_mqd_prop(struct
>>> amdgpu_ring *ring,
>>>        */
>>>       prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
>>>
>>> -    if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
>>> -         amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
>>> -        (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
>>> -         amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
>>> +    prop->allow_tunneling = is_high_prio_compute;
>>> +    if (is_high_prio_compute || is_high_prio_gfx) {
>>>           prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
>>>           prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
>>>       }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index c8a3bf01743f..73f6d7e72c73 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct
>>> amdgpu_device *adev, void *m,
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
>>>   #endif
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
>>> -    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
>>> +    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
>>> +                prop->allow_tunneling);
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
>>>       mqd->cp_hqd_pq_control = tmp;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>> index c659ef0f47ce..bdcf96df69e6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>> @@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct
>>> amdgpu_device *adev, void *m,
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
>>>                   (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
>>> -    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
>>> +    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
>>> +                prop->allow_tunneling);
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
>>>       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
>>>       mqd->cp_hqd_pq_control = tmp;
>>> --
>>> 2.43.0
>>>
>