<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#008000;margin:15pt;" align="Left">
[Public]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
If it applies cleanly, feel free to drop it in.  I'll drop those patches for drm-next since they are already in drm-misc.</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Alex</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Andrey Grodzovsky &lt;andrey.grodzovsky@amd.com&gt;<br>
<b>Sent:</b> Thursday, February 24, 2022 11:24 AM<br>
<b>To:</b> Chen, JingWen &lt;JingWen.Chen2@amd.com&gt;; Christian König &lt;ckoenig.leichtzumerken@gmail.com&gt;; dri-devel@lists.freedesktop.org &lt;dri-devel@lists.freedesktop.org&gt;; amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Liu, Monk &lt;Monk.Liu@amd.com&gt;; Chen, Horace &lt;Horace.Chen@amd.com&gt;; Lazar, Lijo &lt;Lijo.Lazar@amd.com&gt;; Koenig, Christian &lt;Christian.Koenig@amd.com&gt;; daniel@ffwll.ch &lt;daniel@ffwll.ch&gt;<br>
<b>Subject:</b> Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">No, because the whole patch set, including this patch, has landed in
<br>
drm-misc-next and will reach amd-staging-drm-next on the next upstream <br>
rebase, I guess.<br>
<br>
Andrey<br>
<br>
On 2022-02-24 01:47, JingWen Chen wrote:<br>
> Hi Andrey,<br>
><br>
> Will you port this patch into amd-staging-drm-next?<br>
><br>
> on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:<br>
>> All comments are fixed and code pushed. Thanks to everyone<br>
>> who helped with reviewing.<br>
>><br>
>> Andrey<br>
>><br>
>> On 2022-02-09 02:53, Christian König wrote:<br>
>>> Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:<br>
>>>> Before we initialize schedulers we must know which reset<br>
>>>> domain we are in - for a single device there is a single<br>
>>>> domain per device and so a single wq per device. For XGMI<br>
>>>> the reset domain spans the entire XGMI hive and so the<br>
>>>> reset wq is per hive.<br>
>>>><br>
>>>> Signed-off-by: Andrey Grodzovsky &lt;andrey.grodzovsky@amd.com&gt;<br>
>>> One more comment below, with that fixed Reviewed-by: Christian König &lt;christian.koenig@amd.com&gt;.<br>
>>><br>
>>>> ---<br>
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++++++++++++++++++++++<br>
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--------------<br>
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +<br>
>>>>    3 files changed, 51 insertions(+), 30 deletions(-)<br>
>>>><br>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
>>>> index 9704b0e1fd82..00123b0013d3 100644<br>
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
>>>> @@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)<br>
>>>>        return r;<br>
>>>>    }<br>
>>>>    +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)<br>
>>>> +{<br>
>>>> +    long timeout;<br>
>>>> +    int r, i;<br>
>>>> +<br>
>>>> +    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {<br>
>>>> +        struct amdgpu_ring *ring = adev->rings[i];<br>
>>>> +<br>
>>>> +        /* No need to setup the GPU scheduler for rings that don't need it */<br>
>>>> +        if (!ring || ring->no_scheduler)<br>
>>>> +            continue;<br>
>>>> +<br>
>>>> +        switch (ring->funcs->type) {<br>
>>>> +        case AMDGPU_RING_TYPE_GFX:<br>
>>>> +            timeout = adev->gfx_timeout;<br>
>>>> +            break;<br>
>>>> +        case AMDGPU_RING_TYPE_COMPUTE:<br>
>>>> +            timeout = adev->compute_timeout;<br>
>>>> +            break;<br>
>>>> +        case AMDGPU_RING_TYPE_SDMA:<br>
>>>> +            timeout = adev->sdma_timeout;<br>
>>>> +            break;<br>
>>>> +        default:<br>
>>>> +            timeout = adev->video_timeout;<br>
>>>> +            break;<br>
>>>> +        }<br>
>>>> +<br>
>>>> +        r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,<br>
>>>> +                   ring->num_hw_submission, amdgpu_job_hang_limit,<br>
>>>> +                   timeout, adev->reset_domain.wq, ring->sched_score, ring->name);<br>
>>>> +        if (r) {<br>
>>>> +            DRM_ERROR("Failed to create scheduler on ring %s.\n",<br>
>>>> +                  ring->name);<br>
>>>> +            return r;<br>
>>>> +        }<br>
>>>> +    }<br>
>>>> +<br>
>>>> +    return 0;<br>
>>>> +}<br>
>>>> +<br>
>>>> +<br>
>>>>    /**<br>
>>>>     * amdgpu_device_ip_init - run init for hardware IPs<br>
>>>>     *<br>
>>>> @@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)<br>
>>>>            }<br>
>>>>        }<br>
>>>>    +    r = amdgpu_device_init_schedulers(adev);<br>
>>>> +    if (r)<br>
>>>> +        goto init_failed;<br>
>>>> +<br>
>>>>        /* Don't init kfd if whole hive need to be reset during init */<br>
>>>>        if (!adev->gmc.xgmi.pending_reset)<br>
>>>>            amdgpu_amdkfd_device_init(adev);<br>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c<br>
>>>> index 45977a72b5dd..fa302540c69a 100644<br>
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c<br>
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c<br>
>>>> @@ -457,8 +457,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,<br>
>>>>                      atomic_t *sched_score)<br>
>>>>    {<br>
>>>>        struct amdgpu_device *adev = ring->adev;<br>
>>>> -    long timeout;<br>
>>>> -    int r;<br>
>>>>          if (!adev)<br>
>>>>            return -EINVAL;<br>
>>>> @@ -478,36 +476,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,<br>
>>>>        spin_lock_init(&ring->fence_drv.lock);<br>
>>>>        ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),<br>
>>>>                         GFP_KERNEL);<br>
>>>> -    if (!ring->fence_drv.fences)<br>
>>>> -        return -ENOMEM;<br>
>>>>    -    /* No need to setup the GPU scheduler for rings that don't need it */<br>
>>>> -    if (ring->no_scheduler)<br>
>>>> -        return 0;<br>
>>>> +    ring->num_hw_submission = num_hw_submission;<br>
>>>> +    ring->sched_score = sched_score;<br>
>>> Let's move this into the caller and then use ring->num_hw_submission in the fence code as well.<br>
>>><br>
>>> The maximum number of jobs on the ring is not really fence specific.<br>
>>><br>
>>> Regards,<br>
>>> Christian.<br>
>>><br>
>>>>    -    switch (ring->funcs->type) {<br>
>>>> -    case AMDGPU_RING_TYPE_GFX:<br>
>>>> -        timeout = adev->gfx_timeout;<br>
>>>> -        break;<br>
>>>> -    case AMDGPU_RING_TYPE_COMPUTE:<br>
>>>> -        timeout = adev->compute_timeout;<br>
>>>> -        break;<br>
>>>> -    case AMDGPU_RING_TYPE_SDMA:<br>
>>>> -        timeout = adev->sdma_timeout;<br>
>>>> -        break;<br>
>>>> -    default:<br>
>>>> -        timeout = adev->video_timeout;<br>
>>>> -        break;<br>
>>>> -    }<br>
>>>> -<br>
>>>> -    r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,<br>
>>>> -               num_hw_submission, amdgpu_job_hang_limit,<br>
>>>> -               timeout, NULL, sched_score, ring->name);<br>
>>>> -    if (r) {<br>
>>>> -        DRM_ERROR("Failed to create scheduler on ring %s.\n",<br>
>>>> -              ring->name);<br>
>>>> -        return r;<br>
>>>> -    }<br>
>>>> +    if (!ring->fence_drv.fences)<br>
>>>> +        return -ENOMEM;<br>
>>>>          return 0;<br>
>>>>    }<br>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h<br>
>>>> index fae7d185ad0d..7f20ce73a243 100644<br>
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h<br>
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h<br>
>>>> @@ -251,6 +251,8 @@ struct amdgpu_ring {<br>
>>>>        bool            has_compute_vm_bug;<br>
>>>>        bool            no_scheduler;<br>
>>>>        int            hw_prio;<br>
>>>> +    unsigned         num_hw_submission;<br>
>>>> +    atomic_t        *sched_score;<br>
>>>>    };<br>
>>>>      #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))<br>
</div>
</span></font></div>
</div>
</body>
</html>