<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">Am 24.04.19 um 05:02 schrieb Zhou,
      David(ChunMing):<br>
    </div>
    <blockquote type="cite"
cite="mid:MN2PR12MB2910C37264B8F30BAD26FA4CB43C0@MN2PR12MB2910.namprd12.prod.outlook.com">
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      <meta name="Generator" content="Microsoft Word 15 (filtered
        medium)">
      <style><!--
/* Font Definitions */
@font-face
        {font-family:宋体;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"\@宋体";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:微软雅黑;
        panose-1:2 11 5 3 2 2 4 2 2 4;}
@font-face
        {font-family:"\@微软雅黑";
        panose-1:2 11 5 3 2 2 4 2 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;
        color:black;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p.msonormal0, li.msonormal0, div.msonormal0
        {mso-style-name:msonormal;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;
        color:black;}
p.emailquote, li.emailquote, div.emailquote
        {mso-style-name:emailquote;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:1.0pt;
        border:none;
        padding:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;
        color:black;}
span.EmailStyle20
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.25in 1.0in 1.25in;}
div.WordSection1
        {page:WordSection1;}
--></style>
      <div class="WordSection1">
        <p class="MsoNormal">>> -           
          drm_sched_stop(&ring->sched, &job->base);<br>
          >> -<br>
          >>               /* after all hw jobs are reset, hw
          fence is meaningless, so force_completion */<br>
          >>              
          amdgpu_fence_driver_force_completion(ring);<br>
          >>       }<o:p></o:p></p>
        <p class="MsoNormal"><o:p> </o:p></p>
        <p class="MsoNormal">HW fence are already forced completion,
          then we can just disable irq fence process and ignore hw fence
          signal when we are trying to do GPU reset, I think. Otherwise
          which will make the logic much more complex.<o:p></o:p></p>
        <p class="MsoNormal"><span style="color:windowtext">If this
            situation happens because of long time execution, we can
            increase timeout of reset detection.</span></p>
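        <p class="MsoNormal">For illustration only, a minimal sketch of that
          idea (a hypothetical handler reusing the in_gpu_reset flag from the
          patch below; not the actual amdgpu interrupt code):</p>
        <pre>
/* Sketch: ignore HW fence interrupts while a GPU reset is in progress,
 * so only the reset path completes fences. Hypothetical example, not the
 * real amdgpu IRQ handler. */
static void example_fence_irq(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (adev->in_gpu_reset)
                return;                 /* reset path will force-complete fences */

        amdgpu_fence_process(ring);     /* normal fence/seqno processing */
}
        </pre>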
      </div>
    </blockquote>
    <br>
    You are not thinking widely enough: forcing the HW fence to complete
    can trigger other parts of the system to start new activity.<br>
    <br>
    We first need to stop everything and make sure that we don't do any
    processing any more, and only then start with our reset procedure,
    including forcing all HW fences to complete.<br>
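    <br>
    For illustration, a rough sketch of that ordering, using the helpers that
    already appear in the patch below (stop first, force-complete afterwards);
    this is not a complete recovery path:<br>
    <pre>
/* Sketch of the ordering described above: first quiesce all SW schedulers,
 * only then start the reset procedure, which force-completes HW fences. */
static void example_reset_order(struct amdgpu_device *adev,
                                struct amdgpu_job *job)
{
        int i;

        /* 1) Stop all software schedulers so no new processing starts. */
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (!ring || !ring->sched.thread)
                        continue;

                drm_sched_stop(&ring->sched, &job->base);
        }

        /* 2) Now that nothing is processing any more, force-complete the
         *    HW fences as part of the reset procedure. */
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (!ring || !ring->sched.thread)
                        continue;

                amdgpu_fence_driver_force_completion(ring);
        }
}
    </pre>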
    <br>
    Christian.<br>
    <br>
    <blockquote type="cite"
cite="mid:MN2PR12MB2910C37264B8F30BAD26FA4CB43C0@MN2PR12MB2910.namprd12.prod.outlook.com">
      <div class="WordSection1">
        <p class="MsoNormal"><span style="color:windowtext"><o:p></o:p></span></p>
        <p class="MsoNormal"><span style="color:windowtext"><o:p> </o:p></span></p>
        <p class="MsoNormal"><span style="color:windowtext">-David<o:p></o:p></span></p>
        <p class="MsoNormal"><span style="color:windowtext"><o:p> </o:p></span></p>
        <div style="border:none;border-left:solid blue 1.5pt;padding:0in
          0in 0in 4.0pt">
          <div>
            <div style="border:none;border-top:solid #E1E1E1
              1.0pt;padding:3.0pt 0in 0in 0in">
              <p class="MsoNormal"><b><span style="color:windowtext">From:</span></b><span
                  style="color:windowtext"> amd-gfx
                  <a class="moz-txt-link-rfc2396E" href="mailto:amd-gfx-bounces@lists.freedesktop.org"><amd-gfx-bounces@lists.freedesktop.org></a>
                  <b>On Behalf Of </b>Grodzovsky, Andrey<br>
                  <b>Sent:</b> Wednesday, April 24, 2019 12:00 AM<br>
                  <b>To:</b> Zhou, David(ChunMing)
                  <a class="moz-txt-link-rfc2396E" href="mailto:David1.Zhou@amd.com"><David1.Zhou@amd.com></a>;
                  <a class="moz-txt-link-abbreviated" href="mailto:dri-devel@lists.freedesktop.org">dri-devel@lists.freedesktop.org</a>;
                  <a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>; <a class="moz-txt-link-abbreviated" href="mailto:eric@anholt.net">eric@anholt.net</a>;
                  <a class="moz-txt-link-abbreviated" href="mailto:etnaviv@lists.freedesktop.org">etnaviv@lists.freedesktop.org</a>;
                  <a class="moz-txt-link-abbreviated" href="mailto:ckoenig.leichtzumerken@gmail.com">ckoenig.leichtzumerken@gmail.com</a><br>
                  <b>Cc:</b> Kazlauskas, Nicholas
                  <a class="moz-txt-link-rfc2396E" href="mailto:Nicholas.Kazlauskas@amd.com"><Nicholas.Kazlauskas@amd.com></a>; Liu, Monk
                  <a class="moz-txt-link-rfc2396E" href="mailto:Monk.Liu@amd.com"><Monk.Liu@amd.com></a><br>
                  <b>Subject:</b> Re: [PATCH v5 6/6] drm/amdgpu: Avoid
                  HW reset if guilty job already signaled.<o:p></o:p></span></p>
            </div>
          </div>
          <p class="MsoNormal"><o:p> </o:p></p>
          <p>No, I mean the actual HW fence, which signals when the job has
            finished execution on the HW.<o:p></o:p></p>
          <p>Andrey<o:p></o:p></p>
          <div>
            <p class="MsoNormal">On 4/23/19 11:19 AM, Zhou,
              David(ChunMing) wrote:<o:p></o:p></p>
          </div>
          <blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
            <div>
              <p class="MsoNormal" style="margin-bottom:12.0pt">do you
                mean fence timer? why not stop it as well when stopping
                sched for the reason of hw reset?<br>
                <br>
                -------- Original Message --------<br>
                Subject: Re: [PATCH v5 6/6] drm/amdgpu: Avoid HW reset
                if guilty job already signaled.<br>
                From: "Grodzovsky, Andrey" <br>
                To: "Zhou, David(ChunMing)" ,<a
href="mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org,eric@anholt.net,etnaviv@lists.freedesktop.org,ckoenig.leichtzumerken@gmail.com"
                  moz-do-not-send="true">dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org,eric@anholt.net,etnaviv@lists.freedesktop.org,ckoenig.leichtzumerken@gmail.com</a><br>
                CC: "Kazlauskas, Nicholas" ,"Liu, Monk" <o:p></o:p></p>
            </div>
            <div>
              <p class="MsoNormal"><br>
                On 4/22/19 9:09 AM, Zhou, David(ChunMing) wrote:<br>
                > +Monk.<br>
                ><br>
                > GPU reset is used widely in SRIOV, so a
                virtualization guy needs to take a look.<br>
                ><br>
                > But out of curiosity, why can the guilty job still signal
                if it is already<br>
                > set to guilty? Was it set wrongly?<br>
                ><br>
                ><br>
                > -David<br>
                <br>
                <br>
                It's possible that the job completes at a later
                time than when its <br>
                timeout handler started processing, so in this patch we
                try to protect <br>
                against this by rechecking the HW fence after stopping
                all SW <br>
                schedulers. We do it BEFORE marking the job guilty on its
                sched_entity, so <br>
                at the point where we check, the guilty flag is not set yet.<br>
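                <br>
                Condensed, and purely for illustration, the flow from the
                patch below looks roughly like this (a sketch, not the full
                function):<br>
                <br>
                /* stop all SW schedulers first (done for every ring) */<br>
                drm_sched_stop(&ring->sched, &job->base);<br>
                <br>
                /* recheck the HW (parent) fence before any guilty handling */<br>
                if (job && job->base.s_fence->parent &&<br>
                    dma_fence_is_signaled(job->base.s_fence->parent))<br>
                        job_signaled = true;<br>
                <br>
                if (job_signaled)<br>
                        goto skip_hw_reset; /* skips HW reset and karma increase */<br>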
                <br>
                Andrey<br>
                <br>
                <br>
                ><br>
                > On 2019/4/18 23:00, Andrey Grodzovsky wrote:<br>
                >> Also reject TDRs if another one is already
                running.<br>
                >><br>
                >> v2:<br>
                >> Stop all schedulers across device and entire
                XGMI hive before<br>
                >> force signaling HW fences.<br>
                >> Avoid passing job_signaled to helper functions
                to keep all the decision<br>
                >> making about skipping HW reset in one place.<br>
                >><br>
                >> v3:<br>
                >> Fix SW sched. hang after non-HW reset.
                sched.hw_rq_count has to be balanced<br>
                >> against its decrement in drm_sched_stop in the
                non-HW reset case.<br>
                >> v4: rebase<br>
                >> v5: Revert v3 as we do it now in scheduler code.<br>
                >><br>
                >> Signed-off-by: Andrey Grodzovsky <a
                  href="mailto:andrey.grodzovsky@amd.com"
                  moz-do-not-send="true"><andrey.grodzovsky@amd.com></a><br>
                >> ---<br>
                >>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |
                143 +++++++++++++++++++----------<br>
                >>    1 file changed, 95 insertions(+), 48
                deletions(-)<br>
                >><br>
                >> diff --git
                a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
                b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
                >> index a0e165c..85f8792 100644<br>
                >> ---
                a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
                >> +++
                b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
                >> @@ -3334,8 +3334,6 @@ static int
                amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,<br>
                >>               if (!ring ||
                !ring->sched.thread)<br>
                >>                       continue;<br>
                >>    <br>
                >> -           
                drm_sched_stop(&ring->sched, &job->base);<br>
                >> -<br>
                >>               /* after all hw jobs are reset,
                hw fence is meaningless, so force_completion */<br>
                >>              
                amdgpu_fence_driver_force_completion(ring);<br>
                >>       }<br>
                >> @@ -3343,6 +3341,7 @@ static int
                amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,<br>
                >>       if(job)<br>
                >>              
                drm_sched_increase_karma(&job->base);<br>
                >>    <br>
                >> +    /* Don't suspend on bare metal if we are
                not going to HW reset the ASIC */<br>
                >>       if (!amdgpu_sriov_vf(adev)) {<br>
                >>    <br>
                >>               if (!need_full_reset)<br>
                >> @@ -3480,37 +3479,21 @@ static int
                amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,<br>
                >>       return r;<br>
                >>    }<br>
                >>    <br>
                >> -static void
                amdgpu_device_post_asic_reset(struct amdgpu_device
                *adev)<br>
                >> +static bool amdgpu_device_lock_adev(struct
                amdgpu_device *adev, bool trylock)<br>
                >>    {<br>
                >> -    int i;<br>
                >> -<br>
                >> -    for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
                {<br>
                >> -            struct amdgpu_ring *ring =
                adev->rings[i];<br>
                >> -<br>
                >> -            if (!ring ||
                !ring->sched.thread)<br>
                >> -                    continue;<br>
                >> -<br>
                >> -            if (!adev->asic_reset_res)<br>
                >> -                   
                drm_sched_resubmit_jobs(&ring->sched);<br>
                >> +    if (trylock) {<br>
                >> +            if
                (!mutex_trylock(&adev->lock_reset))<br>
                >> +                    return false;<br>
                >> +    } else<br>
                >> +           
                mutex_lock(&adev->lock_reset);<br>
                >>    <br>
                >> -           
                drm_sched_start(&ring->sched,
                !adev->asic_reset_res);<br>
                >> -    }<br>
                >> -<br>
                >> -    if (!amdgpu_device_has_dc_support(adev)) {<br>
                >> -           
                drm_helper_resume_force_mode(adev->ddev);<br>
                >> -    }<br>
                >> -<br>
                >> -    adev->asic_reset_res = 0;<br>
                >> -}<br>
                >> -<br>
                >> -static void amdgpu_device_lock_adev(struct
                amdgpu_device *adev)<br>
                >> -{<br>
                >> -    mutex_lock(&adev->lock_reset);<br>
                >>      
                atomic_inc(&adev->gpu_reset_counter);<br>
                >>       adev->in_gpu_reset = 1;<br>
                >>       /* Block kfd: SRIOV would do it
                separately */<br>
                >>       if (!amdgpu_sriov_vf(adev))<br>
                >>                   
                amdgpu_amdkfd_pre_reset(adev);<br>
                >> +<br>
                >> +    return true;<br>
                >>    }<br>
                >>    <br>
                >>    static void amdgpu_device_unlock_adev(struct
                amdgpu_device *adev)<br>
                >> @@ -3538,40 +3521,42 @@ static void
                amdgpu_device_unlock_adev(struct amdgpu_device *adev)<br>
                >>    int amdgpu_device_gpu_recover(struct
                amdgpu_device *adev,<br>
                >>                             struct amdgpu_job
                *job)<br>
                >>    {<br>
                >> -    int r;<br>
                >> +    struct list_head device_list,
                *device_list_handle =  NULL;<br>
                >> +    bool need_full_reset, job_signaled;<br>
                >>       struct amdgpu_hive_info *hive = NULL;<br>
                >> -    bool need_full_reset = false;<br>
                >>       struct amdgpu_device *tmp_adev = NULL;<br>
                >> -    struct list_head device_list,
                *device_list_handle =  NULL;<br>
                >> +    int i, r = 0;<br>
                >>    <br>
                >> +    need_full_reset = job_signaled = false;<br>
                >>       INIT_LIST_HEAD(&device_list);<br>
                >>    <br>
                >>       dev_info(adev->dev, "GPU reset
                begin!\n");<br>
                >>    <br>
                >> +    hive = amdgpu_get_xgmi_hive(adev, false);<br>
                >> +<br>
                >>       /*<br>
                >> -     * In case of XGMI hive disallow
                concurrent resets to be triggered<br>
                >> -     * by different nodes. No point also since
                the one node already executing<br>
                >> -     * reset will also reset all the other
                nodes in the hive.<br>
                >> +     * Here we trylock to avoid chain of
                resets executing from<br>
                >> +     * either trigger by jobs on different
                adevs in XGMI hive or jobs on<br>
                >> +     * different schedulers for same device
                while this TO handler is running.<br>
                >> +     * We always reset all schedulers for
                device and all devices for XGMI<br>
                >> +     * hive so that should take care of them
                too.<br>
                >>        */<br>
                >> -    hive = amdgpu_get_xgmi_hive(adev, 0);<br>
                >> -    if (hive &&
                adev->gmc.xgmi.num_physical_nodes > 1 &&<br>
                >> -       
                !mutex_trylock(&hive->reset_lock))<br>
                >> +<br>
                >> +    if (hive &&
                !mutex_trylock(&hive->reset_lock)) {<br>
                >> +            DRM_INFO("Bailing on TDR for
                s_job:%llx, hive: %llx as another already in progress",<br>
                >> +                     job->base.id,
                hive->hive_id);<br>
                >>               return 0;<br>
                >> +    }<br>
                >>    <br>
                >>       /* Start with adev pre asic reset first
                for soft reset check.*/<br>
                >> -    amdgpu_device_lock_adev(adev);<br>
                >> -    r = amdgpu_device_pre_asic_reset(adev,<br>
                >> -                                     job,<br>
                >> -                                    
                &need_full_reset);<br>
                >> -    if (r) {<br>
                >> -            /*TODO Should we stop ?*/<br>
                >> -            DRM_ERROR("GPU pre asic reset
                failed with err, %d for drm dev, %s ",<br>
                >> -                      r,
                adev->ddev->unique);<br>
                >> -            adev->asic_reset_res = r;<br>
                >> +    if (!amdgpu_device_lock_adev(adev, !hive))
                {<br>
                >> +            DRM_INFO("Bailing on TDR for
                s_job:%llx, as another already in progress",<br>
                >> +                                    
                job->base.id);<br>
                >> +            return 0;<br>
                >>       }<br>
                >>    <br>
                >>       /* Build list of devices to reset */<br>
                >> -    if  (need_full_reset &&
                adev->gmc.xgmi.num_physical_nodes > 1) {<br>
                >> +    if  (adev->gmc.xgmi.num_physical_nodes
                > 1) {<br>
                >>               if (!hive) {<br>
                >>                      
                amdgpu_device_unlock_adev(adev);<br>
                >>                       return -ENODEV;<br>
                >> @@ -3588,13 +3573,56 @@ int
                amdgpu_device_gpu_recover(struct amdgpu_device *adev,<br>
                >>               device_list_handle =
                &device_list;<br>
                >>       }<br>
                >>    <br>
                >> +    /* block all schedulers and reset given
                job's ring */<br>
                >> +    list_for_each_entry(tmp_adev,
                device_list_handle, gmc.xgmi.head) {<br>
                >> +            for (i = 0; i <
                AMDGPU_MAX_RINGS; ++i) {<br>
                >> +                    struct amdgpu_ring *ring =
                tmp_adev->rings[i];<br>
                >> +<br>
                >> +                    if (!ring ||
                !ring->sched.thread)<br>
                >> +                            continue;<br>
                >> +<br>
                >> +                   
                drm_sched_stop(&ring->sched, &job->base);<br>
                >> +            }<br>
                >> +    }<br>
                >> +<br>
                >> +<br>
                >> +    /*<br>
                >> +     * Must check guilty signal here since
                after this point all old<br>
                >> +     * HW fences are force signaled.<br>
                >> +     *<br>
                >> +     * job->base holds a reference to
                parent fence<br>
                >> +     */<br>
                >> +    if (job &&
                job->base.s_fence->parent &&<br>
                >> +       
                dma_fence_is_signaled(job->base.s_fence->parent))<br>
                >> +            job_signaled = true;<br>
                >> +<br>
                >> +    if
                (!amdgpu_device_ip_need_full_reset(adev))<br>
                >> +            device_list_handle =
                &device_list;<br>
                >> +<br>
                >> +    if (job_signaled) {<br>
                >> +            dev_info(adev->dev, "Guilty job
                already signaled, skipping HW reset");<br>
                >> +            goto skip_hw_reset;<br>
                >> +    }<br>
                >> +<br>
                >> +<br>
                >> +    /* Guilty job will be freed after this*/<br>
                >> +    r = amdgpu_device_pre_asic_reset(adev,<br>
                >> +                                     job,<br>
                >> +                                    
                &need_full_reset);<br>
                >> +    if (r) {<br>
                >> +            /*TODO Should we stop ?*/<br>
                >> +            DRM_ERROR("GPU pre asic reset
                failed with err, %d for drm dev, %s ",<br>
                >> +                      r,
                adev->ddev->unique);<br>
                >> +            adev->asic_reset_res = r;<br>
                >> +    }<br>
                >> +<br>
                >>    retry:    /* Rest of adevs pre asic reset
                from XGMI hive. */<br>
                >>       list_for_each_entry(tmp_adev,
                device_list_handle, gmc.xgmi.head) {<br>
                >>    <br>
                >>               if (tmp_adev == adev)<br>
                >>                       continue;<br>
                >>    <br>
                >> -            amdgpu_device_lock_adev(tmp_adev);<br>
                >> +            amdgpu_device_lock_adev(tmp_adev,
                false);<br>
                >>               r =
                amdgpu_device_pre_asic_reset(tmp_adev,<br>
                >>                                               
                NULL,<br>
                >>                                               
                &need_full_reset);<br>
                >> @@ -3618,9 +3646,28 @@ int
                amdgpu_device_gpu_recover(struct amdgpu_device *adev,<br>
                >>                       goto retry;<br>
                >>       }<br>
                >>    <br>
                >> +skip_hw_reset:<br>
                >> +<br>
                >>       /* Post ASIC reset for all devs .*/<br>
                >>       list_for_each_entry(tmp_adev,
                device_list_handle, gmc.xgmi.head) {<br>
                >> -           
                amdgpu_device_post_asic_reset(tmp_adev);<br>
                >> +            for (i = 0; i <
                AMDGPU_MAX_RINGS; ++i) {<br>
                >> +                    struct amdgpu_ring *ring =
                tmp_adev->rings[i];<br>
                >> +<br>
                >> +                    if (!ring ||
                !ring->sched.thread)<br>
                >> +                            continue;<br>
                >> +<br>
                >> +                    /* No point to resubmit
                jobs if we didn't HW reset*/<br>
                >> +                    if
                (!tmp_adev->asic_reset_res && !job_signaled)<br>
                >> +                           
                drm_sched_resubmit_jobs(&ring->sched);<br>
                >> +<br>
                >> +                   
                drm_sched_start(&ring->sched,
                !tmp_adev->asic_reset_res);<br>
                >> +            }<br>
                >> +<br>
                >> +            if
                (!amdgpu_device_has_dc_support(tmp_adev) &&
                !job_signaled) {<br>
                >> +                   
                drm_helper_resume_force_mode(tmp_adev->ddev);<br>
                >> +            }<br>
                >> +<br>
                >> +            tmp_adev->asic_reset_res = 0;<br>
                >>    <br>
                >>               if (r) {<br>
                >>                       /* bad news, how to tell
                it to userspace ? */<br>
                >> @@ -3633,7 +3680,7 @@ int
                amdgpu_device_gpu_recover(struct amdgpu_device *adev,<br>
                >>              
                amdgpu_device_unlock_adev(tmp_adev);<br>
                >>       }<br>
                >>    <br>
                >> -    if (hive &&
                adev->gmc.xgmi.num_physical_nodes > 1)<br>
                >> +    if (hive)<br>
                >>              
                mutex_unlock(&hive->reset_lock);<br>
                >>    <br>
                >>       if (r)<br>
                _______________________________________________<br>
                amd-gfx mailing list<br>
                <a href="mailto:amd-gfx@lists.freedesktop.org"
                  moz-do-not-send="true">amd-gfx@lists.freedesktop.org</a><br>
                <a
                  href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx"
                  moz-do-not-send="true">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a><o:p></o:p></p>
            </div>
          </blockquote>
        </div>
      </div>
    </blockquote>
    <br>
  </body>
</html>