<html>
  <head>
    <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">
      <blockquote type="cite"><font size="2"><span
            style="font-size:10pt;">
            <div class="PlainText">+       /* now we are okay to resume
              SMC/CP/SDMA */<br>
              +       amdgpu_resume_late(adev);<br>
            </div>
          </span></font>
      </blockquote>
      As I wrote in the other thread as well, calling amdgpu_resume()
      without a proper suspend will just mess up a whole bunch of internal
      structures.<br>
      <br>
      So a clear NAK on that approach. If you don't need the hw stop
      which amdgpu_suspend() does for SRIOV, then please try to just use
      the hw_init() callback and not the resume() callback.<br>
      <br>
      Regards,<br>
      Christian.<br>
      <br>
      Am 07.02.2017 um 07:26 schrieb Liu, Monk:<br>
    </div>
    <blockquote
cite="mid:DM5PR12MB16109EC5F03088C1CFB58FE484430@DM5PR12MB1610.namprd12.prod.outlook.com"
      type="cite">
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      <meta name="Generator" content="Microsoft Exchange Server">
      <!-- converted from text -->
      <style><!-- .EmailQuote { margin-left: 1pt; padding-left: 4pt; border-left: #800000 2px solid; } --></style>
      <meta content="text/html; charset=UTF-8">
      <style type="text/css" style="">
<!--
p
        {margin-top:0;
        margin-bottom:0}
-->
</style>
      <div dir="ltr">
        <div id="x_divtagdefaultwrapper" dir="ltr"
          style="font-size:12pt; color:#000000;
          font-family:Calibri,Arial,Helvetica,sans-serif">
          <p>patch 1-8 are some fixes for sriov gpu reset feature</p>
          <p>patch 9 -20 are for sriov gpu reset</p>
          <p><br>
          </p>
          <p>BR Monk<br>
          </p>
        </div>
        <hr tabindex="-1" style="display:inline-block; width:98%">
        <div id="x_divRplyFwdMsg" dir="ltr"><font style="font-size:11pt"
            color="#000000" face="Calibri, sans-serif"><b>发件人:</b>
            amd-gfx <a class="moz-txt-link-rfc2396E" href="mailto:amd-gfx-bounces@lists.freedesktop.org">&lt;amd-gfx-bounces@lists.freedesktop.org&gt;</a> 代表
            Monk Liu <a class="moz-txt-link-rfc2396E" href="mailto:Monk.Liu@amd.com">&lt;Monk.Liu@amd.com&gt;</a><br>
            <b>发送时间:</b> 2017年2月7日 14:11:07<br>
            <b>收件人:</b> <a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a><br>
            <b>抄送:</b> Liu, Monk<br>
            <b>主题:</b> [PATCH 09/20] drm/amdgpu:implement SRIOV
            gpu_reset</font>
          <div> </div>
        </div>
      </div>
      <font size="2"><span style="font-size:10pt;">
          <div class="PlainText">Signed-off-by: Monk Liu
            <a class="moz-txt-link-rfc2396E" href="mailto:Monk.Liu@amd.com">&lt;Monk.Liu@amd.com&gt;</a><br>
            ---<br>
             drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 158
            ++++++++++++++++++++++++++++-<br>
             drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |   1 +<br>
             2 files changed, 158 insertions(+), 1 deletion(-)<br>
            <br>
            diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
            b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
            index e926f84..2b404ca 100644<br>
            --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
            +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
            @@ -1604,6 +1604,53 @@ int amdgpu_suspend(struct
            amdgpu_device *adev)<br>
                     return 0;<br>
             }<br>
             <br>
            +static int amdgpu_resume_early(struct amdgpu_device *adev)<br>
            +{<br>
            +       int i, r;<br>
            +<br>
            +       for (i = 0; i < adev->num_ip_blocks; i++) {<br>
            +               if (!adev->ip_blocks[i].status.valid)<br>
            +                       continue;<br>
            +<br>
            +               if (adev->ip_blocks[i].version->type
            == AMD_IP_BLOCK_TYPE_COMMON ||<br>
            +                              
            adev->ip_blocks[i].version->type ==
            AMD_IP_BLOCK_TYPE_GMC ||<br>
            +                              
            adev->ip_blocks[i].version->type ==
            AMD_IP_BLOCK_TYPE_IH)<br>
            +                       r =
            adev->ip_blocks[i].version->funcs->resume(adev);<br>
            +<br>
            +               if (r) {<br>
            +                       DRM_ERROR("resume of IP block
            &lt;%s&gt; failed %d\n",<br>
            +                                
            adev->ip_blocks[i].version->funcs->name, r);<br>
            +                       return r;<br>
            +               }<br>
            +       }<br>
            +<br>
            +       return 0;<br>
            +}<br>
            +<br>
            +static int amdgpu_resume_late(struct amdgpu_device *adev)<br>
            +{<br>
            +       int i, r;<br>
            +<br>
            +       for (i = 0; i < adev->num_ip_blocks; i++) {<br>
            +               if (!adev->ip_blocks[i].status.valid)<br>
            +                       continue;<br>
            +<br>
            +               if (adev->ip_blocks[i].version->type
            == AMD_IP_BLOCK_TYPE_COMMON ||<br>
            +                              
            adev->ip_blocks[i].version->type ==
            AMD_IP_BLOCK_TYPE_GMC ||<br>
            +                              
            adev->ip_blocks[i].version->type ==
            AMD_IP_BLOCK_TYPE_IH )<br>
            +                       continue;<br>
            +<br>
            +               r =
            adev->ip_blocks[i].version->funcs->resume(adev);<br>
            +               if (r) {<br>
            +                       DRM_ERROR("resume of IP block
            &lt;%s&gt; failed %d\n",<br>
            +                                
            adev->ip_blocks[i].version->funcs->name, r);<br>
            +                       return r;<br>
            +               }<br>
            +       }<br>
            +<br>
            +       return 0;<br>
            +}<br>
            +<br>
             static int amdgpu_resume(struct amdgpu_device *adev)<br>
             {<br>
                     int i, r;<br>
            @@ -2343,6 +2390,115 @@ static int
            amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,<br>
             }<br>
             <br>
             /**<br>
            + * amdgpu_sriov_gpu_reset - reset the asic<br>
            + *<br>
            + * @adev: amdgpu device pointer<br>
            + * @voluntary: if this reset is requested by guest.<br>
            + *             (true means by guest and false means by
            HYPERVISOR )<br>
            + *<br>
            + * Attempt the reset the GPU if it has hung (all asics).<br>
            + * for SRIOV case.<br>
            + * Returns 0 for success or an error on failure.<br>
            + */<br>
            +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool
            voluntary)<br>
            +{<br>
            +       int i, r = 0;<br>
            +       int resched;<br>
            +       struct amdgpu_bo *bo, *tmp;<br>
            +       struct amdgpu_ring *ring;<br>
            +       struct fence *fence = NULL, *next = NULL;<br>
            +<br>
            +       mutex_lock(&adev->virt.lock_reset);<br>
            +       atomic_inc(&adev->gpu_reset_counter);<br>
            +<br>
            +       /* block TTM */<br>
            +       resched =
            ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);<br>
            +<br>
            +       /* block scheduler */<br>
            +       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {<br>
            +               ring = adev->rings[i];<br>
            +<br>
            +               if (!ring || !ring->sched.thread)<br>
            +                       continue;<br>
            +<br>
            +               kthread_park(ring->sched.thread);<br>
            +               amd_sched_hw_job_reset(&ring->sched);<br>
            +       }<br>
            +<br>
            +       /* after all hw jobs are reset, hw fence is
            meaningless, so force_completion */<br>
            +       amdgpu_fence_driver_force_completion(adev);<br>
            +<br>
            +       /* request to take full control of GPU before
            re-initialization  */<br>
            +       if (voluntary)<br>
            +               amdgpu_virt_reset_gpu(adev);<br>
            +       else<br>
            +               amdgpu_virt_request_full_gpu(adev, true);<br>
            +<br>
            +<br>
            +       /* Resume IP prior to SMC */<br>
            +       amdgpu_resume_early(adev);<br>
            +<br>
            +       /* we need recover gart prior to run SMC/CP/SDMA
            resume */<br>
            +       amdgpu_ttm_recover_gart(adev);<br>
            +<br>
            +       /* now we are okay to resume SMC/CP/SDMA */<br>
            +       amdgpu_resume_late(adev);<br>
          </div>
        </span></font></blockquote>
    <br>
    <br>
    <blockquote
cite="mid:DM5PR12MB16109EC5F03088C1CFB58FE484430@DM5PR12MB1610.namprd12.prod.outlook.com"
      type="cite"><font size="2"><span style="font-size:10pt;">
          <div class="PlainText">+<br>
            +       amdgpu_irq_gpu_reset_resume_helper(adev);<br>
            +<br>
            +       if (amdgpu_ib_ring_tests(adev))<br>
            +               dev_err(adev->dev, "[GPU_RESET] ib ring
            test failed (%d).\n", r);<br>
            +<br>
            +       /* rellease full control of GPU after ib test */<br>
            +       amdgpu_virt_release_full_gpu(adev, true);<br>
            +<br>
            +       DRM_INFO("recover vram bo from shadow\n");<br>
            +<br>
            +       ring = adev->mman.buffer_funcs_ring;<br>
            +       mutex_lock(&adev->shadow_list_lock);<br>
            +       list_for_each_entry_safe(bo, tmp,
            &adev->shadow_list, shadow_list) {<br>
            +               amdgpu_recover_vram_from_shadow(adev, ring,
            bo, &next);<br>
            +               if (fence) {<br>
            +                       r = fence_wait(fence, false);<br>
            +                       if (r) {<br>
            +                               WARN(r, "recovery from
            shadow isn't completed\n");<br>
            +                               break;<br>
            +                       }<br>
            +               }<br>
            +<br>
            +               fence_put(fence);<br>
            +               fence = next;<br>
            +       }<br>
            +       mutex_unlock(&adev->shadow_list_lock);<br>
            +<br>
            +       if (fence) {<br>
            +               r = fence_wait(fence, false);<br>
            +               if (r)<br>
            +                       WARN(r, "recovery from shadow isn't
            completed\n");<br>
            +       }<br>
            +       fence_put(fence);<br>
            +<br>
            +       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {<br>
            +               struct amdgpu_ring *ring =
            adev->rings[i];<br>
            +               if (!ring || !ring->sched.thread)<br>
            +                       continue;<br>
            +<br>
            +               amd_sched_job_recovery(&ring->sched);<br>
            +               kthread_unpark(ring->sched.thread);<br>
            +       }<br>
            +<br>
            +       drm_helper_resume_force_mode(adev->ddev);<br>
            +      
            ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev,
            resched);<br>
            +       if (r) {<br>
            +               /* bad news, how to tell it to userspace ?
            */<br>
            +               dev_info(adev->dev, "GPU reset
            failed\n");<br>
            +       }<br>
            +<br>
            +       mutex_unlock(&adev->virt.lock_reset);<br>
            +       return r;<br>
            +}<br>
            +<br>
            +/**<br>
              * amdgpu_gpu_reset - reset the asic<br>
              *<br>
              * @adev: amdgpu device pointer<br>
            @@ -2358,7 +2514,7 @@ int amdgpu_gpu_reset(struct
            amdgpu_device *adev)<br>
                     bool need_full_reset;<br>
             <br>
                     if (amdgpu_sriov_vf(adev))<br>
            -               return 0;<br>
            +               return amdgpu_sriov_gpu_reset(adev, true);<br>
             <br>
                     if (!amdgpu_check_soft_reset(adev)) {<br>
                             DRM_INFO("No hardware hang detected. Did
            some blocks stall?\n");<br>
            diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
            b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h<br>
            index 675e12c..73d24df 100644<br>
            --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h<br>
            +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h<br>
            @@ -89,5 +89,6 @@ void amdgpu_virt_kiq_wreg(struct
            amdgpu_device *adev, uint32_t reg, uint32_t v);<br>
             int amdgpu_virt_request_full_gpu(struct amdgpu_device
            *adev, bool init);<br>
             int amdgpu_virt_release_full_gpu(struct amdgpu_device
            *adev, bool init);<br>
             int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);<br>
            +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool
            voluntary);<br>
             <br>
             #endif<br>
            -- <br>
            2.7.4<br>
            <br>
            _______________________________________________<br>
            amd-gfx mailing list<br>
            <a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a><br>
            <a moz-do-not-send="true"
              href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a><br>
          </div>
        </span></font>
      <br>
      <fieldset class="mimeAttachmentHeader"></fieldset>
      <br>
      <pre wrap="">_______________________________________________
amd-gfx mailing list
<a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>
<a class="moz-txt-link-freetext" href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a>
</pre>
    </blockquote>
    <p><br>
    </p>
  </body>
</html>