<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html;
      charset=windows-1252">
  </head>
  <body>
    See below.<br>
    <br>
    <div class="moz-cite-prefix">Am 13.07.21 um 10:32 schrieb Wang,
      Kevin(Yang):<br>
    </div>
    <blockquote type="cite"
cite="mid:CO6PR12MB54734AF103090E1C567E4EF7A2149@CO6PR12MB5473.namprd12.prod.outlook.com">
      <meta http-equiv="Content-Type" content="text/html;
        charset=windows-1252">
      <style type="text/css" style="display:none;">P {margin-top:0;margin-bottom:0;}</style>
      <p
        style="font-family:Arial;font-size:10pt;color:#0000FF;margin:15pt;"
        align="Left">
        [AMD Official Use Only]<br>
      </p>
      <br>
      <div>
        <div style="font-family: Calibri, Arial, Helvetica, sans-serif;
          font-size: 12pt; color: rgb(0, 0, 0);">
          <comments inline></div>
        <div style="font-family: Calibri, Arial, Helvetica, sans-serif;
          font-size: 12pt; color: rgb(0, 0, 0);">
          <br>
        </div>
        <hr tabindex="-1" style="display:inline-block; width:98%">
        <div id="divRplyFwdMsg" dir="ltr"><font style="font-size:11pt"
            face="Calibri, sans-serif" color="#000000"><b>From:</b>
            Christian König <a class="moz-txt-link-rfc2396E" href="mailto:ckoenig.leichtzumerken@gmail.com"><ckoenig.leichtzumerken@gmail.com></a><br>
            <b>Sent:</b> Tuesday, July 13, 2021 3:11 PM<br>
            <b>To:</b> Wang, Kevin(Yang) <a class="moz-txt-link-rfc2396E" href="mailto:Kevin1.Wang@amd.com"><Kevin1.Wang@amd.com></a>;
            <a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>
            <a class="moz-txt-link-rfc2396E" href="mailto:amd-gfx@lists.freedesktop.org"><amd-gfx@lists.freedesktop.org></a><br>
            <b>Cc:</b> Lazar, Lijo <a class="moz-txt-link-rfc2396E" href="mailto:Lijo.Lazar@amd.com"><Lijo.Lazar@amd.com></a>; Deucher,
            Alexander <a class="moz-txt-link-rfc2396E" href="mailto:Alexander.Deucher@amd.com"><Alexander.Deucher@amd.com></a>; Min, Frank
            <a class="moz-txt-link-rfc2396E" href="mailto:Frank.Min@amd.com"><Frank.Min@amd.com></a>; Koenig, Christian
            <a class="moz-txt-link-rfc2396E" href="mailto:Christian.Koenig@amd.com"><Christian.Koenig@amd.com></a>; Zhang, Hawking
            <a class="moz-txt-link-rfc2396E" href="mailto:Hawking.Zhang@amd.com"><Hawking.Zhang@amd.com></a><br>
            <b>Subject:</b> Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize
            vram access in amdgpu_ttm_access_memory()</font>
          <div> </div>
        </div>
        <div class="BodyFragment"><font size="2"><span
              style="font-size:11pt">
              <div class="PlainText">Am 13.07.21 um 05:23 schrieb Kevin
                Wang:<br>
                > 1. using vram aper to access vram if possible<br>
                > 2. avoid MM_INDEX/MM_DATA is not working when mmio
                protect feature is<br>
                > enabled.<br>
                ><br>
                > Signed-off-by: Kevin Wang
                <a class="moz-txt-link-rfc2396E" href="mailto:kevin1.wang@amd.com"><kevin1.wang@amd.com></a><br>
                > ---<br>
                >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126
                +++++++++++++++++-------<br>
                >   1 file changed, 89 insertions(+), 37 deletions(-)<br>
                ><br>
                > diff --git
                a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
                b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
                > index 2aa2eb5de37a..8f6f605bc459 100644<br>
                > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
                > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
                > @@ -1407,6 +1407,91 @@ static bool
                amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object
                *bo,<br>
                >        return ttm_bo_eviction_valuable(bo, place);<br>
                >   }<br>
                >   <br>
                > +static void amdgpu_ttm_vram_mm_align_access(struct
                amdgpu_device *adev, loff_t pos,<br>
                > +                                         uint32_t
                *value, bool write)<br>
                <br>
                Please drop the _vram_ and _align_ part from the name.
                Just <br>
                amdgpu_device_mm_access.</div>
              <div class="PlainText"><br>
              </div>
              <div class="PlainText">[kevin]: I will correct it in next
                patch.</div>
              <div class="PlainText"><br>
                > +{<br>
                > +     unsigned long flags;<br>
                > +<br>
                > +     BUG_ON(!IS_ALIGNED(pos, 4));<br>
                > +<br>
                > +    
                spin_lock_irqsave(&adev->mmio_idx_lock, flags);<br>
                > +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) |
                0x80000000);<br>
                > +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >>
                31);<br>
                > +     if (write)<br>
                > +             WREG32_NO_KIQ(mmMM_DATA, *value);<br>
                > +     else<br>
                > +             *value = RREG32_NO_KIQ(mmMM_DATA);<br>
                > +    
                spin_unlock_irqrestore(&adev->mmio_idx_lock,
                flags);<br>
                > +}<br>
                <br>
                That should still be in amdgpu_device.c and you
                completely messed up the <br>
                drm_dev_enter()/drm_dev_exit() annotation.<br>
                <br>
                Looks like you are working on an old branch where that
                is not yet merged?</div>
              <div class="PlainText"><br>
              </div>
              <div class="PlainText">[kevin]: yes, I'm working on
                amd-staging-drm-next branch, the following patch (from
                drm-next-misc) is not merged into this branch.</div>
            </span></font></div>
      </div>
    </blockquote>
    <br>
    Ok then just wait a bit. Alex wanted to update the
    amd-staging-drm-next branch in the next few days.<br>
    <br>
    There is an internal mail thread about that, maybe ping him on this.<br>
    <br>
    Christian.<br>
    <br>
    <blockquote type="cite"
cite="mid:CO6PR12MB54734AF103090E1C567E4EF7A2149@CO6PR12MB5473.namprd12.prod.outlook.com">
      <div>
        <div class="BodyFragment"><font size="2"><span
              style="font-size:11pt">
              <div class="PlainText"><br>
              </div>
              drm/amdgpu: Guard against write accesses after device
              removal
              <div><br>
              </div>
              <div>This should prevent writing to memory or IO ranges
                possibly</div>
              <div>already allocated for other uses after our device is
                removed.</div>
              <div><br>
              </div>
              <div>v5:</div>
              <div>Protect more places wher memcopy_to/form_io takes
                place</div>
              <div>Protect IB submissions</div>
              <div><br>
              </div>
              <div>v6: Switch to !drm_dev_enter instead of scoping
                entire code</div>
              <div>with brackets.</div>
              <div><br>
              </div>
              <div>v7:</div>
              <div>Drop guard of HW ring commands emission protection
                since they</div>
              <div>are in GART and not in MMIO.</div>
              <div><br>
              </div>
              <div>Signed-off-by: Andrey Grodzovsky
                <a class="moz-txt-link-rfc2396E" href="mailto:andrey.grodzovsky@amd.com"><andrey.grodzovsky@amd.com></a></div>
              Reviewed-by: Alex Deucher
              <a class="moz-txt-link-rfc2396E" href="mailto:alexander.deucher@amd.com"><alexander.deucher@amd.com></a>
              <div class="PlainText"><span
                  style="box-sizing:border-box;font-family:"Segoe
                  UI", system-ui, "Apple Color Emoji",
                  "Segoe UI Emoji", sans-serif;font-size:14px"></span><br>
                <br>
                > +<br>
                > +static void amdgpu_ttm_vram_mm_access(struct
                amdgpu_device *adev, loff_t pos,<br>
                > +                                   void *buf,
                size_t size, bool write)<br>
                > +{<br>
                > +     while (size) {<br>
                > +             uint64_t aligned_pos =
                ALIGN_DOWN(pos, 4);<br>
                > +             uint64_t bytes = 4 - (pos & 0x3);<br>
                > +             uint32_t shift = (pos & 0x3) * 8;<br>
                > +             uint32_t mask = 0xffffffff <<
                shift;<br>
                > +             uint32_t value = 0;<br>
                > +<br>
                > +             if (size < bytes) {<br>
                > +                     mask &= 0xffffffff
                >> (bytes - size) * 8;<br>
                > +                     bytes = size;<br>
                > +             }<br>
                > +<br>
                > +             if (mask != 0xffffffff) {<br>
                > +                    
                amdgpu_ttm_vram_mm_align_access(adev, aligned_pos,
                &value, false);<br>
                > +                     if (write) {<br>
                > +                             value &= ~mask;<br>
                > +                             value |= (*(uint32_t
                *)buf << shift) & mask;<br>
                > +                            
                amdgpu_ttm_vram_mm_align_access(adev, aligned_pos,
                &value, true);<br>
                > +                     } else {<br>
                > +                             value = (value &
                mask) >> shift;<br>
                > +                             memcpy(buf,
                &value, bytes);<br>
                > +                     }<br>
                > +             } else {<br>
                > +                    
                amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf,
                write);<br>
                > +             }<br>
                > +<br>
                > +             pos += bytes;<br>
                > +             buf += bytes;<br>
                > +             size -= bytes;<br>
                > +     }<br>
                > +}<br>
                > +<br>
                > +static void amdgpu_ttm_vram_access(struct
                amdgpu_device *adev, loff_t pos,<br>
                > +                                void *buf, size_t
                size, bool write)<br>
                > +{<br>
                > +     uint64_t last;<br>
                > +<br>
                > +#ifdef CONFIG_64BIT<br>
                > +     last = min(pos + size,
                adev->gmc.visible_vram_size);<br>
                > +     if (last > pos) {<br>
                > +             void __iomem *addr =
                adev->mman.aper_base_kaddr + pos;<br>
                > +             size_t count = last - pos;<br>
                > +<br>
                > +             if (write) {<br>
                > +                     memcpy_toio(addr, buf,
                count);<br>
                > +                     mb();<br>
                > +                     amdgpu_device_flush_hdp(adev,
                NULL);<br>
                > +             } else {<br>
                > +                    
                amdgpu_device_invalidate_hdp(adev, NULL);<br>
                > +                     mb();<br>
                > +                     memcpy_fromio(buf, addr,
                count);<br>
                > +             }<br>
                > +<br>
                > +             if (count == size)<br>
                > +                     return;<br>
                > +<br>
                > +             pos += count;<br>
                > +             buf += count;<br>
                > +             size -= count;<br>
                > +     }<br>
                > +#endif<br>
                <br>
                I would put this as a separate function into
                amdgpu_device.c.<br>
                <br>
                But all of this should only be the second step since we
                need a much <br>
                smaller patch for backporting first.<br>
                <br>
                > +<br>
                > +     amdgpu_ttm_vram_mm_access(adev, pos, buf,
                size, write);<br>
                > +}<br>
                > +<br>
                >   /**<br>
                >    * amdgpu_ttm_access_memory - Read or Write
                memory that backs a buffer object.<br>
                >    *<br>
                > @@ -1426,8 +1511,6 @@ static int
                amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,<br>
                >        struct amdgpu_bo *abo =
                ttm_to_amdgpu_bo(bo);<br>
                >        struct amdgpu_device *adev =
                amdgpu_ttm_adev(abo->tbo.bdev);<br>
                >        struct amdgpu_res_cursor cursor;<br>
                > -     unsigned long flags;<br>
                > -     uint32_t value = 0;<br>
                >        int ret = 0;<br>
                >   <br>
                >        if (bo->mem.mem_type != TTM_PL_VRAM)<br>
                > @@ -1435,41 +1518,10 @@ static int
                amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,<br>
                >   <br>
                >        amdgpu_res_first(&bo->mem, offset,
                len, &cursor);<br>
                >        while (cursor.remaining) {<br>
                > -             uint64_t aligned_pos = cursor.start
                & ~(uint64_t)3;<br>
                > -             uint64_t bytes = 4 - (cursor.start
                & 3);<br>
                > -             uint32_t shift = (cursor.start &
                3) * 8;<br>
                > -             uint32_t mask = 0xffffffff <<
                shift;<br>
                > -<br>
                > -             if (cursor.size < bytes) {<br>
                > -                     mask &= 0xffffffff
                >> (bytes - cursor.size) * 8;<br>
                > -                     bytes = cursor.size;<br>
                > -             }<br>
                > -<br>
                > -             if (mask != 0xffffffff) {<br>
                > -                    
                spin_lock_irqsave(&adev->mmio_idx_lock, flags);<br>
                > -                     WREG32_NO_KIQ(mmMM_INDEX,
                ((uint32_t)aligned_pos) | 0x80000000);<br>
                > -                     WREG32_NO_KIQ(mmMM_INDEX_HI,
                aligned_pos >> 31);<br>
                > -                     value =
                RREG32_NO_KIQ(mmMM_DATA);<br>
                > -                     if (write) {<br>
                > -                             value &= ~mask;<br>
                > -                             value |= (*(uint32_t
                *)buf << shift) & mask;<br>
                > -                            
                WREG32_NO_KIQ(mmMM_DATA, value);<br>
                > -                     }<br>
                > -                    
                spin_unlock_irqrestore(&adev->mmio_idx_lock,
                flags);<br>
                > -                     if (!write) {<br>
                > -                             value = (value &
                mask) >> shift;<br>
                > -                             memcpy(buf,
                &value, bytes);<br>
                > -                     }<br>
                <br>
                This here is the problematic part and should use <br>
                amdgpu_ttm_vram_access() instead.<br>
                <br>
                That should be implemented first since we might need to
                backport that.<br>
                <br>
                Regards,<br>
                Christian.<br>
                <br>
                > -             } else {<br>
                > -                     bytes = cursor.size &
                ~0x3ULL;<br>
                > -                    
                amdgpu_device_vram_access(adev, cursor.start,<br>
                > -                                              
                (uint32_t *)buf, bytes,<br>
                > -                                              
                write);<br>
                > -             }<br>
                > -<br>
                > -             ret += bytes;<br>
                > -             buf = (uint8_t *)buf + bytes;<br>
                > -             amdgpu_res_next(&cursor, bytes);<br>
                > +             amdgpu_ttm_vram_access(adev,
                cursor.start, buf, cursor.size, write);<br>
                > +             ret += cursor.size;<br>
                > +             buf += cursor.size;<br>
                > +             amdgpu_res_next(&cursor,
                cursor.size);<br>
                >        }<br>
                >   <br>
                >        return ret;<br>
                <br>
              </div>
            </span></font></div>
      </div>
    </blockquote>
    <br>
  </body>
</html>