<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    It really *is* a NOALLOC feature. In other words there is no latency
    improvement on reads because the cache is always checked, even with
    the noalloc flag set.<br>
    <br>
    The only thing it affects is that misses do not enter the cache and so
    don't cause any additional pressure on evicting cache lines.<br>
    <br>
    You might want to double check with the hardware guys, but I'm
    something like 95% sure that it works this way.<br>
    <br>
    Christian.<br>
    <br>
    <div class="moz-cite-prefix">Am 11.05.22 um 09:22 schrieb Marek
      Olšák:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAAxE2A7-crNh7oTTzgm7W7J412_ijrmoiN9ip9iiS1MJmmaB4A@mail.gmail.com">
      <meta http-equiv="content-type" content="text/html; charset=UTF-8">
      <div dir="ltr">
        <div dir="ltr">Bypass means that the contents of the cache are
          ignored, which decreases latency at the cost of no coherency
          between bypassed and normal memory requests. NOA (noalloc)
          means that the cache is checked and can give you cache hits,
          but misses are not cached and the overall latency is higher. I
          don't know what the hw does, but I hope it was misnamed and it
          really means bypass because there is no point in doing cache
          lookups on every memory request if the driver wants to disable
          caching to *decrease* latency in the situations when the cache
          isn't helping.<br>
        </div>
        <div dir="ltr"><br>
        </div>
        <div>Marek<br>
        </div>
        <br>
        <div class="gmail_quote">
          <div dir="ltr" class="gmail_attr">On Wed, May 11, 2022 at 2:15
            AM Lazar, Lijo &lt;<a href="mailto:lijo.lazar@amd.com"
              moz-do-not-send="true" class="moz-txt-link-freetext">lijo.lazar@amd.com</a>&gt;
            wrote:<br>
          </div>
          <blockquote class="gmail_quote" style="margin:0px 0px 0px
            0.8ex;border-left:1px solid
            rgb(204,204,204);padding-left:1ex"><br>
            <br>
            On 5/11/2022 11:36 AM, Christian König wrote:<br>
            > Mhm, it doesn't really bypass MALL. It just doesn't
            allocate any MALL <br>
            > entries on write.<br>
            > <br>
            > How about AMDGPU_VM_PAGE_NO_MALL ?<br>
            <br>
            One more - AMDGPU_VM_PAGE_LLC_* [ LLC = last level cache, *
            = some sort <br>
            of attribute which decides LLC behaviour]<br>
            <br>
            Thanks,<br>
            Lijo<br>
            <br>
            > <br>
            > Christian.<br>
            > <br>
            > Am 10.05.22 um 23:21 schrieb Marek Olšák:<br>
            >> A better name would be:<br>
            >> AMDGPU_VM_PAGE_BYPASS_MALL<br>
            >><br>
            >> Marek<br>
            >><br>
            >> On Fri, May 6, 2022 at 7:23 AM Christian König <br>
            >> <<a
              href="mailto:ckoenig.leichtzumerken@gmail.com"
              target="_blank" moz-do-not-send="true"
              class="moz-txt-link-freetext">ckoenig.leichtzumerken@gmail.com</a>>
            wrote:<br>
            >><br>
            >>     Add the AMDGPU_VM_NOALLOC flag to let userspace
            control MALL<br>
            >>     allocation.<br>
            >><br>
            >>     Only compile tested!<br>
            >><br>
            >>     Signed-off-by: Christian König <<a
              href="mailto:christian.koenig@amd.com" target="_blank"
              moz-do-not-send="true" class="moz-txt-link-freetext">christian.koenig@amd.com</a>><br>
            >>     ---<br>
            >>      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 ++<br>
            >>      drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 3
            +++<br>
            >>      drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 3
            +++<br>
            >>      include/uapi/drm/amdgpu_drm.h           | 2 ++<br>
            >>      4 files changed, 10 insertions(+)<br>
            >><br>
            >>     diff --git
            a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c<br>
            >>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c<br>
            >>     index bf97d8f07f57..d8129626581f 100644<br>
            >>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c<br>
            >>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c<br>
            >>     @@ -650,6 +650,8 @@ uint64_t
            amdgpu_gem_va_map_flags(struct<br>
            >>     amdgpu_device *adev, uint32_t flags)<br>
            >>                     pte_flag |=
            AMDGPU_PTE_WRITEABLE;<br>
            >>             if (flags & AMDGPU_VM_PAGE_PRT)<br>
            >>                     pte_flag |= AMDGPU_PTE_PRT;<br>
            >>     +       if (flags & AMDGPU_VM_PAGE_NOALLOC)<br>
            >>     +               pte_flag |= AMDGPU_PTE_NOALLOC;<br>
            >><br>
            >>             if
            (adev->gmc.gmc_funcs->map_mtype)<br>
            >>                     pte_flag |=
            amdgpu_gmc_map_mtype(adev,<br>
            >>     diff --git
            a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c<br>
            >>     b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c<br>
            >>     index b8c79789e1e4..9077dfccaf3c 100644<br>
            >>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c<br>
            >>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c<br>
            >>     @@ -613,6 +613,9 @@ static void
            gmc_v10_0_get_vm_pte(struct<br>
            >>     amdgpu_device *adev,<br>
            >>             *flags &=
            ~AMDGPU_PTE_MTYPE_NV10_MASK;<br>
            >>             *flags |= (mapping->flags &
            AMDGPU_PTE_MTYPE_NV10_MASK);<br>
            >><br>
            >>     +       *flags &= ~AMDGPU_PTE_NOALLOC;<br>
            >>     +       *flags |= (mapping->flags &
            AMDGPU_PTE_NOALLOC);<br>
            >>     +<br>
            >>             if (mapping->flags &
            AMDGPU_PTE_PRT) {<br>
            >>                     *flags |= AMDGPU_PTE_PRT;<br>
            >>                     *flags |= AMDGPU_PTE_SNOOPED;<br>
            >>     diff --git
            a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c<br>
            >>     b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c<br>
            >>     index 8d733eeac556..32ee56adb602 100644<br>
            >>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c<br>
            >>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c<br>
            >>     @@ -508,6 +508,9 @@ static void
            gmc_v11_0_get_vm_pte(struct<br>
            >>     amdgpu_device *adev,<br>
            >>             *flags &=
            ~AMDGPU_PTE_MTYPE_NV10_MASK;<br>
            >>             *flags |= (mapping->flags &
            AMDGPU_PTE_MTYPE_NV10_MASK);<br>
            >><br>
            >>     +       *flags &= ~AMDGPU_PTE_NOALLOC;<br>
            >>     +       *flags |= (mapping->flags &
            AMDGPU_PTE_NOALLOC);<br>
            >>     +<br>
            >>             if (mapping->flags &
            AMDGPU_PTE_PRT) {<br>
            >>                     *flags |= AMDGPU_PTE_PRT;<br>
            >>                     *flags |= AMDGPU_PTE_SNOOPED;<br>
            >>     diff --git a/include/uapi/drm/amdgpu_drm.h<br>
            >>     b/include/uapi/drm/amdgpu_drm.h<br>
            >>     index 57b9d8f0133a..9d71d6330687 100644<br>
            >>     --- a/include/uapi/drm/amdgpu_drm.h<br>
            >>     +++ b/include/uapi/drm/amdgpu_drm.h<br>
            >>     @@ -533,6 +533,8 @@ struct drm_amdgpu_gem_op {<br>
            >>      #define AMDGPU_VM_MTYPE_UC             (4
            << 5)<br>
            >>      /* Use Read Write MTYPE instead of default
            MTYPE */<br>
            >>      #define AMDGPU_VM_MTYPE_RW             (5
            << 5)<br>
            >>     +/* don't allocate MALL */<br>
            >>     +#define AMDGPU_VM_PAGE_NOALLOC         (1
            << 9)<br>
            >><br>
            >>      struct drm_amdgpu_gem_va {<br>
            >>             /** GEM object handle */<br>
            >>     -- <br>
            >>     2.25.1<br>
            >><br>
            > <br>
          </blockquote>
        </div>
      </div>
    </blockquote>
    <br>
  </body>
</html>