<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2024-08-29 18:31, Chen, Xiaogang
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:78d992ce-9a81-480c-9acd-5a176d40ada3@amd.com">
      <br>
      <br>
      On 8/29/2024 5:13 PM, Ramesh Errabolu wrote:
      <br>
      <blockquote type="cite">Caution: This message originated from an
        External Source. Use proper caution when opening attachments,
        clicking links, or responding.
        <br>
        <br>
        <br>
        Enables users to update SVM's default granularity, used in
        <br>
        buffer migration and handling of recoverable page faults.
        <br>
        Param value is set in terms of log(numPages(buffer)),
        <br>
        e.g. 9 for a 2 MiB buffer
        <br>
      </blockquote>
      <br>
      I forgot to ask: is this parameter a request from a customer, or
      is it used for debug/experiment purposes? If it is the latter, how
      about putting it in debugfs? There are already many driver parameters.
      <br>
    </blockquote>
    <p>debugfs is not always available, depending on the kernel
      configuration, and debugfs seems to be meant for debugging
      purposes (e.g. /sys/kernel/debug/kfd/mqds, hqds), not for
      functional purposes. One comment is embedded below.<br>
    </p>
    <blockquote type="cite" cite="mid:78d992ce-9a81-480c-9acd-5a176d40ada3@amd.com">
      <br>
      Regards
      <br>
      <br>
      Xiaogang
      <br>
      <br>
      <blockquote type="cite">Signed-off-by: Ramesh Errabolu
        <a class="moz-txt-link-rfc2396E" href="mailto:Ramesh.Errabolu@amd.com">&lt;Ramesh.Errabolu@amd.com&gt;</a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 17 +++++++++++++++++
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  6 ++++++
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 25
        +++++++++++++++----------
        <br>
          4 files changed, 39 insertions(+), 10 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
        b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
        <br>
        index e8c284aea1f2..8eb934af02f2 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
        <br>
        @@ -237,6 +237,7 @@ extern int sched_policy;
        <br>
          extern bool debug_evictions;
        <br>
          extern bool no_system_mem_limit;
        <br>
          extern int halt_if_hws_hang;
        <br>
        +extern uint amdgpu_svm_default_granularity;
        <br>
          #else
        <br>
          static const int __maybe_unused sched_policy =
        KFD_SCHED_POLICY_HWS;
        <br>
          static const bool __maybe_unused debug_evictions; /* = false
        */
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
        <br>
        index b9529948f2b2..442039436cb3 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
        <br>
        @@ -169,6 +169,16 @@ uint amdgpu_sdma_phase_quantum = 32;
        <br>
          char *amdgpu_disable_cu;
        <br>
          char *amdgpu_virtual_display;
        <br>
          bool enforce_isolation;
        <br>
        +
        <br>
        +/* Specifies the default granularity for SVM, used in buffer
        <br>
        + * migration and restoration of backing memory when handling
        <br>
        + * recoverable page faults.
        <br>
        + *
        <br>
        + * The value is given as log(numPages(buffer)); for a 2 MiB
        <br>
        + * buffer it computes to be 9
        <br>
        + */
        <br>
        +uint amdgpu_svm_default_granularity = 9;
        <br>
        +
        <br>
          /*
        <br>
           * OverDrive(bit 14) disabled by default
        <br>
           * GFX DCS(bit 19) disabled by default
        <br>
        @@ -320,6 +330,13 @@ module_param_named(pcie_gen2,
        amdgpu_pcie_gen2, int, 0444);
        <br>
          MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable,
        -1 = auto)");
        <br>
          module_param_named(msi, amdgpu_msi, int, 0444);
        <br>
        <br>
        +/**
        <br>
        + * DOC: svm_default_granularity (uint)
        <br>
        + * Used in buffer migration and handling of recoverable page
        faults
        <br>
        + */
        <br>
        +MODULE_PARM_DESC(svm_default_granularity, "SVM's default
        granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
        <br>
        +module_param_named(svm_default_granularity,
        amdgpu_svm_default_granularity, uint, 0644);
        <br>
        +
        <br>
          /**
        <br>
           * DOC: lockup_timeout (string)
        <br>
           * Set GPU scheduler timeout value in ms.
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        index 9ae9abc6eb43..d6530febabad 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        @@ -868,6 +868,12 @@ struct svm_range_list {
        <br>
                 struct task_struct              *faulting_task;
        <br>
                 /* check point ts decides if page fault recovery need
        be dropped */
        <br>
                 uint64_t                       
        checkpoint_ts[MAX_GPU_INSTANCE];
        <br>
        +
        <br>
        +       /* Default granularity to use in buffer migration
        <br>
        +        * and restoration of backing memory while handling
        <br>
        +        * recoverable page faults
        <br>
        +        */
        <br>
        +       uint8_t default_granularity;
        <br>
          };
        <br>
        <br>
          /* Process data */
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        index b44dec90969f..624bfe317c9c 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        @@ -309,12 +309,13 @@ static void svm_range_free(struct
        svm_range *prange, bool do_unmap)
        <br>
          }
        <br>
        <br>
          static void
        <br>
        -svm_range_set_default_attributes(int32_t *location, int32_t
        *prefetch_loc,
        <br>
        -                                uint8_t *granularity, uint32_t
        *flags)
        <br>
        +svm_range_set_default_attributes(struct svm_range_list *svms,
        <br>
        +                       int32_t *location, uint8_t *granularity,
        <br>
        +                       int32_t *prefetch_loc, uint32_t *flags)
        <br>
          {
        <br>
                 *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
        <br>
                 *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
        <br>
        -       *granularity = 9;
        <br>
        +       *granularity = svms->default_granularity;
        <br>
                 *flags =
        <br>
                         KFD_IOCTL_SVM_FLAG_HOST_ACCESS |
        KFD_IOCTL_SVM_FLAG_COHERENT;
        <br>
          }
        <br>
        @@ -358,9 +359,8 @@ svm_range *svm_range_new(struct
        svm_range_list *svms, uint64_t start,
        <br>
                         bitmap_copy(prange->bitmap_access,
        svms->bitmap_supported,
        <br>
                                     MAX_GPU_INSTANCE);
        <br>
        <br>
        -      
        svm_range_set_default_attributes(&prange->preferred_loc,
        <br>
        -                                       
        &prange->prefetch_loc,
        <br>
        -                                       
        &prange->granularity, &prange->flags);
        <br>
        +       svm_range_set_default_attributes(svms,
        &prange->preferred_loc,
        <br>
        +               &prange->granularity,
        &prange->prefetch_loc, &prange->flags);
        <br>
        <br>
                 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start,
        last);
        <br>
        <br>
        @@ -2694,9 +2694,10 @@ svm_range_get_range_boundaries(struct
        kfd_process *p, int64_t addr,
        <br>
                 *is_heap_stack = vma_is_initial_heap(vma) ||
        vma_is_initial_stack(vma);
        <br>
        <br>
                 start_limit = max(vma->vm_start >> PAGE_SHIFT,
        <br>
        -                     (unsigned long)ALIGN_DOWN(addr, 2UL
        << 8));
        <br>
        +                     (unsigned long)ALIGN_DOWN(addr, 1UL
        << p->svms.default_granularity));
        <br>
                 end_limit = min(vma->vm_end >> PAGE_SHIFT,
        <br>
        -                   (unsigned long)ALIGN(addr + 1, 2UL <<
        8));
        <br>
        +                   (unsigned long)ALIGN(addr + 1, 1UL <<
        p->svms.default_granularity));
        <br>
        +
        <br>
                 /* First range that starts after the fault address */
        <br>
                 node =
        interval_tree_iter_first(&p->svms.objects, addr + 1,
        ULONG_MAX);
        <br>
                 if (node) {
        <br>
        @@ -3240,6 +3241,10 @@ int svm_range_list_init(struct
        kfd_process *p)
        <br>
                         if
        (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
        <br>
                                 bitmap_set(svms->bitmap_supported,
        i, 1);
        <br>
        <br>
        +       /* Update default granularity to one bound by
        user/driver */
        <br>
        +       svms->default_granularity = min_t(u8,
        amdgpu_svm_default_granularity, 0x1B);
        <br>
      </blockquote>
    </blockquote>
    <p>The max granularity is 0x3F (63 bits); why do you limit the max
      granularity to 0x1B?</p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <br>
    <blockquote type="cite" cite="mid:78d992ce-9a81-480c-9acd-5a176d40ada3@amd.com">
      <blockquote type="cite">+       pr_debug("Default SVM Granularity
        to use: %d\n", svms->default_granularity);
        <br>
        +
        <br>
                 return 0;
        <br>
          }
        <br>
        <br>
        @@ -3767,8 +3772,8 @@ svm_range_get_attr(struct kfd_process *p,
        struct mm_struct *mm,
        <br>
                 node = interval_tree_iter_first(&svms->objects,
        start, last);
        <br>
                 if (!node) {
        <br>
                         pr_debug("range attrs not found return default
        values\n");
        <br>
        -               svm_range_set_default_attributes(&location,
        &prefetch_loc,
        <br>
        -                                               
        &granularity, &flags_and);
        <br>
        +               svm_range_set_default_attributes(svms,
        &location,
        <br>
        +                               &granularity,
        &prefetch_loc, &flags_and);
        <br>
                         flags_or = flags_and;
        <br>
                         if (p->xnack_enabled)
        <br>
                                 bitmap_copy(bitmap_access,
        svms->bitmap_supported,
        <br>
        --
        <br>
        2.34.1
        <br>
        <br>
      </blockquote>
    </blockquote>
  </body>
</html>