<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <div class="moz-cite-prefix">On 2024-10-09 17:20, Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:d2aac83f-4a2c-4f90-9c0c-eb471e1c5933@amd.com">
      <br>
      On 2024-10-04 16:28, Philip Yang wrote:
      <br>
      <blockquote type="cite">Per process device data pdd->vram_usage
        is used by rocm-smi to report
        <br>
        VRAM usage, this is currently missing the svm_bo usage
        accounting, so
        <br>
        "rocm-smi --showpids" per process report is incorrect.
        <br>
        <br>
        Add pdd->vram_usage accounting for svm_bo and change type to
        atomic64_t
        <br>
        because it is updated outside process mutex now.
        <br>
        <br>
        Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 22
        ++++++++++++++++++++++
        <br>
          4 files changed, 28 insertions(+), 6 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
        <br>
        index a1f191a5984b..065d87841459 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
        <br>
        @@ -1148,7 +1148,7 @@ static int
        kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
        <br>
                    if (flags &
        KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
        <br>
                      size >>= 1;
        <br>
        -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage +
        PAGE_ALIGN(size));
        <br>
        +        atomic64_add(PAGE_ALIGN(size),
        &pdd->vram_usage);
        <br>
              }
        <br>
                mutex_unlock(&p->mutex);
        <br>
        @@ -1219,7 +1219,7 @@ static int
        kfd_ioctl_free_memory_of_gpu(struct file *filep,
        <br>
                  kfd_process_device_remove_obj_handle(
        <br>
                      pdd, GET_IDR_HANDLE(args->handle));
        <br>
          -    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage -
        size);
        <br>
        +    atomic64_sub(size, &pdd->vram_usage);
        <br>
            err_unlock:
        <br>
          err_pdd:
        <br>
        @@ -2347,7 +2347,7 @@ static int
        criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
        <br>
              } else if (bo_bucket->alloc_flags &
        KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
        <br>
                  bo_bucket->restored_offset = offset;
        <br>
                  /* Update the VRAM usage count */
        <br>
        -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage +
        bo_bucket->size);
        <br>
        +        atomic64_add(bo_bucket->size,
        &pdd->vram_usage);
        <br>
              }
        <br>
              return 0;
        <br>
          }
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        index 6a5bf88cc232..9e5ca0b93b2a 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        @@ -775,7 +775,7 @@ struct kfd_process_device {
        <br>
              enum kfd_pdd_bound bound;
        <br>
                /* VRAM usage */
        <br>
        -    uint64_t vram_usage;
        <br>
        +    atomic64_t vram_usage;
        <br>
              struct attribute attr_vram;
        <br>
              char vram_filename[MAX_SYSFS_FILENAME_LEN];
        <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        index 7909dfd158be..4810521736a9 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct
        kobject *kobj, struct attribute *attr,
        <br>
              } else if (strncmp(attr->name, "vram_", 5) == 0) {
        <br>
                  struct kfd_process_device *pdd = container_of(attr,
        struct kfd_process_device,
        <br>
                                            attr_vram);
        <br>
        -        return snprintf(buffer, PAGE_SIZE, "%llu\n",
        READ_ONCE(pdd->vram_usage));
        <br>
        +        return snprintf(buffer, PAGE_SIZE, "%llu\n",
        atomic64_read(&pdd->vram_usage));
        <br>
              } else if (strncmp(attr->name, "sdma_", 5) == 0) {
        <br>
                  struct kfd_process_device *pdd = container_of(attr,
        struct kfd_process_device,
        <br>
                                            attr_sdma);
        <br>
        @@ -1625,7 +1625,7 @@ struct kfd_process_device
        *kfd_create_process_device_data(struct kfd_node *dev,
        <br>
              pdd->bound = PDD_UNBOUND;
        <br>
              pdd->already_dequeued = false;
        <br>
              pdd->runtime_inuse = false;
        <br>
        -    pdd->vram_usage = 0;
        <br>
        +    atomic64_set(&pdd->vram_usage, 0);
        <br>
              pdd->sdma_past_activity_counter = 0;
        <br>
              pdd->user_gpu_id = dev->id;
        <br>
              atomic64_set(&pdd->evict_duration_counter, 0);
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        index 857ec6f23bba..61891ea6b1ac 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        @@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct
        svm_range_bo *svm_bo)
        <br>
          static void svm_range_bo_release(struct kref *kref)
        <br>
          {
        <br>
              struct svm_range_bo *svm_bo;
        <br>
        +    struct mm_struct *mm = NULL;
        <br>
                svm_bo = container_of(kref, struct svm_range_bo, kref);
        <br>
              pr_debug("svm_bo 0x%p\n", svm_bo);
        <br>
        @@ -405,6 +406,22 @@ static void svm_range_bo_release(struct
        kref *kref)
        <br>
                  spin_lock(&svm_bo->list_lock);
        <br>
              }
        <br>
              spin_unlock(&svm_bo->list_lock);
        <br>
        +
        <br>
        +    if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
        <br>
        +        struct kfd_process_device *pdd;
        <br>
        +        struct kfd_process *p;
        <br>
      </blockquote>
      <br>
      Move struct mm_struct *mm here as well. It's only needed in this
      block and should not be used outside.
      <br>
    </blockquote>
    Yes, mm is only used in this block. If svm_bo->node is changed to
    svm_bo->pdd, this entire block will be dropped.<br>
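    <p>For completeness, scoping mm the way you suggest would look
      roughly like this (a sketch of the hunk above only; it becomes
      moot once the block is dropped in favor of the pdd approach):<br>
    </p>
    <pre>	/* sketch: mm declared inside the only block that uses it */
	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
		struct mm_struct *mm = svm_bo->eviction_fence->mm;
		struct kfd_process_device *pdd;
		struct kfd_process *p;

		p = kfd_lookup_process_by_mm(mm);
		if (p) {
			pdd = kfd_get_process_device_data(svm_bo->node, p);
			if (pdd)
				atomic64_sub(amdgpu_bo_size(svm_bo->bo),
					     &pdd->vram_usage);
			kfd_unref_process(p);
		}
		mmput(mm);
	}</pre>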
    <blockquote type="cite" cite="mid:d2aac83f-4a2c-4f90-9c0c-eb471e1c5933@amd.com">
      <br>
      <br>
      <blockquote type="cite">+
        <br>
        +        mm = svm_bo->eviction_fence->mm;
        <br>
        +        p = kfd_lookup_process_by_mm(mm);
        <br>
        +        if (p) {
        <br>
        +            pdd = kfd_get_process_device_data(svm_bo->node,
        p);
        <br>
        +            if (pdd)
        <br>
        +                atomic64_sub(amdgpu_bo_size(svm_bo->bo),
        &pdd->vram_usage);
        <br>
        +            kfd_unref_process(p);
        <br>
        +        }
        <br>
        +        mmput(mm);
        <br>
        +    }
        <br>
        +
        <br>
              if
        (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
        <br>
                  /* We're not in the eviction worker. Signal the fence.
        */
        <br>
                 
        dma_fence_signal(&svm_bo->eviction_fence->base);
        <br>
        @@ -532,6 +549,7 @@ int
        <br>
          svm_range_vram_node_new(struct kfd_node *node, struct
        svm_range *prange,
        <br>
                      bool clear)
        <br>
          {
        <br>
        +    struct kfd_process_device *pdd;
        <br>
              struct amdgpu_bo_param bp;
        <br>
              struct svm_range_bo *svm_bo;
        <br>
              struct amdgpu_bo_user *ubo;
        <br>
        @@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node
        *node, struct svm_range *prange,
        <br>
              list_add(&prange->svm_bo_list,
        &svm_bo->range_list);
        <br>
              spin_unlock(&svm_bo->list_lock);
        <br>
          +    pdd = svm_range_get_pdd_by_node(prange, node);
        <br>
        +    if (pdd)
        <br>
        +        atomic64_add(amdgpu_bo_size(bo),
        &pdd->vram_usage);
        <br>
        +
        <br>
      </blockquote>
      <br>
      Would it make sense to save the pdd pointer in the svm_bo struct?
      The effort to look up the mm, process and pdd in
      svm_range_bo_release seems quite high.
      <br>
    </blockquote>
    Thanks for the good idea.<br>
    <blockquote type="cite" cite="mid:d2aac83f-4a2c-4f90-9c0c-eb471e1c5933@amd.com">
      <br>
      You could replace svm_bo->node with svm_bo->pdd. Then you
      can still get the node with svm_bo->pdd->dev without growing
      the size of the structure. This assumes that the svm_bo cannot
      outlive the pdd.
      <br>
    </blockquote>
    <p>Yes, svm_range_list_fini is called before kfd_process_destroy_pdds
      on process exit, so svm_bo->pdd will always be valid. I will send
      a new patch series.<br>
    </p>
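    <p>Roughly what I have in mind for the next revision (a sketch, not
      the final patch; the pdd field simply follows your suggestion and
      replaces svm_bo->node, with the node still reachable as
      svm_bo->pdd->dev):<br>
    </p>
    <pre>/* sketch: svm_range_bo keeps the owning pdd instead of the node */
struct svm_range_bo {
	/* ... existing members ... */
	struct kfd_process_device *pdd;	/* replaces node; node is pdd->dev */
};

/* svm_range_vram_node_new(): look up the pdd once and charge the VRAM */
	svm_bo->pdd = svm_range_get_pdd_by_node(prange, node);
	if (svm_bo->pdd)
		atomic64_add(amdgpu_bo_size(bo), &svm_bo->pdd->vram_usage);

/* svm_range_bo_release(): no mm/process lookup needed any more */
	if (svm_bo->pdd)
		atomic64_sub(amdgpu_bo_size(svm_bo->bo), &svm_bo->pdd->vram_usage);</pre>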
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:d2aac83f-4a2c-4f90-9c0c-eb471e1c5933@amd.com">
      <br>
      Regards,
      <br>
        Felix
      <br>
      <br>
      <br>
      <blockquote type="cite">      return 0;
        <br>
            reserve_bo_failed:
        <br>
      </blockquote>
    </blockquote>
  </body>
</html>