    <div class="moz-cite-prefix">On 2022-04-21 16:17, Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:8c272923-ddfa-9151-4efe-cf8127ea995f@amd.com">Am
      2022-04-19 um 20:47 schrieb Philip Yang:
      <br>
      <blockquote type="cite">Change SVM range mapping flags or access
        attributes don't trigger
        <br>
        migration, if range is already mapped on GPUs we should update
        GPU
        <br>
        mapping, and pass flush_tlb flag to amdgpu vm.
        <br>
        <br>
        Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41
        ++++++++++++++++++----------
        <br>
          1 file changed, 27 insertions(+), 14 deletions(-)
        <br>
        <br>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 6be1695f3a09..0a9bdadf875e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>>  
>>  static void
>>  svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>> -		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
>> +		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
>> +		      bool *update_mapping)
>>  {
>>  	uint32_t i;
>>  	int gpuidx;
>> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS:
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>  		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>> +			*update_mapping = true;
>>  			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>  							       attrs[i].value);
>>  			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
>> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>>  			}
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>> +			*update_mapping = true;
>>  			prange->flags |= attrs[i].value;
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
>> +			*update_mapping = true;
>>  			prange->flags &= ~attrs[i].value;
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
>> @@ -1253,7 +1257,7 @@ static int
>>  svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  		     unsigned long offset, unsigned long npages, bool readonly,
>>  		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
>> -		     struct dma_fence **fence)
>> +		     struct dma_fence **fence, bool flush_tlb)
>>  {
>>  	struct amdgpu_device *adev = pdd->dev->adev;
>>  	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
>> @@ -1291,7 +1295,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>  			 pte_flags);
>>  
>> -		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
>> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
>>  					   last_start, prange->start + i,
>>  					   pte_flags,
>>  					   last_start - prange->start,
>> @@ -1325,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  
>>  static int
>>  svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>  		      unsigned long npages, bool readonly,
>> -		      unsigned long *bitmap, bool wait)
>> +		      unsigned long *bitmap, bool wait, bool flush_tlb)
>>  {
>>  	struct kfd_process_device *pdd;
>>  	struct amdgpu_device *bo_adev;
>> @@ -1360,7 +1364,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>  		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
>>  					 prange->dma_addr[gpuidx],
>> -					 bo_adev, wait ? &fence : NULL);
>> +					 bo_adev, wait ? &fence : NULL,
>> +					 flush_tlb);
>>  		if (r)
>>  			break;
>>  
>> @@ -1481,8 +1486,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>>   * 5. Release page table (and SVM BO) reservation
>>   */
>>  static int svm_range_validate_and_map(struct mm_struct *mm,
>> -				      struct svm_range *prange,
>> -				      int32_t gpuidx, bool intr, bool wait)
>> +				      struct svm_range *prange, int32_t gpuidx,
>> +				      bool intr, bool wait, bool flush_tlb)
>>  {
>>  	struct svm_validate_context ctx;
>>  	unsigned long start, end, addr;
>> @@ -1521,8 +1526,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>  			  prange->bitmap_aip, MAX_GPU_INSTANCE);
>>  	}
>>  
>> -	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
>> -		return 0;
>> +	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
>> +		if (!prange->mapped_to_gpu)
>> +			return 0;
>> +
>> +		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
>> +	}
>>  
>>  	if (prange->actual_loc && !prange->ttm_res) {
>>  		/* This should never happen. actual_loc gets set by
>> @@ -1594,7 +1603,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>  		}
>>  
>>  		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>> -					  ctx.bitmap, wait);
>> +					  ctx.bitmap, wait, flush_tlb);
>>  unlock_out:
>>  		svm_range_unlock(prange);
>> @@ -1690,7 +1699,7 @@ static void svm_range_restore_work(struct work_struct *work)
>>  		mutex_lock(&prange->migrate_mutex);
>>  
>>  		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -					       false, true);
>> +					       false, true, false);
>>  		if (r)
>>  			pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>  				 prange->start);
>> @@ -2828,7 +2837,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>  		}
>>  	}
>>  
>> -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
>> +	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>>  	if (r)
>>  		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>  			 r, svms, prange->start, prange->last);
>> @@ -3241,6 +3250,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  	struct svm_range_list *svms;
>>  	struct svm_range *prange;
>>  	struct svm_range *next;
>> +	bool update_mapping = false;
>> +	bool flush_tlb;
>>  	int r = 0;
>>  
>>  	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
>> @@ -3279,7 +3290,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  		svm_range_add_notifier_locked(mm, prange);
>>  	}
>>  	list_for_each_entry(prange, &update_list, update_list) {
>> -		svm_range_apply_attrs(p, prange, nattr, attrs);
>> +		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>>  		/* TODO: unmap ranges from GPU that lost access */
>>  	}
>>  	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
>> @@ -3312,8 +3323,10 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  			continue;
>>  		}
>>  
>> +		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
>> +
>
> Can we skip the validate_and_map if update_mapping is false?

Yes, if only the range preferred_loc or migration granularity changes, we
don't need to update the mapping, so we can skip validate_and_map. I will
send out a v2 patch.
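
For reference, a minimal sketch of how that skip could look in
svm_range_set_attr(), using the names from this patch; this is hypothetical
v2 code, the actual v2 may differ:

	/* Hypothetical sketch: only revalidate and remap when the range
	 * was migrated or an applied attribute can affect the GPU
	 * mapping. Attribute changes such as preferred location or
	 * granularity leave update_mapping false, so the mapping step
	 * is skipped entirely.
	 */
	if (migrated || update_mapping) {
		flush_tlb = !migrated && update_mapping &&
			    prange->mapped_to_gpu;

		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
					       true, true, flush_tlb);
		if (r)
			pr_debug("failed %d to map svm range\n", r);
	}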

Thanks,

Philip
    <blockquote type="cite" cite="mid:8c272923-ddfa-9151-4efe-cf8127ea995f@amd.com">Other than
      that, the series looks good to me.
      <br>
      <br>
      Regards,
      <br>
        Felix
      <br>
      <br>
      <br>
      <blockquote type="cite">          r =
        svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
        <br>
        -                           true, true);
        <br>
        +                           true, true, flush_tlb);
        <br>
                  if (r)
        <br>
                      pr_debug("failed %d to map svm range\n", r);
        <br>
          </blockquote>