<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2023-09-15 17:06, Chen, Xiaogang
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:28e8c90b-0ca8-6c66-0ac5-18277aa0ddf3@amd.com">
      <br>
      On 9/15/2023 8:28 AM, Philip Yang wrote:
      <br>
      <blockquote type="cite">Caution: This message originated from an
        External Source. Use proper caution when opening attachments,
        clicking links, or responding.
        <br>
        <br>
        <br>
        If new range is splited to multiple pranges with
        max_svm_range_pages
        <br>
        alignment and added to update_list, svm validate and map should
        keep
        <br>
        going after error to make sure maps_to_gpu flag is up to date
        for the
        <br>
        whole range.
        <br>
        <br>
        svm validate and map update set prange->mapped_to_gpu after
        mapping to
        <br>
        GPUs successfully, otherwise clear prange->mapped_to_gpu flag
        (for
        <br>
        update mapping case) instead of setting error flag, we can
        remove
        <br>
        the redundant error flag to simpliy code.
        <br>
        <br>
        Update prange->mapped_to_gpu flag inside svm_range_lock, to
        guarant we
        <br>
        always set prange invalid flag to evict queues or unmap from
        GPUs before
        <br>
        the system memory is moved.
        <br>
        <br>
        After svm validate and map return error, the caller retry will
        update
        <br>
        the mapping for the whole range.
        <br>
        <br>
        Fixes: c22b04407097 ("drm/amdkfd: flag added to handle errors
        from svm validate and map")
        <br>
        Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++----------
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
        <br>
          2 files changed, 8 insertions(+), 11 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        index 5d7ba7dbf6ce..26018b1d6138 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        @@ -818,7 +818,7 @@ svm_range_is_same_attrs(struct kfd_process
        *p, struct svm_range *prange,
        <br>
                         }
        <br>
                 }
        <br>
        <br>
        -       return !prange->is_error_flag;
        <br>
        +       return true;
        <br>
          }
        <br>
        <br>
          /**
        <br>
        @@ -1724,20 +1724,17 @@ static int
        svm_range_validate_and_map(struct mm_struct *mm,
        <br>
                                                   ctx->bitmap, wait,
        flush_tlb);
        <br>
        <br>
          unlock_out:
        <br>
        +               prange->mapped_to_gpu = !r;
        <br>
      </blockquote>
      <br>
      I do not understand why set prange->mapped_to_gpu here? It
      handles one vma, not for all prange. If there are multiple vma and
      first vma handle is ok, and second vma handle fail at
      amdgpu_hmm_range_get_pages or svm_range_dma_map, you would get
      prange->mapped_to_gpu be true, but only part of pragne got
      mapped, right?
      <br>
    </blockquote>
    <p>If all vmas map to gpu successfully, prange->mapped_to_gpu set
      to true, otherwise, prange->mapped_to_gpu set to false, and
      then svm validate map to gpu return failed, the caller will retry
      if error code is -EAGAIN.</p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:28e8c90b-0ca8-6c66-0ac5-18277aa0ddf3@amd.com">
      <br>
      <br>
      Regards
      <br>
      <br>
      Xiaogang
      <br>
      <br>
      <blockquote type="cite">                 svm_range_unlock(prange);
        <br>
        <br>
                         addr = next;
        <br>
                 }
        <br>
        <br>
        -       if (addr == end) {
        <br>
        +       if (addr == end)
        <br>
                         prange->validated_once = true;
        <br>
        -               prange->mapped_to_gpu = true;
        <br>
        -       }
        <br>
        <br>
          unreserve_out:
        <br>
                 svm_range_unreserve_bos(ctx);
        <br>
        -
        <br>
        -       prange->is_error_flag = !!r;
        <br>
                 if (!r)
        <br>
                         prange->validate_timestamp =
        ktime_get_boottime();
        <br>
        <br>
        @@ -2106,7 +2103,8 @@ svm_range_add(struct kfd_process *p,
        uint64_t start, uint64_t size,
        <br>
                         next = interval_tree_iter_next(node, start,
        last);
        <br>
                         next_start = min(node->last, last) + 1;
        <br>
        <br>
        -               if (svm_range_is_same_attrs(p, prange, nattr,
        attrs)) {
        <br>
        +               if (svm_range_is_same_attrs(p, prange, nattr,
        attrs) &&
        <br>
        +                   prange->mapped_to_gpu) {
        <br>
                                 /* nothing to do */
        <br>
                         } else if (node->start < start ||
        node->last > last) {
        <br>
                                 /* node intersects the update range and
        its attributes
        <br>
        @@ -3519,7 +3517,7 @@ svm_range_set_attr(struct kfd_process *p,
        struct mm_struct *mm,
        <br>
                 struct svm_range *next;
        <br>
                 bool update_mapping = false;
        <br>
                 bool flush_tlb;
        <br>
        -       int r = 0;
        <br>
        +       int r, ret = 0;
        <br>
        <br>
                 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages
        0x%llx\n",
        <br>
                          p->pasid, &p->svms, start, start +
        size - 1, size);
        <br>
        @@ -3607,7 +3605,7 @@ svm_range_set_attr(struct kfd_process *p,
        struct mm_struct *mm,
        <br>
          out_unlock_range:
        <br>
                         mutex_unlock(&prange->migrate_mutex);
        <br>
                         if (r)
        <br>
        -                       break;
        <br>
        +                       ret = r;
        <br>
                 }
        <br>
        <br>
                 dynamic_svm_range_dump(svms);
        <br>
        @@ -3620,7 +3618,7 @@ svm_range_set_attr(struct kfd_process *p,
        struct mm_struct *mm,
        <br>
                 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done,
        r=%d\n", p->pasid,
        <br>
                          &p->svms, start, start + size - 1, r);
        <br>
        <br>
        -       return r;
        <br>
        +       return ret ? ret : r;
        <br>
          }
        <br>
        <br>
          static int
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
        b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
        <br>
        index 9e668eeefb32..91715bf3233c 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
        <br>
        @@ -134,7 +134,6 @@ struct svm_range {
        <br>
                 DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
        <br>
                 bool                            validated_once;
        <br>
                 bool                            mapped_to_gpu;
        <br>
        -       bool                            is_error_flag;
        <br>
          };
        <br>
        <br>
          static inline void svm_range_lock(struct svm_range *prange)
        <br>
        --
        <br>
        2.35.1
        <br>
        <br>
      </blockquote>
    </blockquote>
  </body>
</html>