<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2023-09-20 10:35, Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:b6fb1ca2-906c-08c5-9dd7-34e7f21af870@amd.com">
      <br>
      On 2023-09-20 10:20, Philip Yang wrote:
      <br>
      <blockquote type="cite">
        <br>
        <br>
        On 2023-09-19 17:15, Felix Kuehling wrote:
        <br>
        <blockquote type="cite">
          <br>
          On 2023-09-19 10:21, Philip Yang wrote:
          <br>
<blockquote type="cite">If new range is split into multiple
            pranges with max_svm_range_pages
            <br>
            alignment and added to update_list, svm validate and map
            should keep
            <br>
            going after error to make sure prange->mapped_to_gpu flag
            is up to date
            <br>
            for the whole range.
            <br>
            <br>
            svm validate and map update set prange->mapped_to_gpu
            after mapping to
            <br>
            GPUs successfully, otherwise clear prange->mapped_to_gpu
            flag (for
            <br>
            update mapping case) instead of setting error flag, we can
            remove
            <br>
            the redundant error flag to simplify the code.
            <br>
            <br>
            Refactor to remove goto and update prange->mapped_to_gpu
            flag inside
            <br>
            svm_range_lock, to guarantee we always evict queues or unmap
            from GPUs if
            <br>
            there are invalid ranges.
            <br>
            <br>
            After svm validate and map returns error -EAGAIN, the caller
            retry will
            <br>
            update the mapping for the whole range again.
            <br>
            <br>
            Fixes: c22b04407097 ("drm/amdkfd: flag added to handle
            errors from svm validate and map")
            <br>
            Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
            <br>
            ---
            <br>
              drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 78
            +++++++++++++---------------
            <br>
              drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
            <br>
              2 files changed, 36 insertions(+), 43 deletions(-)
            <br>
            <br>
            diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
            b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
            <br>
            index 50c29fd844fb..4812f4ac5579 100644
            <br>
            --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
            <br>
            +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
            <br>
            @@ -818,7 +818,7 @@ svm_range_is_same_attrs(struct
            kfd_process *p, struct svm_range *prange,
            <br>
                      }
            <br>
                  }
            <br>
              -    return !prange->is_error_flag;
            <br>
            +    return true;
            <br>
              }
            <br>
                /**
            <br>
            @@ -1671,7 +1671,7 @@ static int
            svm_range_validate_and_map(struct mm_struct *mm,
            <br>
                    start = prange->start << PAGE_SHIFT;
            <br>
                  end = (prange->last + 1) << PAGE_SHIFT;
            <br>
            -    for (addr = start; addr < end && !r; ) {
            <br>
            +    for (addr = start; !r && addr < end; ) {
            <br>
                      struct hmm_range *hmm_range;
            <br>
                      struct vm_area_struct *vma;
            <br>
                      unsigned long next;
            <br>
            @@ -1680,62 +1680,55 @@ static int
            svm_range_validate_and_map(struct mm_struct *mm,
            <br>
                      bool readonly;
            <br>
                        vma = vma_lookup(mm, addr);
            <br>
            -        if (!vma) {
            <br>
            +        if (vma) {
            <br>
            +            readonly = !(vma->vm_flags & VM_WRITE);
            <br>
            +
            <br>
            +            next = min(vma->vm_end, end);
            <br>
            +            npages = (next - addr) >> PAGE_SHIFT;
            <br>
            +            WRITE_ONCE(p->svms.faulting_task, current);
            <br>
            +            r =
            amdgpu_hmm_range_get_pages(&prange->notifier, addr,
            npages,
            <br>
            +                               readonly, owner, NULL,
            <br>
            +                               &hmm_range);
            <br>
            +            WRITE_ONCE(p->svms.faulting_task, NULL);
            <br>
            +            if (r) {
            <br>
            +                pr_debug("failed %d to get svm range
            pages\n", r);
            <br>
            +                if (r == -EBUSY)
            <br>
            +                    r = -EAGAIN;
            <br>
            +            }
            <br>
            +        } else {
            <br>
                          r = -EFAULT;
            <br>
            -            goto unreserve_out;
            <br>
            -        }
            <br>
            -        readonly = !(vma->vm_flags & VM_WRITE);
            <br>
            -
            <br>
            -        next = min(vma->vm_end, end);
            <br>
            -        npages = (next - addr) >> PAGE_SHIFT;
            <br>
            -        WRITE_ONCE(p->svms.faulting_task, current);
            <br>
            -        r =
            amdgpu_hmm_range_get_pages(&prange->notifier, addr,
            npages,
            <br>
            -                           readonly, owner, NULL,
            <br>
            -                           &hmm_range);
            <br>
            -        WRITE_ONCE(p->svms.faulting_task, NULL);
            <br>
            -        if (r) {
            <br>
            -            pr_debug("failed %d to get svm range pages\n",
            r);
            <br>
            -            if (r == -EBUSY)
            <br>
            -                r = -EAGAIN;
            <br>
            -            goto unreserve_out;
            <br>
                      }
            <br>
              -        offset = (addr - start) >> PAGE_SHIFT;
            <br>
            -        r = svm_range_dma_map(prange, ctx->bitmap,
            offset, npages,
            <br>
            -                      hmm_range->hmm_pfns);
            <br>
            -        if (r) {
            <br>
            -            pr_debug("failed %d to dma map range\n", r);
            <br>
            -            goto unreserve_out;
            <br>
            +        if (!r) {
            <br>
            +            offset = (addr - start) >> PAGE_SHIFT;
            <br>
            +            r = svm_range_dma_map(prange, ctx->bitmap,
            offset, npages,
            <br>
            +                          hmm_range->hmm_pfns);
            <br>
            +            if (r)
            <br>
            +                pr_debug("failed %d to dma map range\n",
            r);
            <br>
                      }
            <br>
                        svm_range_lock(prange);
            <br>
            -        if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
            <br>
            +        if (!r &&
            amdgpu_hmm_range_get_pages_done(hmm_range)) {
            <br>
                          pr_debug("hmm update the range, need validate
            again\n");
            <br>
                          r = -EAGAIN;
            <br>
            -            goto unlock_out;
            <br>
                      }
            <br>
            -        if (!list_empty(&prange->child_list)) {
            <br>
            +
            <br>
            +        if (!r &&
            !list_empty(&prange->child_list)) {
            <br>
                          pr_debug("range split by unmap in parallel,
            validate again\n");
            <br>
                          r = -EAGAIN;
            <br>
            -            goto unlock_out;
            <br>
                      }
            <br>
              -        r = svm_range_map_to_gpus(prange, offset, npages,
            readonly,
            <br>
            -                      ctx->bitmap, wait, flush_tlb);
            <br>
            +        if (!r)
            <br>
            +            r = svm_range_map_to_gpus(prange, offset,
            npages, readonly,
            <br>
            +                          ctx->bitmap, wait, flush_tlb);
            <br>
              -unlock_out:
            <br>
            +        prange->mapped_to_gpu = !r;
            <br>
          </blockquote>
          <br>
          I'm still concerned that this can update
          prange->mapped_to_gpu to "true" before the entire range has
          been successfully mapped. This could cause race conditions if
          someone looks at this variable while a validate_and_map is in
          progress. This would avoid such race conditions:
          <br>
          <br>
                  if (!r && next == end)
          <br>
                      prange->mapped_to_gpu = true;
          <br>
          <br>
        </blockquote>
        thanks, will also add else path for error handling, to exit the
        loop with correct flag.
        <br>
        <br>
                  else if (r)
        <br>
        <br>
                     prange->mapped_to_gpu = false;
        <br>
        <br>
      </blockquote>
      I thought about that. I think the flag should be false going into
      the function. There should be no need to validate and map the
      range if it's already mapped and valid. So if anything, we should
      set the flag to false in the beginning.
      <br>
    </blockquote>
    <p>I was overthinking, you are right, if set_attr update multiple
      pranges failed, set_attr retry will not process
      prange->mapped_to_gpu, evict and restore worker will handle it
      correctly with xnack off, and restore page will update mapping
      with xnack on.</p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:b6fb1ca2-906c-08c5-9dd7-34e7f21af870@amd.com">
      <br>
      Regards,
      <br>
        Felix
      <br>
      <br>
      <br>
      <blockquote type="cite">Regards,
        <br>
        <br>
        Philip
        <br>
        <br>
        <blockquote type="cite">Regards,
          <br>
            Felix
          <br>
          <br>
          <br>
          <blockquote type="cite">          svm_range_unlock(prange);
            <br>
                        addr = next;
            <br>
                  }
            <br>
              -    if (addr == end)
            <br>
            -        prange->mapped_to_gpu = true;
            <br>
            -
            <br>
            -unreserve_out:
            <br>
                  svm_range_unreserve_bos(ctx);
            <br>
            -
            <br>
            -    prange->is_error_flag = !!r;
            <br>
                  if (!r)
            <br>
                      prange->validate_timestamp =
            ktime_get_boottime();
            <br>
              @@ -2104,7 +2097,8 @@ svm_range_add(struct kfd_process *p,
            uint64_t start, uint64_t size,
            <br>
                      next = interval_tree_iter_next(node, start, last);
            <br>
                      next_start = min(node->last, last) + 1;
            <br>
              -        if (svm_range_is_same_attrs(p, prange, nattr,
            attrs)) {
            <br>
            +        if (svm_range_is_same_attrs(p, prange, nattr,
            attrs) &&
            <br>
            +            prange->mapped_to_gpu) {
            <br>
                          /* nothing to do */
            <br>
                      } else if (node->start < start ||
            node->last > last) {
            <br>
                          /* node intersects the update range and its
            attributes
            <br>
            @@ -3517,7 +3511,7 @@ svm_range_set_attr(struct kfd_process
            *p, struct mm_struct *mm,
            <br>
                  struct svm_range *next;
            <br>
                  bool update_mapping = false;
            <br>
                  bool flush_tlb;
            <br>
            -    int r = 0;
            <br>
            +    int r, ret = 0;
            <br>
                    pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages
            0x%llx\n",
            <br>
                       p->pasid, &p->svms, start, start + size
            - 1, size);
            <br>
            @@ -3605,7 +3599,7 @@ svm_range_set_attr(struct kfd_process
            *p, struct mm_struct *mm,
            <br>
              out_unlock_range:
            <br>
                      mutex_unlock(&prange->migrate_mutex);
            <br>
                      if (r)
            <br>
            -            break;
            <br>
            +            ret = r;
            <br>
                  }
            <br>
                    dynamic_svm_range_dump(svms);
            <br>
            @@ -3618,7 +3612,7 @@ svm_range_set_attr(struct kfd_process
            *p, struct mm_struct *mm,
            <br>
                  pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done,
            r=%d\n", p->pasid,
            <br>
                       &p->svms, start, start + size - 1, r);
            <br>
              -    return r;
            <br>
            +    return ret ? ret : r;
            <br>
              }
            <br>
                static int
            <br>
            diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
            b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
            <br>
            index c216c8dd13c6..25f711905738 100644
            <br>
            --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
            <br>
            +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
            <br>
            @@ -133,7 +133,6 @@ struct svm_range {
            <br>
                  DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
            <br>
                  DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
            <br>
                  bool                mapped_to_gpu;
            <br>
            -    bool                is_error_flag;
            <br>
              };
            <br>
                static inline void svm_range_lock(struct svm_range
            *prange)
            <br>
          </blockquote>
        </blockquote>
      </blockquote>
    </blockquote>
  </body>
</html>