[PATCH v3] drm/amdkfd: Handle errors from svm validate and map

Wed Sep 20 14:35:25 UTC 2023

On 2023-09-20 10:20, Philip Yang wrote:
>
>
> On 2023-09-19 17:15, Felix Kuehling wrote:
>>
>> On 2023-09-19 10:21, Philip Yang wrote:
>>> If new range is splited to multiple pranges with max_svm_range_pages
>>> alignment and added to update_list, svm validate and map should keep
>>> going after error to make sure prange->mapped_to_gpu flag is up to date
>>> for the whole range.
>>>
>>> svm validate and map update set prange->mapped_to_gpu after mapping to
>>> GPUs successfully, otherwise clear prange->mapped_to_gpu flag (for
>>> update mapping case) instead of setting error flag, we can remove
>>> the redundant error flag to simpliy code.
>>>
>>> Refactor to remove goto and update prange->mapped_to_gpu flag inside
>>> svm_range_lock, to guarant we always evict queues or unmap from GPUs if
>>> there are invalid ranges.
>>>
>>> After svm validate and map return error -EAGIN, the caller retry will
>>> update the mapping for the whole range again.
>>>
>>> Fixes: c22b04407097 ("drm/amdkfd: flag added to handle errors from 
>>> svm validate and map")
>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 78 
>>> +++++++++++++---------------
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
>>>   2 files changed, 36 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 50c29fd844fb..4812f4ac5579 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -818,7 +818,7 @@ svm_range_is_same_attrs(struct kfd_process *p, 
>>> struct svm_range *prange,
>>>           }
>>>       }
>>>   -    return !prange->is_error_flag;
>>> +    return true;
>>>   }
>>>     /**
>>> @@ -1671,7 +1671,7 @@ static int svm_range_validate_and_map(struct 
>>> mm_struct *mm,
>>>         start = prange->start << PAGE_SHIFT;
>>>       end = (prange->last + 1) << PAGE_SHIFT;
>>> -    for (addr = start; addr < end && !r; ) {
>>> +    for (addr = start; !r && addr < end; ) {
>>>           struct hmm_range *hmm_range;
>>>           struct vm_area_struct *vma;
>>>           unsigned long next;
>>> @@ -1680,62 +1680,55 @@ static int svm_range_validate_and_map(struct 
>>> mm_struct *mm,
>>>           bool readonly;
>>>             vma = vma_lookup(mm, addr);
>>> -        if (!vma) {
>>> +        if (vma) {
>>> +            readonly = !(vma->vm_flags & VM_WRITE);
>>> +
>>> +            next = min(vma->vm_end, end);
>>> +            npages = (next - addr) >> PAGE_SHIFT;
>>> +            WRITE_ONCE(p->svms.faulting_task, current);
>>> +            r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
>>> npages,
>>> +                               readonly, owner, NULL,
>>> +                               &hmm_range);
>>> +            WRITE_ONCE(p->svms.faulting_task, NULL);
>>> +            if (r) {
>>> +                pr_debug("failed %d to get svm range pages\n", r);
>>> +                if (r == -EBUSY)
>>> +                    r = -EAGAIN;
>>> +            }
>>> +        } else {
>>>               r = -EFAULT;
>>> -            goto unreserve_out;
>>> -        }
>>> -        readonly = !(vma->vm_flags & VM_WRITE);
>>> -
>>> -        next = min(vma->vm_end, end);
>>> -        npages = (next - addr) >> PAGE_SHIFT;
>>> -        WRITE_ONCE(p->svms.faulting_task, current);
>>> -        r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
>>> npages,
>>> -                           readonly, owner, NULL,
>>> -                           &hmm_range);
>>> -        WRITE_ONCE(p->svms.faulting_task, NULL);
>>> -        if (r) {
>>> -            pr_debug("failed %d to get svm range pages\n", r);
>>> -            if (r == -EBUSY)
>>> -                r = -EAGAIN;
>>> -            goto unreserve_out;
>>>           }
>>>   -        offset = (addr - start) >> PAGE_SHIFT;
>>> -        r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
>>> -                      hmm_range->hmm_pfns);
>>> -        if (r) {
>>> -            pr_debug("failed %d to dma map range\n", r);
>>> -            goto unreserve_out;
>>> +        if (!r) {
>>> +            offset = (addr - start) >> PAGE_SHIFT;
>>> +            r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
>>> +                          hmm_range->hmm_pfns);
>>> +            if (r)
>>> +                pr_debug("failed %d to dma map range\n", r);
>>>           }
>>>             svm_range_lock(prange);
>>> -        if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
>>> +        if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
>>>               pr_debug("hmm update the range, need validate again\n");
>>>               r = -EAGAIN;
>>> -            goto unlock_out;
>>>           }
>>> -        if (!list_empty(&prange->child_list)) {
>>> +
>>> +        if (!r && !list_empty(&prange->child_list)) {
>>>               pr_debug("range split by unmap in parallel, validate 
>>> again\n");
>>>               r = -EAGAIN;
>>> -            goto unlock_out;
>>>           }
>>>   -        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>>> -                      ctx->bitmap, wait, flush_tlb);
>>> +        if (!r)
>>> +            r = svm_range_map_to_gpus(prange, offset, npages, 
>>> readonly,
>>> +                          ctx->bitmap, wait, flush_tlb);
>>>   -unlock_out:
>>> +        prange->mapped_to_gpu = !r;
>>
>> I'm still concerned that this can update prange->mapped_to_gpu to 
>> "true" before the entire range has been successfully mapped. This 
>> could cause race conditions if someone looks at this variable while a 
>> validate_and_map is in progress. This would avoid such race conditions:
>>
>>         if (!r && next == end)
>>             prange->mapped_to_gpu = true;
>>
> thanks, will also add else path for error handling, to exit the loop 
> with correct flag.
>
>           else if (r)
>
>              prange->mapped_to_gpu = false;
>
I thought about that. I think the flag should be false going into the 
function. There should be no need to validate and map the range if it's 
already mapped and valid. So if anything, we should set the flag to 
false in the beginning.

Regards,
   Felix


> Regards,
>
> Philip
>
>> Regards,
>>   Felix
>>
>>
>>>           svm_range_unlock(prange);
>>>             addr = next;
>>>       }
>>>   -    if (addr == end)
>>> -        prange->mapped_to_gpu = true;
>>> -
>>> -unreserve_out:
>>>       svm_range_unreserve_bos(ctx);
>>> -
>>> -    prange->is_error_flag = !!r;
>>>       if (!r)
>>>           prange->validate_timestamp = ktime_get_boottime();
>>>   @@ -2104,7 +2097,8 @@ svm_range_add(struct kfd_process *p, 
>>> uint64_t start, uint64_t size,
>>>           next = interval_tree_iter_next(node, start, last);
>>>           next_start = min(node->last, last) + 1;
>>>   -        if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
>>> +        if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
>>> +            prange->mapped_to_gpu) {
>>>               /* nothing to do */
>>>           } else if (node->start < start || node->last > last) {
>>>               /* node intersects the update range and its attributes
>>> @@ -3517,7 +3511,7 @@ svm_range_set_attr(struct kfd_process *p, 
>>> struct mm_struct *mm,
>>>       struct svm_range *next;
>>>       bool update_mapping = false;
>>>       bool flush_tlb;
>>> -    int r = 0;
>>> +    int r, ret = 0;
>>>         pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
>>>            p->pasid, &p->svms, start, start + size - 1, size);
>>> @@ -3605,7 +3599,7 @@ svm_range_set_attr(struct kfd_process *p, 
>>> struct mm_struct *mm,
>>>   out_unlock_range:
>>>           mutex_unlock(&prange->migrate_mutex);
>>>           if (r)
>>> -            break;
>>> +            ret = r;
>>>       }
>>>         dynamic_svm_range_dump(svms);
>>> @@ -3618,7 +3612,7 @@ svm_range_set_attr(struct kfd_process *p, 
>>> struct mm_struct *mm,
>>>       pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", 
>>> p->pasid,
>>>            &p->svms, start, start + size - 1, r);
>>>   -    return r;
>>> +    return ret ? ret : r;
>>>   }
>>>     static int
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>>> index c216c8dd13c6..25f711905738 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>>> @@ -133,7 +133,6 @@ struct svm_range {
>>>       DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
>>>       DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
>>>       bool                mapped_to_gpu;
>>> -    bool                is_error_flag;
>>>   };
>>>     static inline void svm_range_lock(struct svm_range *prange)