[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
Chen, Xiaogang
xiaogang.chen at amd.com
Thu Oct 19 20:14:49 UTC 2023
On 10/19/2023 2:40 PM, Philip Yang wrote:
>
>
> On 2023-10-19 12:20, Chen, Xiaogang wrote:
>>
>> On 10/19/2023 11:08 AM, Philip Yang wrote:
>>>
>>>
>>> On 2023-10-19 10:24, Xiaogang.Chen wrote:
>>>> From: Xiaogang Chen<xiaogang.chen at amd.com>
>>>>
>>>> After a partial migration to recover a GPU page fault, this patch maps
>>>> into the GPU vm space only the page range that was migrated, instead of
>>>> mapping all pages of the svm range in which the page fault happened.
>>>>
>>>> Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++-------
>>>> 1 file changed, 25 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> index 54af7a2b29f8..81dbcc8a4ccc 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> @@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>>>> * 5. Release page table (and SVM BO) reservation
>>>> */
>>>> static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> + unsigned long map_start, unsigned long map_last,
>>>> struct svm_range *prange, int32_t gpuidx,
>>>> bool intr, bool wait, bool flush_tlb)
>>>> {
>>>> @@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> int32_t idx;
>>>> int r = 0;
>>>> + if (map_start < prange->start || map_last > prange->last) {
>>> This is not needed, as this case should never happen, and you also
>>> use max/min to limit map_start, map_last below.
>> This was just a sanity check, I can remove it.
>>>> + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
>>>> + map_start, map_last, prange->start, prange->last);
>>>> + return -EFAULT;
>>>> + }
>>>> +
>>>> ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
>>>> if (!ctx)
>>>> return -ENOMEM;
>>>> @@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> r = -EAGAIN;
>>>> }
>>>> - if (!r)
>>>> - r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>>>> - ctx->bitmap, wait, flush_tlb);
>>>> + if (!r) {
>>>> + map_start = max(map_start, prange->start + offset);
>>>> + map_last = min(map_last, prange->start + offset + npages);
>>>
>>> This should be moved forward, outside the for loop; otherwise
>>> amdgpu_hmm_range_get_pages and svm_range_dma_map still work on the
>>> entire prange, and then the prange->vram_pages update logic should
>>> be changed accordingly.
>>>
>> We need to use the hmm function to get the vram page count on the
>> whole range, since after a partial migration we do not know how many
>> vram pages remain; only then do we know whether the vram bo can be
>> released.
>
> ok, migrate-to-vram and migrate-to-ram already have the vram pages
> updated; this is only needed for a split prange. We could update
> prange->vram_pages when splitting the prange; that can be done in
> another patch.
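To make the point above concrete, here is a toy standalone model (plain C;
the names and values are made up for illustration, this is not the kernel
code): only the faulted window is remapped, but deciding whether the vram
bo can be released still requires looking at every page of the prange.

#include <stdbool.h>
#include <stdio.h>

enum page_loc { LOC_SYSRAM, LOC_VRAM };

/* stand-in for walking the whole range to count vram pages */
static unsigned long count_vram_pages(const enum page_loc *pages,
				      unsigned long npages)
{
	unsigned long n = 0;

	for (unsigned long i = 0; i < npages; i++)
		if (pages[i] == LOC_VRAM)
			n++;
	return n;
}

int main(void)
{
	/* an 8-page range that currently lives entirely in vram */
	enum page_loc pages[8] = { LOC_VRAM, LOC_VRAM, LOC_VRAM, LOC_VRAM,
				   LOC_VRAM, LOC_VRAM, LOC_VRAM, LOC_VRAM };

	/* partial migration: only pages 4..5 moved to system memory,
	 * and only that window gets remapped to the GPU */
	pages[4] = LOC_SYSRAM;
	pages[5] = LOC_SYSRAM;

	/* the release decision still needs the whole-range count */
	if (count_vram_pages(pages, 8) == 0)
		printf("vram bo can be released\n");
	else
		printf("vram bo still in use\n");	/* printed here */
	return 0;
}

In the driver, that whole-range information comes from the hmm walk, as
noted above.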
>
> map_last is inclusive, so this should be:
>
> + map_last = min(map_last, prange->start + offset + npages - 1);
>
ok, will update it.
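For reference, a minimal standalone sketch of the corrected clamp (plain C
with made-up values, not the kernel code), showing that without the - 1 one
extra page past the validated window would be mapped:

#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long prange_start = 0x100;	/* first page of the prange */
	unsigned long offset = 4, npages = 8;	/* validated window: 0x104..0x10b */
	unsigned long map_start = 0x100;	/* requested range, inclusive */
	unsigned long map_last = 0x1ff;

	map_start = max(map_start, prange_start + offset);
	map_last = min(map_last, prange_start + offset + npages - 1);

	/* prints: map 0x104..0x10b (8 pages); without the - 1 above it
	 * would report 9 pages and touch one page beyond the window */
	printf("map 0x%lx..0x%lx (%lu pages)\n",
	       map_start, map_last, map_last - map_start + 1);
	return 0;
}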
Regards
Xiaogang
> Regards,
>
> Philip
>
>>
>> Regards
>>
>> Xiaogang
>>
>>> Regards,
>>>
>>> Philip
>>>
>>>> + if (map_start <= map_last) {
>>>> + offset = map_start - prange->start;
>>>> + npages = map_last - map_start + 1;
>>>> + r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>>>> + ctx->bitmap, wait, flush_tlb);
>>>> + }
>>>> + }
>>>> if (!r && next == end)
>>>> prange->mapped_to_gpu = true;
>>>> @@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
>>>> */
>>>> mutex_lock(&prange->migrate_mutex);
>>>> - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>>>> - false, true, false);
>>>> + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
>>>> + MAX_GPU_INSTANCE, false, true, false);
>>>> if (r)
>>>> pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>>> prange->start);
>>>> @@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>>> kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
>>>> write_fault, timestamp);
>>>> + start = prange->start;
>>>> + last = prange->last;
>>>> if (prange->actual_loc != 0 || best_loc != 0) {
>>>> migration = true;
>>>> /* Align migration range start and size to granularity size */
>>>> @@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>>> }
>>>> }
>>>> - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>>>> + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
>>>> + false, false);
>>>> if (r)
>>>> pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>>> - r, svms, prange->start, prange->last);
>>>> + r, svms, start, last);
>>>> kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
>>>> migration);
>>>> @@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>>> flush_tlb = !migrated && update_mapping &&
>>>> prange->mapped_to_gpu;
>>>> - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>>>> + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE,
>>>> true, true, flush_tlb);
>>>> if (r)
>>>> pr_debug("failed %d to map svm range\n", r);