[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery
Chen, Xiaogang
xiaogang.chen at amd.com
Thu Oct 19 20:14:49 UTC 2023
On 10/19/2023 2:40 PM, Philip Yang wrote:
>
>
> On 2023-10-19 12:20, Chen, Xiaogang wrote:
>>
>> On 10/19/2023 11:08 AM, Philip Yang wrote:
>>>
>>>
>>> On 2023-10-19 10:24, Xiaogang.Chen wrote:
>>>> From: Xiaogang Chen<xiaogang.chen at amd.com>
>>>>
>>>> After a partial migration to recover a GPU page fault, this patch maps
>>>> into the GPU vm space only the page range that was migrated, instead of
>>>> mapping all pages of the svm range in which the page fault happened.
>>>>
>>>> Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +++++++++++++++++++++-------
>>>> 1 file changed, 25 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> index 54af7a2b29f8..81dbcc8a4ccc 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> @@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>>>> * 5. Release page table (and SVM BO) reservation
>>>> */
>>>> static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> + unsigned long map_start, unsigned long map_last,
>>>> struct svm_range *prange, int32_t gpuidx,
>>>> bool intr, bool wait, bool flush_tlb)
>>>> {
>>>> @@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> int32_t idx;
>>>> int r = 0;
>>>> + if (map_start < prange->start || map_last > prange->last) {
>>> This is not needed, as this case should never happen, and you also
>>> use max/min to limit map_start, map_last below.
>> This was just a sanity check, I can remove it.
>>>> + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
>>>> + map_start, map_last, prange->start, prange->last);
>>>> + return -EFAULT;
>>>> + }
>>>> +
>>>> ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
>>>> if (!ctx)
>>>> return -ENOMEM;
>>>> @@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>>> r = -EAGAIN;
>>>> }
>>>> - if (!r)
>>>> - r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>>>> - ctx->bitmap, wait, flush_tlb);
>>>> + if (!r) {
>>>> + map_start = max(map_start, prange->start + offset);
>>>> + map_last = min(map_last, prange->start + offset + npages);
>>>
>>> This should be moved forward, outside the for loop; otherwise
>>> amdgpu_hmm_range_get_pages and svm_range_dma_map still work on the
>>> entire prange, and then the prange->vram_pages update logic should
>>> be changed accordingly.
>>>
>> We need to use the hmm function to get the vram page count on the
>> whole range, since after a partial migration we do not know how many
>> vram pages remain; only then do we know whether the vram bo can be
>> released.
>
> ok, migrate-to-vram and migrate-to-ram already have the vram pages
> updated; this is only needed for a split prange. We could update
> prange->vram_pages when splitting the prange; that can be done in
> another patch.
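To make the point above concrete, here is a toy standalone model (plain C;
the names and values are made up for illustration, this is not the kernel
code): only the faulted window is remapped, but deciding whether the vram
bo can be released still requires looking at every page of the prange.

#include <stdbool.h>
#include <stdio.h>

enum page_loc { LOC_SYSRAM, LOC_VRAM };

/* stand-in for walking the whole range to count vram pages */
static unsigned long count_vram_pages(const enum page_loc *pages,
				      unsigned long npages)
{
	unsigned long n = 0;

	for (unsigned long i = 0; i < npages; i++)
		if (pages[i] == LOC_VRAM)
			n++;
	return n;
}

int main(void)
{
	/* an 8-page range that currently lives entirely in vram */
	enum page_loc pages[8] = { LOC_VRAM, LOC_VRAM, LOC_VRAM, LOC_VRAM,
				   LOC_VRAM, LOC_VRAM, LOC_VRAM, LOC_VRAM };

	/* partial migration: only pages 4..5 moved to system memory,
	 * and only that window gets remapped to the GPU */
	pages[4] = LOC_SYSRAM;
	pages[5] = LOC_SYSRAM;

	/* the release decision still needs the whole-range count */
	if (count_vram_pages(pages, 8) == 0)
		printf("vram bo can be released\n");
	else
		printf("vram bo still in use\n");	/* printed here */
	return 0;
}

In the driver, that whole-range information comes from the hmm walk, as
noted above.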
>
> map_last is inclusive, so this should be:
>
> + map_last = min(map_last, prange->start + offset + npages - 1);
>
ok, will update it.
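For reference, a minimal standalone sketch of the corrected clamp (plain C
with made-up values, not the kernel code), showing that without the - 1 one
extra page past the validated window would be mapped:

#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long prange_start = 0x100;	/* first page of the prange */
	unsigned long offset = 4, npages = 8;	/* validated window: 0x104..0x10b */
	unsigned long map_start = 0x100;	/* requested range, inclusive */
	unsigned long map_last = 0x1ff;

	map_start = max(map_start, prange_start + offset);
	map_last = min(map_last, prange_start + offset + npages - 1);

	/* prints: map 0x104..0x10b (8 pages); without the - 1 above it
	 * would report 9 pages and touch one page beyond the window */
	printf("map 0x%lx..0x%lx (%lu pages)\n",
	       map_start, map_last, map_last - map_start + 1);
	return 0;
}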
Regards
Xiaogang
> Regards,
>
> Philip
>
>>
>> Regards
>>
>> Xiaogang
>>
>>> Regards,
>>>
>>> Philip
>>>
>>>> + if (map_start <= map_last) {
>>>> + offset = map_start - prange->start;
>>>> + npages = map_last - map_start + 1;
>>>> + r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>>>> + ctx->bitmap, wait, flush_tlb);
>>>> + }
>>>> + }
>>>> if (!r && next == end)
>>>> prange->mapped_to_gpu = true;
>>>> @@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct *work)
>>>> */
>>>> mutex_lock(&prange->migrate_mutex);
>>>> - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>>>> - false, true, false);
>>>> + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
>>>> + MAX_GPU_INSTANCE, false, true, false);
>>>> if (r)
>>>> pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>>> prange->start);
>>>> @@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>>> kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
>>>> write_fault, timestamp);
>>>> + start = prange->start;
>>>> + last = prange->last;
>>>> if (prange->actual_loc != 0 || best_loc != 0) {
>>>> migration = true;
>>>> /* Align migration range start and size to granularity size */
>>>> @@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>>> }
>>>> }
>>>> - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>>>> + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
>>>> + false, false);
>>>> if (r)
>>>> pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>>> - r, svms, prange->start, prange->last);
>>>> + r, svms, start, last);
>>>> kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
>>>> migration);
>>>> @@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>>> flush_tlb = !migrated && update_mapping &&
>>>> prange->mapped_to_gpu;
>>>> - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>>>> + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, MAX_GPU_INSTANCE,
>>>> true, true, flush_tlb);
>>>> if (r)
>>>> pr_debug("failed %d to map svm range\n", r);