[PATCH] drm/amdkfd: Correct svm prange overlapping handling at svm_range_set_attr ioctl
Chen, Xiaogang
xiaogang.chen at amd.com
Mon Jun 24 19:09:18 UTC 2024
On 6/24/2024 11:17 AM, Philip Yang wrote:
>
>
> On 2024-06-21 13:28, Xiaogang.Chen wrote:
>> From: Xiaogang Chen<xiaogang.chen at amd.com>
>>
>> When user adds new vm range that has overlapping with existing svm pranges
>> current kfd clones new prange and remove existing pranges including all data
>> associate with it. It is not necessary. We can handle the overlapping on
>> existing pranges directly that would simplify kfd code. And, when remove a
>> existing prange the locks from it will get destroyed. This may cause issue if
>> code still use these locks. And locks from cloned prange do not inherit
>> context of locks that got removed.
>>
>> This patch does not remove existing pranges or clone new pranges, keeps locks
>> of pranges alive.
>>
>> Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 89 ++++------------------------
>> 1 file changed, 12 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 407636a68814..a8fcace6f9a2 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -904,23 +904,6 @@ svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
>> return (void *)dst;
>> }
>>
>> -static int
>> -svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
>> -{
>> - int i;
>> -
>> - for (i = 0; i < MAX_GPU_INSTANCE; i++) {
>> - if (!src->dma_addr[i])
>> - continue;
>> - dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
>> - sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
>> - if (!dst->dma_addr[i])
>> - return -ENOMEM;
>> - }
>> -
>> - return 0;
>> -}
>> -
>> static int
>> svm_range_split_array(void *ppnew, void *ppold, size_t size,
>> uint64_t old_start, uint64_t old_n,
>> @@ -1967,38 +1950,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
>> return r;
>> }
>>
>> -static struct svm_range *svm_range_clone(struct svm_range *old)
>> -{
>> - struct svm_range *new;
>> -
>> - new = svm_range_new(old->svms, old->start, old->last, false);
>> - if (!new)
>> - return NULL;
>> - if (svm_range_copy_dma_addrs(new, old)) {
>> - svm_range_free(new, false);
>> - return NULL;
>> - }
>> - if (old->svm_bo) {
>> - new->ttm_res = old->ttm_res;
>> - new->offset = old->offset;
>> - new->svm_bo = svm_range_bo_ref(old->svm_bo);
>> - spin_lock(&new->svm_bo->list_lock);
>> - list_add(&new->svm_bo_list, &new->svm_bo->range_list);
>> - spin_unlock(&new->svm_bo->list_lock);
>> - }
>> - new->flags = old->flags;
>> - new->preferred_loc = old->preferred_loc;
>> - new->prefetch_loc = old->prefetch_loc;
>> - new->actual_loc = old->actual_loc;
>> - new->granularity = old->granularity;
>> - new->mapped_to_gpu = old->mapped_to_gpu;
>> - new->vram_pages = old->vram_pages;
>> - bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
>> - bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
>> -
>> - return new;
>> -}
>> -
>> void svm_range_set_max_pages(struct amdgpu_device *adev)
>> {
>> uint64_t max_pages;
>> @@ -2057,7 +2008,6 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
>> * @attrs: array of attributes
>> * @update_list: output, the ranges need validate and update GPU mapping
>> * @insert_list: output, the ranges need insert to svms
>> - * @remove_list: output, the ranges are replaced and need remove from svms
>> * @remap_list: output, remap unaligned svm ranges
>> *
>> * Check if the virtual address range has overlap with any existing ranges,
>> @@ -2082,7 +2032,7 @@ static int
>> svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>> uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
>> struct list_head *update_list, struct list_head *insert_list,
>> - struct list_head *remove_list, struct list_head *remap_list)
>> + struct list_head *remap_list)
>> {
>> unsigned long last = start + size - 1UL;
>> struct svm_range_list *svms = &p->svms;
>> @@ -2096,7 +2046,6 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>>
>> INIT_LIST_HEAD(update_list);
>> INIT_LIST_HEAD(insert_list);
>> - INIT_LIST_HEAD(remove_list);
>> INIT_LIST_HEAD(&new_list);
>> INIT_LIST_HEAD(remap_list);
>>
>> @@ -2117,20 +2066,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>> /* nothing to do */
>> } else if (node->start < start || node->last > last) {
>> /* node intersects the update range and its attributes
>> - * will change. Clone and split it, apply updates only
>> + * will change. Split it, apply updates only
>> * to the overlapping part
>> */
>> - struct svm_range *old = prange;
>> -
>> - prange = svm_range_clone(old);
>> - if (!prange) {
>> - r = -ENOMEM;
>> - goto out;
>> - }
>> -
>> - list_add(&old->update_list, remove_list);
>> - list_add(&prange->list, insert_list);
>> - list_add(&prange->update_list, update_list);
>> + list_move_tail(&prange->list, insert_list);
>> + list_move_tail(&prange->update_list, update_list);
>
> The main purpose to clone prange is for error handling rollback. If
> removing original prange from svms and update it on insert_list, how
> do you rollback to put prange back to svms after splitting prange error?
>
> We hold svms lock to access prange, so it is impossible to access
> prange after it is removed from svms.
>
OK, if the purpose of cloning the prange is error-handling rollback when splitting
the prange fails, then I understand.
This patch's main purpose is to avoid destroying the prange locks (prange->lock,
prange->migrate_mutex), since this prange is still alive; we should keep
its lock contexts. The cloned prange creates new locks that have no
relation to the original locks. So I think we need to swap the original prange
with the cloned prange after the clone's split succeeds, applying the same internal
data member updates, and then destroy the cloned prange.
Holding the svms lock does not mean that the prange lock cannot be used by other
tasks. And in svm_range_add we do not remove any prange; we split it,
then add the prange together with the newly split pranges to svms.
Regards
Xiaogang
> Regards,
>
> Philip
>
>>
>> if (node->start < start) {
>> pr_debug("change old range start\n");
>> @@ -3533,7 +3473,6 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>> struct amdkfd_process_info *process_info = p->kgd_process_info;
>> struct list_head update_list;
>> struct list_head insert_list;
>> - struct list_head remove_list;
>> struct list_head remap_list;
>> struct svm_range_list *svms;
>> struct svm_range *prange;
>> @@ -3563,10 +3502,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>> }
>>
>> mutex_lock(&svms->lock);
>> -
>> /* Add new range and split existing ranges as needed */
>> r = svm_range_add(p, start, size, nattr, attrs, &update_list,
>> - &insert_list, &remove_list, &remap_list);
>> + &insert_list, &remap_list);
>> if (r) {
>> mutex_unlock(&svms->lock);
>> mmap_write_unlock(mm);
>> @@ -3574,21 +3512,18 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>> }
>> /* Apply changes as a transaction */
>> list_for_each_entry_safe(prange, next, &insert_list, list) {
>> - svm_range_add_to_svms(prange);
>> - svm_range_add_notifier_locked(mm, prange);
>> + /* prange can be new or old range, put it at svms->list */
>> + list_move_tail(&prange->list, &prange->svms->list);
>> + /* update prange at interval trees: svms->objects and
>> + * mm interval notifier tree
>> + */
>> + svm_range_update_notifier_and_interval_tree(mm, prange);
>> }
>> +
>> list_for_each_entry(prange, &update_list, update_list) {
>> svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>> /* TODO: unmap ranges from GPU that lost access */
>> }
>> - list_for_each_entry_safe(prange, next, &remove_list, update_list) {
>> - pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
>> - prange->svms, prange, prange->start,
>> - prange->last);
>> - svm_range_unlink(prange);
>> - svm_range_remove_notifier(prange);
>> - svm_range_free(prange, false);
>> - }
>>
>> mmap_write_downgrade(mm);
>> /* Trigger migrations and revalidate and map to GPUs as needed. If
More information about the amd-gfx
mailing list