[PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2
Yang, Philip
Philip.Yang at amd.com
Thu Dec 6 21:01:56 UTC 2018
On 2018-12-03 8:52 p.m., Kuehling, Felix wrote:
> See comments inline. I didn't review the amdgpu_cs and amdgpu_gem parts
> as I don't know them very well.
>
> On 2018-12-03 3:19 p.m., Yang, Philip wrote:
>> Use HMM helper function hmm_vma_fault() to get physical pages backing
>> userptr and start CPU page table update track of those pages. Then use
>> hmm_vma_range_done() to check if those pages are updated before
>> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>>
>> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
>> from scratch, for kfd, restore worker is rescheduled to retry.
>>
>> To avoid circular lock dependency, no nested locking between mmap_sem
>> and bo::reserve. The locking order is:
>> bo::reserve -> amdgpu_mn_lock(p->mn)
>>
>> HMM simplifies the check for concurrent CPU page table updates, so remove
>> the guptasklock, mmu_invalidations and last_set_pages fields from the
>> amdgpu_ttm_tt struct.
>>
>> HMM does not pin pages (does not increase the page ref count), so remove
>> related operations like release_pages(), put_page(), mark_page_dirty().
>>
>> v2:
>> * Remove nested locking between mmap_sem and bo::reserve
>> * Change locking order to bo::reserve -> amdgpu_mn_lock()
>> * Use dynamic allocation to replace VLA in kernel stack
>>
>> Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 101 ++++++++--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 188 +++++++++---------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 34 +++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 7 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 164 ++++++---------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 -
>> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 67 ++++---
>> 11 files changed, 312 insertions(+), 272 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index f3129b912714..5ce6ba24fc72 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>> amdgpu_bo_unreserve(bo);
>>
>> release_out:
>> - if (ret)
>> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>> free_out:
>> kvfree(mem->user_pages);
>> mem->user_pages = NULL;
>> @@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>> ctx->kfd_bo.priority = 0;
>> ctx->kfd_bo.tv.bo = &bo->tbo;
>> ctx->kfd_bo.tv.shared = true;
>> - ctx->kfd_bo.user_pages = NULL;
>> list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>
>> amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
>> @@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>> ctx->kfd_bo.priority = 0;
>> ctx->kfd_bo.tv.bo = &bo->tbo;
>> ctx->kfd_bo.tv.shared = true;
>> - ctx->kfd_bo.user_pages = NULL;
>> list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>
>> i = 0;
>> @@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>> /* Free user pages if necessary */
>> if (mem->user_pages) {
>> pr_debug("%s: Freeing user_pages array\n", __func__);
>> - if (mem->user_pages[0])
>> - release_pages(mem->user_pages,
>> - mem->bo->tbo.ttm->num_pages);
>> kvfree(mem->user_pages);
>> }
>>
>> @@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>> __func__);
>> return -ENOMEM;
>> }
>> - } else if (mem->user_pages[0]) {
>> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> }
>>
>> /* Get updated user pages */
>> @@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>> * stalled user mode queues.
>> */
>> }
>> -
>> - /* Mark the BO as valid unless it was invalidated
>> - * again concurrently
>> - */
>> - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
>> - return -EAGAIN;
>> }
>>
>> return 0;
>> @@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>> }
>>
>> /* Validate succeeded, now the BO owns the pages, free
>> - * our copy of the pointer array. Put this BO back on
>> - * the userptr_valid_list. If we need to revalidate
>> - * it, we need to start from scratch.
>> + * our copy of the pointer array.
>> */
>> kvfree(mem->user_pages);
>> mem->user_pages = NULL;
>> - list_move_tail(&mem->validate_list.head,
>> - &process_info->userptr_valid_list);
>>
>> /* Update mapping. If the BO was not validated
>> * (because we couldn't get user pages), this will
>> @@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>> return ret;
>> }
>>
>> +/* user_pages_invalidated - if CPU page table is updated after getting user
>> + * pages
>> + *
>> + * The HMM mirror callback lock amn is held to prevent concurrent CPU
>> + * page table updates while resuming user queues.
>> + *
>> + * Returns: true if CPU page table is updated, false otherwise
>> + */
>> +static int user_pages_invalidated(struct mm_struct *mm,
>> + struct amdkfd_process_info *process_info,
>> + struct amdgpu_mn **amn)
>> +{
>> + struct kgd_mem *mem, *tmp_mem;
>> + struct amdgpu_bo *bo;
>> + struct amdgpu_device *adev;
>> + int invalid, r = 0;
>> +
>> + list_for_each_entry_safe(mem, tmp_mem,
>> + &process_info->userptr_inval_list,
>> + validate_list.head) {
>> +
>> + invalid = atomic_read(&mem->invalid);
>> + if (!invalid)
>> + /* BO hasn't been invalidated since the last
>> + * revalidation attempt. Keep its BO list.
>> + */
>> + continue;
>> +
>> + bo = mem->bo;
>> +
>> + /* Get HMM mirror callback lock */
>> + if (!*amn) {
>> + adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> + *amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
>> + if (IS_ERR(*amn)) {
>> + r = true;
>> + *amn = NULL;
>> + goto out;
>> + }
>> +
>> + amdgpu_mn_lock(*amn);
> There can be multiple GPUs involved here. So you'd need more than one
> amn lock. I think you're trying to use the amn lock to serialize page
> table updates. But that's probably not the right lock. What we should be
> using for this is the page directory reservation locks. We're already
> taking them for all affected GPUs in validate_invalid_user_pages. And
> that's where you should also be calling amdgpu_ttm_tt_get_user_pages_done.
>
Yes, this is incorrect for mGPU. I will remove this function completely and
call amdgpu_ttm_tt_get_user_pages_done() inside validate_invalid_user_pages()
while holding the reservation locks, roughly as sketched below.
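For v3 the idea is roughly the following, inside validate_invalid_user_pages()
where the page directory reservations are already taken (untested sketch, not
the final patch; the ret variable and the unreserve_out label are placeholders
for whatever that function ends up using):

	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
		struct amdgpu_bo *bo = mem->bo;

		/* Stop HMM tracking and check whether the CPU page table
		 * changed since amdgpu_ttm_tt_get_user_pages(). Holding the
		 * PD reservation serializes this against GPU page table
		 * updates on all affected GPUs.
		 */
		if (amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm)) {
			ret = -EAGAIN;	/* restore worker reschedules and retries */
			goto unreserve_out;
		}

		/* Pages are still valid, put the BO back on the valid list */
		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_valid_list);
	}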
>> + }
>> +
>> + r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> This needs to be called while holding the "driver lock that serializes
> device page table updates". I think in our case that's the page
> directory reservation. You're not holding that here.
>
>
>> +
>> + /* Put this BO back on the userptr_valid_list. If we need to
>> + * revalidate it, we need to start from scratch.
>> + */
>> + list_move_tail(&mem->validate_list.head,
>> + &process_info->userptr_valid_list);
> Doing this unconditionally here looks wrong. Also, this was already done
> in validate_invalid_user_pages. So I'm wondering what's the point of
> this function. In most cases the process_info->userptr_inval_list will
> be empty here to begin with.
>
> I think you should probably call amdgpu_ttm_tt_get_user_pages_done in
> validate_invalid_user_pages while holding the PD reservation lock and
> where you call amdgpu_ttm_tt_set_user_pages.
>
>
>> +
>> + /* Mark the BO as valid unless it was invalidated
>> + * again concurrently
>> + */
>> + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
>> + r = true;
>> + goto out;
>> + }
>> + }
>> +
>> +out:
>> + return r;
>> +}
>> +
>> /* Worker callback to restore evicted userptr BOs
>> *
>> * Tries to update and validate all userptr BOs. If successful and no
>> @@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>> struct task_struct *usertask;
>> struct mm_struct *mm;
>> int evicted_bos;
>> + struct amdgpu_mn *amn = NULL;
>>
>> evicted_bos = atomic_read(&process_info->evicted_bos);
>> if (!evicted_bos)
>> @@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>> if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
>> evicted_bos)
>> goto unlock_out;
>> +
>> + /* If CPU page table is updated again after getting user pages,
>> + * schedule to restart the restore process again.
>> + *
>> + * amn is also locked to prevent CPU page table update while resuming
>> + * user queues. amn is unlocked after user queues are resumed.
>> + */
>> + if (user_pages_invalidated(mm, process_info, &amn))
>> + goto unlock_mn_out;
>> +
>> evicted_bos = 0;
>> +
>> if (kgd2kfd->resume_mm(mm)) {
>> pr_err("%s: Failed to resume KFD\n", __func__);
>> /* No recovery from this failure. Probably the CP is
>> * hanging. No point trying again.
>> */
>> }
>> +
>> +unlock_mn_out:
>> + amdgpu_mn_unlock(amn);
>> unlock_out:
>> mutex_unlock(&process_info->lock);
>> mmput(mm);
> [snip]
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 56595b3d90d2..6b6becdb725b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>> true, false, MAX_SCHEDULE_TIMEOUT);
>> if (r <= 0)
>> DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>> -
>> - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>> }
>> }
>>
>> @@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>> /**
>> * amdgpu_mn_get - create HMM mirror context
>> *
>> + * @mm: the mm struct
>> * @adev: amdgpu device pointer
>> * @type: type of MMU notifier context
>> *
>> - * Creates a HMM mirror context for current->mm.
>> + * Creates a HMM mirror context for mm.
>> */
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
>> + struct amdgpu_device *adev,
>> enum amdgpu_mn_type type)
>> {
>> - struct mm_struct *mm = current->mm;
>> struct amdgpu_mn *amn;
>> unsigned long key = AMDGPU_MN_KEY(mm, type);
>> int r;
>> @@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> struct list_head bos;
>> struct interval_tree_node *it;
>>
>> - amn = amdgpu_mn_get(adev, type);
>> + amn = amdgpu_mn_get(current->mm, adev, type);
>> if (IS_ERR(amn))
>> return PTR_ERR(amn);
>>
>> @@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>> mutex_unlock(&adev->mn_lock);
>> }
>>
>> +/* flags used internally by HMM, not related to CPU/GPU PTE flags */
>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
>> + (1 << 0), /* HMM_PFN_VALID */
>> + (1 << 1), /* HMM_PFN_WRITE */
>> + 0 /* HMM_PFN_DEVICE_PRIVATE */
>> +};
>> +
>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
>> + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
>> + 0, /* HMM_PFN_NONE */
>> + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
>> +};
>> +
>> +void amdgpu_hmm_init_range(struct hmm_range *range)
>> +{
>> + if (range) {
>> + range->flags = hmm_range_flags;
>> + range->values = hmm_range_values;
>> + range->pfn_shift = PAGE_SHIFT;
>> + range->pfns = NULL;
>> + INIT_LIST_HEAD(&range->list);
>> + }
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index 0e2752646e6f..59ea30e149bd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -25,9 +25,10 @@
>> #define __AMDGPU_MN_H__
>>
>> /*
>> - * MMU Notifier
>> + * HMM mirror
>> */
>> struct amdgpu_mn;
>> +struct hmm_range;
>>
>> enum amdgpu_mn_type {
>> AMDGPU_MN_TYPE_GFX,
>> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>> #if defined(CONFIG_HMM)
>> void amdgpu_mn_lock(struct amdgpu_mn *mn);
>> void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
>> + struct amdgpu_device *adev,
>> enum amdgpu_mn_type type);
>> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>> void amdgpu_mn_unregister(struct amdgpu_bo *bo);
>> +void amdgpu_hmm_init_range(struct hmm_range *range);
>> #else
>> static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>> static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c91ec3101d00..4a1cb4d15dfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -43,6 +43,7 @@
>> #include <linux/pagemap.h>
>> #include <linux/debugfs.h>
>> #include <linux/iommu.h>
>> +#include <linux/hmm.h>
>> #include "amdgpu.h"
>> #include "amdgpu_object.h"
>> #include "amdgpu_trace.h"
>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>> /*
>> * TTM backend functions.
>> */
>> -struct amdgpu_ttm_gup_task_list {
>> - struct list_head list;
>> - struct task_struct *task;
>> -};
>> -
>> struct amdgpu_ttm_tt {
>> struct ttm_dma_tt ttm;
>> u64 offset;
>> @@ -718,85 +714,91 @@ struct amdgpu_ttm_tt {
>> uint32_t userflags;
>> spinlock_t guptasklock;
>> struct list_head guptasks;
>> - atomic_t mmu_invalidations;
>> - uint32_t last_set_pages;
>> + struct hmm_range range;
>> };
>>
>> /**
>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
>> - * pointer to memory
>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
>> + * memory and start HMM tracking CPU page table update
>> *
>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
>> - * This provides a wrapper around the get_user_pages() call to provide
>> - * device accessible pages that back user memory.
>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
>> + * once afterwards to stop HMM tracking
>> */
>> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>> {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> struct mm_struct *mm = gtt->usertask->mm;
>> - unsigned int flags = 0;
>> - unsigned pinned = 0;
>> - int r;
>> + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> + struct hmm_range *range = &gtt->range;
>> + int r, i;
>>
>> if (!mm) /* Happens during process shutdown */
>> return -ESRCH;
>>
>> - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>> - flags |= FOLL_WRITE;
>> + amdgpu_hmm_init_range(range);
>>
>> down_read(&mm->mmap_sem);
>>
>> - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>> - /*
>> - * check that we only use anonymous memory to prevent problems
>> - * with writeback
>> - */
>> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> - struct vm_area_struct *vma;
>> + range->vma = find_vma(mm, gtt->userptr);
>> + if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {
> I think this breaks file-backed mappings and also adds a new limitation
> that the address range has to be a single VMA. This was previously only
> required it gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY.
Previously this vma lookup was only a sanity check, because get_user_pages()
does not need a vma passed in. Now hmm_vma_fault() expects a valid vma in the
range struct, so I will change this to keep the previous check; see the
sketch below. Thanks.
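For v3 the check could look roughly like this (sketch only; the range still
has to fit inside a single vma because hmm_vma_fault() takes exactly one vma
in the range struct, so the multi-VMA case Felix mentions would need a loop
on top of this):

	range->vma = find_vma(mm, gtt->userptr);
	if (!range->vma || range->vma->vm_start > gtt->userptr ||
	    range->vma->vm_end < end) {
		r = -EFAULT;
		goto out;
	}
	if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
	    range->vma->vm_file) {
		/* only anonymous memory for ANONONLY userptrs, to avoid
		 * problems with writeback
		 */
		r = -EPERM;
		goto out;
	}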
>> + r = -EPERM;
>> + goto out;
>> + }
>> + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> + GFP_KERNEL | __GFP_ZERO);
> Is GFP_ZERO really needed here? The loop below initializes all pfns
> explicitly.
Right, __GFP_ZERO is not needed; I will drop it.
>
>> + if (range->pfns == NULL) {
>> + r = -ENOMEM;
>> + goto out;
>> + }
>> + range->start = gtt->userptr;
>> + range->end = end;
>>
>> - vma = find_vma(mm, gtt->userptr);
>> - if (!vma || vma->vm_file || vma->vm_end < end) {
>> - up_read(&mm->mmap_sem);
>> - return -EPERM;
>> - }
>> + for (i = 0; i < ttm->num_pages; i++) {
>> + range->pfns[i] = range->flags[HMM_PFN_VALID];
>> + range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> + 0 : range->flags[HMM_PFN_WRITE];
>> }
>>
>> - /* loop enough times using contiguous pages of memory */
>> - do {
>> - unsigned num_pages = ttm->num_pages - pinned;
>> - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
>> - struct page **p = pages + pinned;
>> - struct amdgpu_ttm_gup_task_list guptask;
>> + /* This may triggles page table update */
> Do you mean "trigger"?
>
>
>> + r = hmm_vma_fault(range, true);
>> + if (r)
>> + goto out_free_pfns;
>>
>> - guptask.task = current;
>> - spin_lock(&gtt->guptasklock);
>> - list_add(&guptask.list, &gtt->guptasks);
>> - spin_unlock(&gtt->guptasklock);
>> + for (i = 0; i < ttm->num_pages; i++)
>> + pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>>
>> - if (mm == current->mm)
>> - r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> - else
>> - r = get_user_pages_remote(gtt->usertask,
>> - mm, userptr, num_pages,
>> - flags, p, NULL, NULL);
>> + up_read(&mm->mmap_sem);
> You can probably up_read before the loop.
>
> Also, why do you defer freeing of range->pfns to
> amdgpu_ttm_tt_get_user_pages_done? The array isn't used for anything
> else later, so you could free it here.
>
> Or instead, don't free it and use it to replace the pages array. It
> seems wasteful to maintain two pages arrays and constantly allocate and
> deallocate one of them.
range->pfns will be used later by hmm_vma_range_done() in
amdgpu_ttm_tt_get_user_pages_done(); the intended pairing is sketched below.
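To make the pairing explicit, the intended call flow is roughly (sketch, not
new code; valid is a local variable here just for illustration):

	/* amdgpu_ttm_tt_get_user_pages(): allocate gtt->range.pfns, then */
	r = hmm_vma_fault(&gtt->range, true);	/* starts tracking, fills pfns */

	/* ... caller programs the GPU page tables under bo/PD reservation ... */

	/* amdgpu_ttm_tt_get_user_pages_done(): */
	valid = hmm_vma_range_done(&gtt->range);	/* needs the same pfns array */
	kvfree(gtt->range.pfns);
	gtt->range.pfns = NULL;			/* freed exactly once, here */

	/* if !valid, the CPU page table changed in between and the caller
	 * restarts from amdgpu_ttm_tt_get_user_pages()
	 */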
>
>> + return 0;
>>
>> - spin_lock(&gtt->guptasklock);
>> - list_del(&guptask.list);
>> - spin_unlock(&gtt->guptasklock);
>> +out_free_pfns:
>> + kvfree(range->pfns);
>> + range->pfns = NULL;
> Setting range->pfns = NULL is not needed. This is a local variable
> that's about to go out of scope anyway.
range is a local pointer to gtt->range, whose pfns array is freed in
amdgpu_ttm_tt_get_user_pages_done(); assigning it to NULL here avoids a
double free on that path.
>
>> +out:
>> + up_read(&mm->mmap_sem);
>> + return r;
>> +}
>>
>> - if (r < 0)
>> - goto release_pages;
>> +/**
>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
>> + * Check if the pages backing this ttm range have been invalidated
>> + *
>> + * Returns: true if pages are invalidated since the last time they've been set
>> + */
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>> +{
>> + struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> + int r;
>>
>> - pinned += r;
>> + if (gtt == NULL || !gtt->userptr)
>> + return false;
>>
>> - } while (pinned < ttm->num_pages);
>> + r = !hmm_vma_range_done(&gtt->range);
>>
>> - up_read(&mm->mmap_sem);
>> - return 0;
>> + if (gtt->range.pfns) {
>> + kvfree(gtt->range.pfns);
>> + gtt->range.pfns = NULL;
>> + }
>>
>> -release_pages:
>> - release_pages(pages, pinned);
>> - up_read(&mm->mmap_sem);
>> return r;
>> }
>>
>> @@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>> */
>> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> Does this function still make sense? It just copies a pages array. Could
> it instead use gtt->range->pfns directly? And why does this still have
> to be separate from amdgpu_ttm_tt_get_user_pages? I think originally it
> was separate because we needed to keep track of the original pages for
> unpinning them. But we're no longer pinning and unpinning pages.
>
Yes, I will remove mem->user_pages completely and use bo->tbo.ttm->pages
instead; see the sketch below.
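Sketch of what update_invalid_user_pages() could then do in v3 (assumption
only, and the debug message is illustrative):

	/* Fill the ttm pages array directly, no separate copy to maintain */
	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	if (ret) {
		pr_debug("%s: Failed to get user pages: %d\n", __func__, ret);
		/* Keep the BO on userptr_inval_list and retry later */
	}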
>> {
>> - struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> unsigned i;
>>
>> - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
>> - for (i = 0; i < ttm->num_pages; ++i) {
>> - if (ttm->pages[i])
>> - put_page(ttm->pages[i]);
>> -
>> + for (i = 0; i < ttm->num_pages; ++i)
>> ttm->pages[i] = pages ? pages[i] : NULL;
> If this is still needed, it could just be a memcpy now.
>
>> - }
>> }
>>
>> /**
>> @@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>> /* unmap the pages mapped to the device */
>> dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>>
>> - /* mark the pages as dirty */
>> - amdgpu_ttm_tt_mark_user_pages(ttm);
>> -
>> sg_free_table(ttm->sg);
>> }
>>
>> @@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>
>> spin_lock_init(&gtt->guptasklock);
>> INIT_LIST_HEAD(&gtt->guptasks);
>> - atomic_set(&gtt->mmu_invalidations, 0);
>> - gtt->last_set_pages = 0;
>>
>> return 0;
>> }
>> @@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>> unsigned long end)
>> {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> - struct amdgpu_ttm_gup_task_list *entry;
>> unsigned long size;
>>
>> if (gtt == NULL || !gtt->userptr)
>> @@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>> if (gtt->userptr > end || gtt->userptr + size <= start)
>> return false;
>>
>> - /* Search the lists of tasks that hold this mapping and see
>> - * if current is one of them. If it is return false.
>> - */
>> - spin_lock(&gtt->guptasklock);
>> - list_for_each_entry(entry, &gtt->guptasks, list) {
>> - if (entry->task == current) {
>> - spin_unlock(&gtt->guptasklock);
>> - return false;
>> - }
>> - }
>> - spin_unlock(&gtt->guptasklock);
>> -
>> - atomic_inc(&gtt->mmu_invalidations);
>> -
>> return true;
>> }
>>
>> /**
>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
>> + * amdgpu_ttm_tt_is_userptr - Are the pages backed by userptr?
>> */
>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>> - int *last_invalidated)
>> -{
>> - struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> - int prev_invalidated = *last_invalidated;
>> -
>> - *last_invalidated = atomic_read(&gtt->mmu_invalidations);
>> - return prev_invalidated != *last_invalidated;
>> -}
>> -
>> -/**
>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
>> - * been invalidated since the last time they've been set?
>> - */
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>> {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>
>> if (gtt == NULL || !gtt->userptr)
>> return false;
>>
>> - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
>> + return true;
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index b5b2d101f7db..8988c87fff9d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>> int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>
>> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>> void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>> int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>> unsigned long end);
>> bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>> int *last_invalidated);
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>> bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>> uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>> uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 2acb9838913e..4a67a88acea1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>> entry->priority = 0;
>> entry->tv.bo = &vm->root.base.bo->tbo;
>> entry->tv.shared = true;
>> - entry->user_pages = NULL;
>> list_add(&entry->tv.head, validated);
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 8372556b52eb..fe120cc0930c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> Please make everything below a separate commit.
Ok, I will submit a separate patch for the changes below.
> Regards,
> Felix
>
>
>> @@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>>
>> retval = 0;
>>
>> + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
>> + * lock(dqm) -> bo::reserve
>> + */
>> + mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>> + get_mqd_type_from_queue_type(q->properties.type));
>> +
>> + if (!mqd_mgr) {
>> + retval = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + /*
>> + * Eviction state logic: we only mark active queues as evicted
>> + * to avoid the overhead of restoring inactive queues later
>> + */
>> + if (qpd->evicted)
>> + q->properties.is_evicted = (q->properties.queue_size > 0 &&
>> + q->properties.queue_percent > 0 &&
>> + q->properties.queue_address != 0);
>> + dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
>> + q->properties.tba_addr = qpd->tba_addr;
>> + q->properties.tma_addr = qpd->tma_addr;
>> + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
>> + &q->gart_mqd_addr, &q->properties);
>> + if (retval)
>> + goto out;
>> +
>> dqm_lock(dqm);
>>
>> if (dqm->total_queue_count >= max_num_of_queues_per_device) {
>> @@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>> if (retval)
>> goto out_deallocate_sdma_queue;
>>
>> - mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>> - get_mqd_type_from_queue_type(q->properties.type));
>> -
>> - if (!mqd_mgr) {
>> - retval = -ENOMEM;
>> - goto out_deallocate_doorbell;
>> - }
>> - /*
>> - * Eviction state logic: we only mark active queues as evicted
>> - * to avoid the overhead of restoring inactive queues later
>> - */
>> - if (qpd->evicted)
>> - q->properties.is_evicted = (q->properties.queue_size > 0 &&
>> - q->properties.queue_percent > 0 &&
>> - q->properties.queue_address != 0);
>> -
>> - dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
>> -
>> - q->properties.tba_addr = qpd->tba_addr;
>> - q->properties.tma_addr = qpd->tma_addr;
>> - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
>> - &q->gart_mqd_addr, &q->properties);
>> - if (retval)
>> - goto out_deallocate_doorbell;
>>
>> list_add(&q->list, &qpd->queues_list);
>> qpd->queue_count++;
>> @@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>> dqm_unlock(dqm);
>> return retval;
>>
>> -out_deallocate_doorbell:
>> - deallocate_doorbell(qpd, q);
>> out_deallocate_sdma_queue:
>> if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>> deallocate_sdma_queue(dqm, q->sdma_id);
>> out_unlock:
>> dqm_unlock(dqm);
>> -
>> +out:
>> return retval;
>> }
>>
>> @@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>> qpd->reset_wavefronts = true;
>> }
>>
>> - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>> -
>> /*
>> * Unconditionally decrement this counter, regardless of the queue's
>> * type
>> @@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>>
>> dqm_unlock(dqm);
>>
>> + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
>> + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>> +
>> return retval;
>>
>> failed:
>> @@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>> qpd->reset_wavefronts = false;
>> }
>>
>> - /* lastly, free mqd resources */
>> + dqm_unlock(dqm);
>> +
>> + /* Lastly, free mqd resources.
>> + * Do uninit_mqd() after dqm_unlock to avoid circular locking.
>> + */
>> list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
>> mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>> get_mqd_type_from_queue_type(q->properties.type));
>> @@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>> }
>>
>> out:
>> - dqm_unlock(dqm);
>> return retval;
>> }
>>