[PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
Chen, Guchun
Guchun.Chen at amd.com
Tue Mar 23 09:52:39 UTC 2021
[AMD Public Use]
Hi Christian,
Thanks for your patience.
Unfortunately, after applying the patch below, the Vulkan CTS test on my side fails. The same gfxhub page fault and kernel bug, along with the amdgpu_vm_update_ptes call trace, are observed. I will send the full log to you privately soon.
I suggest holding off on this patch until we have root-caused the issue.
Regards,
Guchun
-----Original Message-----
From: Das, Nirmoy <Nirmoy.Das at amd.com>
Sent: Tuesday, March 23, 2021 5:09 PM
To: Chen, Guchun <Guchun.Chen at amd.com>; Christian König <ckoenig.leichtzumerken at gmail.com>; amd-gfx at lists.freedesktop.org
Cc: Das, Nirmoy <Nirmoy.Das at amd.com>
Subject: Re: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
I ran "./piglit run opengl results/test" multiple times. Once I got a gfx timeout
error, but without a kernel freeze. I can't reproduce it any more.
Regards,
Nirmoy
On 3/22/21 2:11 PM, Chen, Guchun wrote:
> [AMD Public Use]
>
> Hi Christian,
>
> I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting?
>
> Regards,
> Guchun
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken at gmail.com>
> Sent: Monday, March 22, 2021 8:41 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chen, Guchun <Guchun.Chen at amd.com>; Das, Nirmoy
> <Nirmoy.Das at amd.com>
> Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
>
> Now that we found the underlying problem we can re-apply this patch.
>
> This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++-----------------
> 1 file changed, 18 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9268db1172bd..bc3951b71079 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -37,6 +37,7 @@
> #include "amdgpu_gmc.h"
> #include "amdgpu_xgmi.h"
> #include "amdgpu_dma_buf.h"
> +#include "amdgpu_res_cursor.h"
>
> /**
> * DOC: GPUVM
> @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
> * @last: last mapped entry
> * @flags: flags for the entries
> * @offset: offset into nodes and pages_addr
> - * @nodes: array of drm_mm_nodes with the MC addresses
> + * @res: ttm_resource to map
> * @pages_addr: DMA addresses to use for mapping
> * @fence: optional resulting fence
> *
> @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> bool unlocked, struct dma_resv *resv,
> uint64_t start, uint64_t last,
> uint64_t flags, uint64_t offset,
> - struct drm_mm_node *nodes,
> + struct ttm_resource *res,
> dma_addr_t *pages_addr,
> struct dma_fence **fence)
> {
> struct amdgpu_vm_update_params params;
> + struct amdgpu_res_cursor cursor;
> enum amdgpu_sync_mode sync_mode;
> - uint64_t pfn;
> int r;
>
> memset(&params, 0, sizeof(params)); @@ -1622,14 +1623,6 @@ static
> int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> else
> sync_mode = AMDGPU_SYNC_EXPLICIT;
>
> - pfn = offset >> PAGE_SHIFT;
> - if (nodes) {
> - while (pfn >= nodes->size) {
> - pfn -= nodes->size;
> - ++nodes;
> - }
> - }
> -
> amdgpu_vm_eviction_lock(vm);
> if (vm->evicting) {
> r = -EBUSY;
> @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (r)
> goto error_unlock;
>
> - do {
> + amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
> + &cursor);
> + while (cursor.remaining) {
> uint64_t tmp, num_entries, addr;
>
> -
> - num_entries = last - start + 1;
> - if (nodes) {
> - addr = nodes->start << PAGE_SHIFT;
> - num_entries = min((nodes->size - pfn) *
> - AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
> - } else {
> - addr = 0;
> - }
> -
> + num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
> if (pages_addr) {
> bool contiguous = true;
>
> if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
> + uint64_t pfn = cursor.start >> PAGE_SHIFT;
> uint64_t count;
>
> contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> }
>
> if (!contiguous) {
> - addr = pfn << PAGE_SHIFT;
> + addr = cursor.start;
> params.pages_addr = pages_addr;
> } else {
> - addr = pages_addr[pfn];
> + addr = pages_addr[cursor.start >> PAGE_SHIFT];
> params.pages_addr = NULL;
> }
>
> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
> - addr += bo_adev->vm_manager.vram_base_offset;
> - addr += pfn << PAGE_SHIFT;
> + addr = bo_adev->vm_manager.vram_base_offset +
> + cursor.start;
> + } else {
> + addr = 0;
> }
>
> tmp = start + num_entries;
> @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (r)
> goto error_unlock;
>
> - pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
> - if (nodes && nodes->size == pfn) {
> - pfn = 0;
> - ++nodes;
> - }
> + amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
> start = tmp;
> -
> - } while (unlikely(start != last + 1));
> + };
>
> r = vm->update_funcs->commit(&params, fence);
>
> @@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> struct amdgpu_bo_va_mapping *mapping;
> dma_addr_t *pages_addr = NULL;
> struct ttm_resource *mem;
> - struct drm_mm_node *nodes;
> struct dma_fence **last_update;
> struct dma_resv *resv;
> uint64_t flags;
> @@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
> *adev, struct amdgpu_bo_va *bo_va,
>
> if (clear || !bo) {
> mem = NULL;
> - nodes = NULL;
> resv = vm->root.base.bo->tbo.base.resv;
> } else {
> struct drm_gem_object *obj = &bo->tbo.base; @@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> bo = gem_to_amdgpu_bo(gobj);
> }
> mem = &bo->tbo.mem;
> - nodes = mem->mm_node;
> if (mem->mem_type == TTM_PL_TT)
> pages_addr = bo->tbo.ttm->dma_address;
> }
> @@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
> resv, mapping->start,
> mapping->last, update_flags,
> - mapping->offset, nodes,
> + mapping->offset, mem,
> pages_addr, last_update);
> if (r)
> return r;
> --
> 2.25.1
More information about the amd-gfx
mailing list