[PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath

Andrey Grodzovsky andrey.grodzovsky at amd.com
Wed Sep 15 20:14:46 UTC 2021


I fixed 2 regressions and latest code, applied your patch on top and 
passed libdrm tests
on Vega 10. You can pickup those 2 patches and try too if you have time. 
In any case -

Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>

Andrey

On 2021-09-15 2:37 a.m., xinhui pan wrote:
> We hit soft hang while doing memory pressure test on one numa system.
> After a qucik look, this is because kfd invalid/valid userptr memory
> frequently with process_info lock hold.
> Looks like update page table mapping use too much cpu time.
>
> perf top says below,
> 75.81%  [kernel]       [k] __srcu_read_unlock
>   6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
>   3.56%  [kernel]       [k] __srcu_read_lock
>   2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
>   2.20%  [kernel]       [k] __sg_page_iter_dma_next
>   2.15%  [drm]          [k] drm_dev_enter
>   1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
>   1.18%  [kernel]       [k] __sg_alloc_table_from_pages
>   1.09%  [drm]          [k] drm_dev_exit
>
> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> change from v1:
> add enter/exit in more gmc_set_pte_pde callers
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
>   3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 76efd5f8950f..d7e4f4660acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -34,6 +34,7 @@
>   #include <asm/set_memory.h>
>   #endif
>   #include "amdgpu.h"
> +#include <drm/drm_drv.h>
>   
>   /*
>    * GART
> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	u64 page_base;
>   	/* Starting from VEGA10, system bit must be 0 to mean invalid. */
>   	uint64_t flags = 0;
> +	int idx;
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to unbind memory from uninitialized GART !\n");
>   		return -EINVAL;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return 0;
> +
>   	t = offset / AMDGPU_GPU_PAGE_SIZE;
>   	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>   	for (i = 0; i < pages; i++, p++) {
> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	for (i = 0; i < adev->num_vmhubs; i++)
>   		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>   
> +	drm_dev_exit(idx);
>   	return 0;
>   }
>   
> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>   {
>   	uint64_t page_base;
>   	unsigned i, j, t;
> +	int idx;
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to bind memory to uninitialized GART !\n");
>   		return -EINVAL;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return 0;
> +
>   	t = offset / AMDGPU_GPU_PAGE_SIZE;
>   
>   	for (i = 0; i < pages; i++) {
> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>   			page_base += AMDGPU_GPU_PAGE_SIZE;
>   		}
>   	}
> +	drm_dev_exit(idx);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 54f059501a33..1427fd70310c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>   {
>   	void __iomem *ptr = (void *)cpu_pt_addr;
>   	uint64_t value;
> -	int idx;
> -
> -	if (!drm_dev_enter(&adev->ddev, &idx))
> -		return 0;
>   
>   	/*
>   	 * The following is for PTE only. GART does not have PDEs.
> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>   	value |= flags;
>   	writeq(value, ptr + (gpu_page_idx * 8));
>   
> -	drm_dev_exit(idx);
> -
>   	return 0;
>   }
>   
> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>   		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
>   	u64 vram_end = vram_addr + vram_size;
>   	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> +	int idx;
> +
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return;
>   
>   	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>   	flags |= AMDGPU_PTE_WRITEABLE;
> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>   	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>   	/* Requires gart_ptb_gpu_pa to be 4K aligned */
>   	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
> +	drm_dev_exit(idx);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0126ece898da..daa16d2f89da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   	struct amdgpu_bo *bo = &vmbo->bo;
>   	unsigned entries, ats_entries;
>   	uint64_t addr;
> -	int r;
> +	int r, idx;
>   
>   	/* Figure out our place in the hierarchy */
>   	if (ancestor->parent) {
> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   			return r;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return -ENODEV;
> +
>   	r = vm->update_funcs->map_table(vmbo);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   
>   	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	addr = 0;
>   	if (ats_entries) {
> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   		r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
>   					     value, flags);
>   		if (r)
> -			return r;
> +			goto exit;
>   
>   		addr += ats_entries * 8;
>   	}
> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
>   					     value, flags);
>   		if (r)
> -			return r;
> +			goto exit;
>   	}
>   
> -	return vm->update_funcs->commit(&params, NULL);
> +	r = vm->update_funcs->commit(&params, NULL);
> +exit:
> +	drm_dev_exit(idx);
> +	return r;
>   }
>   
>   /**
> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   			  struct amdgpu_vm *vm, bool immediate)
>   {
>   	struct amdgpu_vm_update_params params;
> -	int r;
> +	int r, idx;
>   
>   	if (list_empty(&vm->relocated))
>   		return 0;
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return -ENODEV;
> +
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
>   	params.vm = vm;
> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   
>   	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	while (!list_empty(&vm->relocated)) {
>   		struct amdgpu_vm_bo_base *entry;
> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, &vm->last_update);
>   	if (r)
>   		goto error;
> +	drm_dev_exit(idx);
>   	return 0;
>   
>   error:
>   	amdgpu_vm_invalidate_pds(adev, vm);
> +exit:
> +	drm_dev_exit(idx);
>   	return r;
>   }
>   


More information about the amd-gfx mailing list