[PATCH v3] drm/amdgpu: Add debugfs entry for printing VM info

Christian König christian.koenig at amd.com
Mon Oct 12 09:10:57 UTC 2020


Am 12.10.20 um 11:01 schrieb Mihir Patel:
> From: Mihir Bhogilal Patel <Mihir.Patel at amd.com>
>
> Create new debugfs entry to print memory info using VM buffer
> objects.
>
> Pending:
> - Consolidated memory utilization info like total, swap etc.
>
> V2: Added Common function for printing BO info.
>      Dump more VM lists for evicted, moved, relocated, invalidated.
>      Removed dumping VM mapped BOs.
> V3: Fixed coding style comments, renamed print API and variables.
>
> Signed-off-by: Mihir Bhogilal Patel <Mihir.Patel at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 30 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     | 69 ++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 74 +++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 88 +++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  4 +
>   6 files changed, 204 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 2d125b8b15ee..0b41b8b72ba3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1335,11 +1335,41 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
>   	return 0;
>   }
>   
> +static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = (struct drm_info_node *)m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_file *file;
> +	int r;
> +
> +	r = mutex_lock_interruptible(&dev->filelist_mutex);
> +	if (r)
> +		return r;
> +
> +	list_for_each_entry(file, &dev->filelist, lhead) {
> +		struct amdgpu_fpriv *fpriv = file->driver_priv;
> +		struct amdgpu_vm *vm = &fpriv->vm;
> +
> +		seq_printf(m, "pid:%d\tProcess:%s ----------\n",
> +				vm->task_info.pid, vm->task_info.process_name);
> +		r = amdgpu_bo_reserve(vm->root.base.bo, true);
> +		if (r)
> +			continue;

I would rather say break here and return r from the function as well.

When reading this debugfs file is aborted using ctrl+c we would 
otherwise just continue with the loop.

> +		amdgpu_debugfs_vm_bo_info(vm, m);
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
> +
> +	mutex_unlock(&dev->filelist_mutex);
> +
> +	return 0;
> +}
> +
>   static const struct drm_info_list amdgpu_debugfs_list[] = {
>   	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
>   	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
>   	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
>   	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
> +	{"amdgpu_vm_info", &amdgpu_debugfs_vm_info},
>   };
>   
>   static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index f4c2e2e75b8f..6197c5ce744c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -826,67 +826,6 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   }
>   
>   #if defined(CONFIG_DEBUG_FS)
> -
> -#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)	\
> -	if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) {	\
> -		seq_printf((m), " " #flag);		\
> -	}
> -
> -static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
> -{
> -	struct drm_gem_object *gobj = ptr;
> -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> -	struct seq_file *m = data;
> -
> -	struct dma_buf_attachment *attachment;
> -	struct dma_buf *dma_buf;
> -	unsigned domain;
> -	const char *placement;
> -	unsigned pin_count;
> -
> -	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> -	switch (domain) {
> -	case AMDGPU_GEM_DOMAIN_VRAM:
> -		placement = "VRAM";
> -		break;
> -	case AMDGPU_GEM_DOMAIN_GTT:
> -		placement = " GTT";
> -		break;
> -	case AMDGPU_GEM_DOMAIN_CPU:
> -	default:
> -		placement = " CPU";
> -		break;
> -	}
> -	seq_printf(m, "\t0x%08x: %12ld byte %s",
> -		   id, amdgpu_bo_size(bo), placement);
> -
> -	pin_count = READ_ONCE(bo->pin_count);
> -	if (pin_count)
> -		seq_printf(m, " pin count %d", pin_count);
> -
> -	dma_buf = READ_ONCE(bo->tbo.base.dma_buf);
> -	attachment = READ_ONCE(bo->tbo.base.import_attach);
> -
> -	if (attachment)
> -		seq_printf(m, " imported from %p%s", dma_buf,
> -			   attachment->peer2peer ? " P2P" : "");
> -	else if (dma_buf)
> -		seq_printf(m, " exported as %p", dma_buf);
> -
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
> -	amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
> -
> -	seq_printf(m, "\n");
> -
> -	return 0;
> -}
> -
>   static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
>   {
>   	struct drm_info_node *node = (struct drm_info_node *)m->private;
> @@ -900,6 +839,8 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
>   
>   	list_for_each_entry(file, &dev->filelist, lhead) {
>   		struct task_struct *task;
> +		struct drm_gem_object *gobj;
> +		int id;
>   
>   		/*
>   		 * Although we have a valid reference on file->pid, that does
> @@ -914,7 +855,11 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
>   		rcu_read_unlock();
>   
>   		spin_lock(&file->table_lock);
> -		idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
> +		idr_for_each_entry(&file->object_idr, gobj, id) {
> +			struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> +
> +			amdgpu_bo_print_info(id, bo, m);
> +		}
>   		spin_unlock(&file->table_lock);
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 2ce79bccfc30..f4142b7a3c59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1528,3 +1528,77 @@ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
>   	}
>   	return domain;
>   }
> +
> +#if defined(CONFIG_DEBUG_FS)
> +#define amdgpu_bo_print_flag(m, bo, flag)		        \
> +	do {							\
> +		if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) {	\
> +			seq_printf((m), " " #flag);		\
> +		}						\
> +	} while (0)
> +
> +/**
> + * amdgpu_debugfs_print_bo_info - print BO info in debugfs file
> + *
> + * @id: Index or Id of the BO
> + * @bo: Requested BO for printing info
> + * @data: debugfs file
> + *
> + * Print BO information in debugfs file
> + *
> + * Returns:
> + * Size of the BO in bytes.
> + */
> +unsigned long amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)

Please use uint64_t or u64 for the return type.

> +{
> +	struct dma_buf_attachment *attachment;
> +	struct dma_buf *dma_buf;
> +	unsigned int domain;
> +	const char *placement;
> +	unsigned int pin_count;
> +	unsigned long size;

Same here of course.

> +
> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +	switch (domain) {
> +	case AMDGPU_GEM_DOMAIN_VRAM:
> +		placement = "VRAM";
> +		break;
> +	case AMDGPU_GEM_DOMAIN_GTT:
> +		placement = " GTT";
> +		break;
> +	case AMDGPU_GEM_DOMAIN_CPU:
> +	default:
> +		placement = " CPU";
> +		break;
> +	}
> +
> +	size = amdgpu_bo_size(bo);
> +	seq_printf(m, "\t\t0x%08x: %12ld byte %s",
> +			id++, size, placement);

Incrementing id here is pointless.

Maybe move this outside the function as well, but this is not a hard 
requirement.

> +
> +	pin_count = READ_ONCE(bo->pin_count);
> +	if (pin_count)
> +		seq_printf(m, " pin count %d", pin_count);
> +
> +	dma_buf = READ_ONCE(bo->tbo.base.dma_buf);
> +	attachment = READ_ONCE(bo->tbo.base.import_attach);
> +
> +	if (attachment)
> +		seq_printf(m, " imported from %p", dma_buf);
> +	else if (dma_buf)
> +		seq_printf(m, " exported as %p", dma_buf);
> +
> +	amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
> +	amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
> +	amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
> +	amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
> +	amdgpu_bo_print_flag(m, bo, SHADOW);
> +	amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
> +	amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
> +	amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
> +
> +	seq_puts(m, "\n");
> +
> +	return size;
> +}
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index afa5189dba7d..06fbff49958d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -330,6 +330,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev,
>   #if defined(CONFIG_DEBUG_FS)
>   void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
>   					 struct seq_file *m);
> +unsigned long amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m);
>   #endif
>   int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3cd949aad500..0e1cb399e508 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3392,3 +3392,91 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>   
>   	return false;
>   }
> +
> +#if defined(CONFIG_DEBUG_FS)
> +/**
> + * amdgpu_debugfs_vm_bo_info  - print BO info for the VM
> + *
> + * @vm: Requested VM for printing BO info
> + * @data: debugfs file
> + *
> + * Print BO information in debugfs file for the VM
> + */
> +void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
> +{
> +	struct amdgpu_bo_va *bo_va, *tmp;
> +	u64 total_idle = 0;
> +	u64 total_evicted = 0;
> +	u64 total_relocated = 0;
> +	u64 total_moved = 0;
> +	u64 total_invalidated = 0;
> +	unsigned int total_idle_objs = 0;
> +	unsigned int total_evicted_objs = 0;
> +	unsigned int total_relocated_objs = 0;
> +	unsigned int total_moved_objs = 0;
> +	unsigned int total_invalidated_objs = 0;
> +	unsigned int id = 0;
> +
> +	/* Print info for Idle BOs */

I would drop those comments, they just duplicate what the printed text 
says anyway.

> +	seq_puts(m, "\tIdle BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_idle_objs = id;
> +	id = 0;
> +
> +	/* Print info for Evicted BOs */
> +	seq_puts(m, "\tEvicted BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_evicted_objs = id;
> +	id = 0;
> +
> +	/* Print info for Relocated BOs */
> +	seq_puts(m, "\tRelocated BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_relocated_objs = id;
> +	id = 0;
> +
> +	/* Print info for Moved BOs */
> +	seq_puts(m, "\tMoved BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_moved_objs = id;
> +	id = 0;
> +
> +	/* Print info for Invalidated BOs */
> +	seq_puts(m, "\tInvalidated BOs:\n");
> +	spin_lock(&vm->invalidated_lock);
> +	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_invalidated += amdgpu_bo_print_info(id++,	bo_va->base.bo, m);
> +	}
> +	spin_unlock(&vm->invalidated_lock);
> +	total_invalidated_objs = id;
> +
> +	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
> +								total_idle_objs);

The coding style here is still odd, the second line should start after 
the ( of the first line.

Apart from that looks good to me,
Christian.

> +	seq_printf(m, "\tTotal evicted size:     %12lld\tobjs:\t%d\n", total_evicted,
> +								total_evicted_objs);
> +	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
> +								total_relocated_objs);
> +	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
> +								total_moved_objs);
> +	seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
> +								total_invalidated_objs);
> +}
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 7c46937c1c0e..74cc14179c41 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -441,4 +441,8 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
>   void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
>   
> +#if defined(CONFIG_DEBUG_FS)
> +void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
> +#endif
> +
>   #endif



More information about the amd-gfx mailing list