[PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

Christian König ckoenig.leichtzumerken at gmail.com
Fri Apr 23 09:32:43 UTC 2021



Am 23.04.21 um 11:19 schrieb Roy Sun:
> Tracking devices, process info and fence info using
> /proc/pid/fdinfo
>
> Signed-off-by: David M Nieto <David.Nieto at amd.com>
> Signed-off-by: Roy Sun <Roy.Sun at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile        |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 61 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h    |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 96 ++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 45 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +
>   11 files changed, 280 insertions(+), 2 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index ee85e8aba636..d216b7ecb5d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o
>   
> +amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
> +
>   amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
>   
>   # add asic specific block
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 125b25a5ce5b..3365feae15e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -107,6 +107,7 @@
>   #include "amdgpu_gfxhub.h"
>   #include "amdgpu_df.h"
>   #include "amdgpu_smuio.h"
> +#include "amdgpu_fdinfo.h"
>   
>   #define MAX_GPU_INSTANCE		16
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 0350205c4897..01fe60fedcbe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>   	idr_destroy(&mgr->ctx_handles);
>   	mutex_destroy(&mgr->lock);
>   }
> +
> +void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity *centity,
> +		ktime_t *total, ktime_t *max)
> +{
> +	ktime_t now, t1;
> +	uint32_t i;
> +
> +	now = ktime_get();
> +	for (i = 0; i < amdgpu_sched_jobs; i++) {
> +		struct dma_fence *fence;
> +		struct drm_sched_fence *s_fence;
> +
> +		spin_lock(&ctx->ring_lock);
> +		fence = dma_fence_get(centity->fences[i]);
> +		spin_unlock(&ctx->ring_lock);
> +		if (!fence)
> +			continue;
> +		s_fence = to_drm_sched_fence(fence);
> +		if (!dma_fence_is_signaled(&s_fence->scheduled))
> +			continue;
> +		t1 = s_fence->scheduled.timestamp;
> +		if (t1 >= now)
> +			continue;
> +		if (dma_fence_is_signaled(&s_fence->finished) &&
> +			s_fence->finished.timestamp < now)
> +			*total += ktime_sub(s_fence->finished.timestamp, t1);
> +		else
> +			*total += ktime_sub(now, t1);
> +		t1 = ktime_sub(now, t1);
> +		dma_fence_put(fence);
> +		*max = max(t1, *max);
> +	}
> +}
> +
> +ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
> +		uint32_t idx, uint64_t *elapsed)
> +{
> +	struct idr *idp;
> +	struct amdgpu_ctx *ctx;
> +	uint32_t id;
> +	struct amdgpu_ctx_entity *centity;
> +	ktime_t total = 0, max = 0;
> +
> +	if (idx >= AMDGPU_MAX_ENTITY_NUM)
> +		return 0;
> +	idp = &mgr->ctx_handles;
> +	mutex_lock(&mgr->lock);
> +	idr_for_each_entry(idp, ctx, id) {
> +		if (!ctx->entities[hwip][idx])
> +			continue;
> +
> +		centity = ctx->entities[hwip][idx];
> +		amdgpu_ctx_fence_time(ctx, centity, &total, &max);
> +	}
> +
> +	mutex_unlock(&mgr->lock);
> +	if (elapsed)
> +		*elapsed = max;
> +
> +	return total;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index f54e10314661..10dcf59a5c6b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -87,5 +87,8 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
>   void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
>   long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
>   void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
> -
> +ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
> +		uint32_t idx, uint64_t *elapsed);
> +void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity *centity,
> +		ktime_t *total, ktime_t *max);
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 0369d3532bf0..01603378dbc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -42,7 +42,7 @@
>   #include "amdgpu_irq.h"
>   #include "amdgpu_dma_buf.h"
>   #include "amdgpu_sched.h"
> -
> +#include "amdgpu_fdinfo.h"
>   #include "amdgpu_amdkfd.h"
>   
>   #include "amdgpu_ras.h"
> @@ -1692,6 +1692,9 @@ static const struct file_operations amdgpu_driver_kms_fops = {
>   #ifdef CONFIG_COMPAT
>   	.compat_ioctl = amdgpu_kms_compat_ioctl,
>   #endif
> +#ifdef CONFIG_PROC_FS
> +	.show_fdinfo = amdgpu_show_fdinfo
> +#endif
>   };
>   
>   int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> new file mode 100644
> index 000000000000..f4ab7c65517e
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: MIT
> +/* Copyright 2021 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: David Nieto
> + *          Roy Sun
> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/uaccess.h>
> +#include <linux/reboot.h>
> +#include <linux/syscalls.h>
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/drm_debugfs.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_gem.h"
> +#include "amdgpu_ctx.h"
> +#include "amdgpu_fdinfo.h"
> +
> +
> +static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
> +	[AMDGPU_HW_IP_GFX]	=	"gfx",
> +	[AMDGPU_HW_IP_COMPUTE]	=	"compute",
> +	[AMDGPU_HW_IP_DMA]	=	"dma",
> +	[AMDGPU_HW_IP_UVD]	=	"dec",
> +	[AMDGPU_HW_IP_VCE]	=	"enc",
> +	[AMDGPU_HW_IP_UVD_ENC]	=	"enc_1",
> +	[AMDGPU_HW_IP_VCN_DEC]	=	"dec",
> +	[AMDGPU_HW_IP_VCN_ENC]	=	"enc",
> +	[AMDGPU_HW_IP_VCN_JPEG]	=	"jpeg",
> +};
> +
> +void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
> +{
> +	struct amdgpu_fpriv *fpriv;
> +	uint32_t bus, dev, fn, i, domain;
> +	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
> +	struct drm_file *file = f->private_data;
> +	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
> +
> +	if (amdgpu_file_to_fpriv(f, &fpriv))
> +		return;
> +	bus = adev->pdev->bus->number;
> +	domain = pci_domain_nr(adev->pdev->bus);
> +	dev = PCI_SLOT(adev->pdev->devfn);
> +	fn = PCI_FUNC(adev->pdev->devfn);
> +


> +	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);

You need to grab the VM lock before calling this.

Something like amdgpu_bo_reserve(vm->root.bo, 
false)/amdgpu_bo_unreserve(vm->root.bo) should do it.

Apart from that looks good to me.

Regards,
Christian.

> +	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
> +			dev, fn, fpriv->vm.pasid);
> +	seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
> +	seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
> +	seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
> +		uint32_t count = amdgpu_ctx_num_entities[i];
> +		int idx = 0;
> +		uint64_t total = 0, min = 0;
> +		uint32_t perc, frac;
> +
> +		for (idx = 0; idx < count; idx++) {
> +			total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
> +				i, idx, &min);
> +			if ((total == 0) || (min == 0))
> +				continue;
> +
> +			perc = div64_u64(10000 * total, min);
> +			frac = perc % 100;
> +
> +			seq_printf(m, "%s%d:\t%d.%d%%\n",
> +					amdgpu_ip_name[i],
> +					idx, perc/100, frac);
> +		}
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
> new file mode 100644
> index 000000000000..41a4c7056729
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: MIT
> + * Copyright 2021 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: David Nieto
> + *          Roy Sun
> + */
> +#ifndef __AMDGPU_SMI_H__
> +#define __AMDGPU_SMI_H__
> +
> +#include <linux/idr.h>
> +#include <linux/kfifo.h>
> +#include <linux/rbtree.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/drm_file.h>
> +#include <drm/ttm/ttm_bo_driver.h>
> +#include <linux/sched/mm.h>
> +
> +#include "amdgpu_sync.h"
> +#include "amdgpu_ring.h"
> +#include "amdgpu_ids.h"
> +
> +uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
> +void amdgpu_show_fdinfo(struct seq_file *m, struct file *f);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 1345f7eba011..de728632b5df 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1304,6 +1304,26 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>   	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
>   }
>   
> +void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
> +				uint64_t *gtt_mem, uint64_t *cpu_mem)
> +{
> +	unsigned int domain;
> +
> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +	switch (domain) {
> +	case AMDGPU_GEM_DOMAIN_VRAM:
> +		*vram_mem += amdgpu_bo_size(bo);
> +		break;
> +	case AMDGPU_GEM_DOMAIN_GTT:
> +		*gtt_mem += amdgpu_bo_size(bo);
> +		break;
> +	case AMDGPU_GEM_DOMAIN_CPU:
> +	default:
> +		*cpu_mem += amdgpu_bo_size(bo);
> +		break;
> +	}
> +}
> +
>   /**
>    * amdgpu_bo_release_notify - notification about a BO being released
>    * @bo: pointer to a buffer object
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 973c88bdf37b..659406475c2c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -304,6 +304,8 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
>   u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
>   int amdgpu_bo_validate(struct amdgpu_bo *bo);
> +void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
> +				uint64_t *gtt_mem, uint64_t *cpu_mem);
>   int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
>   			     struct dma_fence **fence);
>   uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f95bcda8463f..7c7a387c2285 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -25,6 +25,7 @@
>    *          Alex Deucher
>    *          Jerome Glisse
>    */
> +
>   #include <linux/dma-fence-array.h>
>   #include <linux/interval_tree_generic.h>
>   #include <linux/idr.h>
> @@ -1718,6 +1719,50 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	return r;
>   }
>   
> +void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
> +				uint64_t *gtt_mem, uint64_t *cpu_mem)
> +{
> +	struct amdgpu_bo_va *bo_va, *tmp;
> +
> +	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	spin_lock(&vm->invalidated_lock);
> +	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> +				gtt_mem, cpu_mem);
> +	}
> +	spin_unlock(&vm->invalidated_lock);
> +}
>   /**
>    * amdgpu_vm_bo_update - update all BO mappings in the vm page table
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 848e175e99ff..7f670d603895 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -457,6 +457,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>   void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
>   void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
> +void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
> +				uint64_t *gtt_mem, uint64_t *cpu_mem);
>   
>   #if defined(CONFIG_DEBUG_FS)
>   void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);



More information about the amd-gfx mailing list