[PATCH 1/2] drm/amdgpu: Convert to common fdinfo format v3
Christian König
ckoenig.leichtzumerken at gmail.com
Fri May 13 10:25:18 UTC 2022
On 13.05.22 10:18, Sharma, Shashank wrote:
> Hey Christian,
>
> On 5/11/2022 2:02 PM, Christian König wrote:
>> [SNIP]
>> @@ -162,17 +162,49 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
>> return hw_prio;
>> }
>> +/* Calculate the time spent on the hw */
>> +static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
>> +{
>> + struct drm_sched_fence *s_fence;
>> +
>> + if (!fence)
>> + return ns_to_ktime(0);
>> +
>> + /* When the fence is not even scheduled it can't have spent time */
>> + s_fence = to_drm_sched_fence(fence);
>> + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
>> + return ns_to_ktime(0);
>> +
>> + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
>> + return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
> shouldn't this be s_fence->finished.timestamp instead of
> s_fence->scheduled.timestamp?
No, the finished fence is not signaled yet, so its timestamp isn't valid.
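
In other words, the three cases the function distinguishes are
(illustration only, not part of the patch):

    not scheduled yet:         0
    scheduled, still running:  ktime_get() - scheduled.timestamp
    scheduled and finished:    finished.timestamp - scheduled.timestamp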
>> +
>> + return ktime_sub(s_fence->finished.timestamp,
>> + s_fence->scheduled.timestamp);
>> +}
>> +
>> +static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
>> + struct amdgpu_ctx_entity *centity)
>> +{
>> + ktime_t res = ns_to_ktime(0);
>> + uint32_t i;
>> +
>> + spin_lock(&ctx->ring_lock);
>> + for (i = 0; i < amdgpu_sched_jobs; i++) {
>> + res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
>> + }
>> + spin_unlock(&ctx->ring_lock);
>> + return res;
>> +}
>> static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
>> const u32 ring)
>> {
>> - struct amdgpu_device *adev = ctx->adev;
>> - struct amdgpu_ctx_entity *entity;
>> struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
>> - unsigned num_scheds = 0;
>> - int32_t ctx_prio;
>> - unsigned int hw_prio;
>> + struct amdgpu_device *adev = ctx->mgr->adev;
>> + struct amdgpu_ctx_entity *entity;
>> enum drm_sched_priority drm_prio;
>> + unsigned int hw_prio, num_scheds;
>> + int32_t ctx_prio;
>> int r;
>> entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
>> @@ -182,6 +214,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
>> ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
>> ctx->init_priority : ctx->override_priority;
>> + entity->hw_ip = hw_ip;
>> entity->sequence = 1;
>> hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
>> drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
>> @@ -220,11 +253,29 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
>> return r;
>> }
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> +static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
>> +{
>> + ktime_t res = ns_to_ktime(0);
>> + int i;
>> +
>> + if (!entity)
>> + return res;
>> +
>> + for (i = 0; i < amdgpu_sched_jobs; ++i) {
>> + res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
>> + dma_fence_put(entity->fences[i]);
> Don't we need a corresponding _get() for this put() ?
The get is in amdgpu_ctx_add_fence().
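
Simplified it is this pattern (a sketch from memory, see the real
amdgpu_ctx_add_fence() for the details):

    struct dma_fence *other = centity->fences[idx];

    dma_fence_get(fence);            /* reference now owned by the ring buffer */
    spin_lock(&ctx->ring_lock);
    centity->fences[idx] = fence;    /* replaces the old entry */
    centity->sequence++;
    spin_unlock(&ctx->ring_lock);
    dma_fence_put(other);            /* drop the reference of the replaced entry */

So every fence in centity->fences[] holds a reference, and the old one is
only dropped after it was replaced under the lock.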
> I had a quick look at amdgpu_ctx_fence_time() too, and it seems it had
> its own pair of get/put.
Yeah, which was completely superfluous. The fence is guaranteed to stay
valid as long as the lock is held.
> [SNIP]
>>
>> @@ -40,7 +44,7 @@ struct amdgpu_ctx_entity {
>> struct amdgpu_ctx {
>> struct kref refcount;
>> - struct amdgpu_device *adev;
>> + struct amdgpu_ctx_mgr *mgr;
> A suggestion here: should we split this patch into two parts:
> - introduce ctx_mgr and change the respective APIs
> - use ctx_mgr and calculate the time spent per context
>
> It would make it easier to read and review.
Good point.
Thanks,
Christian.
>
> - Shashank
>
>> unsigned reset_counter;
>> unsigned reset_counter_query;
>> uint32_t vram_lost_counter;
>> @@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
>> struct mutex lock;
>> /* protected by lock */
>> struct idr ctx_handles;
>> + atomic64_t time_spend[AMDGPU_HW_IP_NUM];
>> };
>> extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
>> @@ -70,9 +75,10 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
>> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>> u32 ring, struct drm_sched_entity **entity);
>> -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
>> - struct drm_sched_entity *entity,
>> - struct dma_fence *fence, uint64_t *seq);
>> +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx_mgr *mgr,
>> + struct amdgpu_ctx *ctx,
>> + struct drm_sched_entity *entity,
>> + struct dma_fence *fence);
>> struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>> struct drm_sched_entity *entity,
>> uint64_t seq);
>> @@ -85,10 +91,11 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>> int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
>> struct drm_sched_entity *entity);
>> -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
>> +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
>> + struct amdgpu_device *adev);
>> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
>> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
>> void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>> -ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
>> - uint32_t idx, uint64_t *elapsed);
>> +ktime_t amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip);
>> +
>> #endif
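
The idea behind the new interface, as a rough sketch (the real
implementation is in the snipped part of the patch; the entity array
names come from the existing amdgpu_ctx structure):

    ktime_t amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip)
    {
    	struct amdgpu_ctx *ctx;
    	ktime_t usage;
    	uint32_t i;
    	int id;

    	/* Time accumulated from already destroyed contexts... */
    	usage = ns_to_ktime(atomic64_read(&mgr->time_spend[hwip]));

    	/* ...plus the time of the still living ones. */
    	mutex_lock(&mgr->lock);
    	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
    		for (i = 0; i < amdgpu_ctx_num_entities[hwip]; ++i) {
    			struct amdgpu_ctx_entity *centity = ctx->entities[hwip][i];

    			if (centity)
    				usage = ktime_add(usage,
    						  amdgpu_ctx_entity_time(ctx, centity));
    		}
    	}
    	mutex_unlock(&mgr->lock);

    	return usage;
    }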
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
>> index 5a6857c44bb6..52c2b90925a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
>> @@ -32,6 +32,7 @@
>> #include <drm/amdgpu_drm.h>
>> #include <drm/drm_debugfs.h>
>> +#include <drm/drm_drv.h>
>> #include "amdgpu.h"
>> #include "amdgpu_vm.h"
>> @@ -55,10 +56,10 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
>> void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
>> {
>> struct amdgpu_fpriv *fpriv;
>> - uint32_t bus, dev, fn, i, domain;
>> uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
>> struct drm_file *file = f->private_data;
>> struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
>> + uint32_t bus, dev, fn, domain, hw_ip;
>> struct amdgpu_bo *root;
>> int ret;
>> @@ -83,29 +84,25 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
>> amdgpu_bo_unreserve(root);
>> amdgpu_bo_unref(&root);
>> - seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
>> - dev, fn, fpriv->vm.pasid);
>> - seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
>> - seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
>> - seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
>> - uint32_t count = amdgpu_ctx_num_entities[i];
>> - int idx = 0;
>> - uint64_t total = 0, min = 0;
>> - uint32_t perc, frac;
>> + /*
>> + ******************************************************************
>> + * For text output format description please see drm-usage-stats.rst!
>> + ******************************************************************
>> + */
>> - for (idx = 0; idx < count; idx++) {
>> - total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
>> - i, idx, &min);
>> - if ((total == 0) || (min == 0))
>> - continue;
>> + seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
>> + seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n",
>> domain, bus,
>> + dev, fn, fpriv->vm.pasid);
>> + seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
>> + seq_printf(m, "drm-memory-gtt:\t%llu KiB\n", gtt_mem/1024UL);
>> + seq_printf(m, "drm-memory-cpu:\t%llu KiB\n", cpu_mem/1024UL);
>> + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
>> + ktime_t usage = amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, hw_ip);
>> - perc = div64_u64(10000 * total, min);
>> - frac = perc % 100;
>> + if (!usage)
>> + continue;
>> - seq_printf(m, "%s%d:\t%d.%d%%\n",
>> - amdgpu_ip_name[i],
>> - idx, perc/100, frac);
>> - }
>> + seq_printf(m, "drm-engine-%s:\t%Ld ns\n",
>> + amdgpu_ip_name[hw_ip], ktime_to_ns(usage));
>> }
>> }
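
For reference, the resulting fdinfo text then looks something like this
(values made up for illustration):

    drm-driver:	amdgpu
    drm-pdev:	0000:04:00.0
    pasid:	32770
    drm-memory-vram:	1024 KiB
    drm-memory-gtt:	512 KiB
    drm-memory-cpu:	256 KiB
    drm-engine-gfx:	123456789 ns

Userspace then derives a utilization percentage by sampling the
drm-engine-* values twice and dividing the delta by the wall clock time
between the samples, as described in drm-usage-stats.rst.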
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 6b626c293e72..0814e6508a00 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1144,7 +1144,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>> mutex_init(&fpriv->bo_list_lock);
>> idr_init(&fpriv->bo_list_handles);
>> - amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>> + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>> file_priv->driver_priv = fpriv;
>> goto out_suspend;