[PATCH] drm/v3d: Expose performance counters to userspace
Melissa Wen
melissa.srw at gmail.com
Mon Jun 14 16:19:25 UTC 2021
On 06/08, Juan A. Suarez Romero wrote:
> The V3D engine has several hardware performance counters that can be of
> interest to userspace performance analysis tools.
>
> This exposes new ioctls to create and destroy performance monitor
> objects, as well as to query the counter values.
>
> Each created performance monitor object has an ID that can be attached
> to CL/CSD submissions, so the driver enables the requested counters when
> the job is submitted, and updates the performance monitor values when
> the job is done.
>
> It is up to the user to ensure all the jobs have finished before
> getting the performance monitor values. It is also up to the user to
> properly synchronize BCL jobs when submitting jobs with different
> performance monitors attached.
>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Cc: David Airlie <airlied at linux.ie>
> Cc: Emma Anholt <emma at anholt.net>
> To: dri-devel at lists.freedesktop.org
> Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com>
Hi Juan,
I've checked it (+ the mesa MR) with glxgears, plus some basic tests from
igt, and lgtm.
Acked-by: Melissa Wen <mwen at igalia.com>
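
For anyone who wants to poke at this from userspace without going through
mesa, the flow with the new uapi looks roughly like the sketch below (built
against the header added by this patch; the include path, render node path,
counter selection and raw ioctl() calls are just illustrative, and error
handling is omitted):

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <drm/v3d_drm.h>  /* path depends on where the uapi header is installed */

  int main(void)
  {
          int fd = open("/dev/dri/renderD128", O_RDWR);

          /* Create a perfmon tracking two of the counters from the new enum. */
          struct drm_v3d_perfmon_create create = { 0 };
          create.ncounters = 2;
          create.counters[0] = V3D_PERFCNT_QPU_CYCLES_VALID_INSTR;
          create.counters[1] = V3D_PERFCNT_CYCLE_COUNT;
          ioctl(fd, DRM_IOCTL_V3D_PERFMON_CREATE, &create);

          /* Pass create.id as perfmon_id in drm_v3d_submit_cl/csd, wait for
           * those jobs to finish, then read the accumulated values. */
          uint64_t values[2];
          struct drm_v3d_perfmon_get_values get = { 0 };
          get.id = create.id;
          get.values_ptr = (uintptr_t)values;
          ioctl(fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &get);

          /* Counters can't be reset in place; destroy and recreate instead. */
          struct drm_v3d_perfmon_destroy destroy = { .id = create.id };
          ioctl(fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroy);

          close(fd);
          return 0;
  }
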
> ---
> drivers/gpu/drm/v3d/Makefile | 1 +
> drivers/gpu/drm/v3d/v3d_drv.c | 8 ++
> drivers/gpu/drm/v3d/v3d_drv.h | 63 +++++++++
> drivers/gpu/drm/v3d/v3d_gem.c | 31 +++++
> drivers/gpu/drm/v3d/v3d_perfmon.c | 213 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/v3d/v3d_regs.h | 2 +
> drivers/gpu/drm/v3d/v3d_sched.c | 16 +++
> include/uapi/drm/v3d_drm.h | 136 +++++++++++++++++++
> 8 files changed, 470 insertions(+)
> create mode 100644 drivers/gpu/drm/v3d/v3d_perfmon.c
>
> diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
> index db4cfc155821..e8b314137020 100644
> --- a/drivers/gpu/drm/v3d/Makefile
> +++ b/drivers/gpu/drm/v3d/Makefile
> @@ -9,6 +9,7 @@ v3d-y := \
> v3d_gem.o \
> v3d_irq.o \
> v3d_mmu.o \
> + v3d_perfmon.o \
> v3d_trace_points.o \
> v3d_sched.o
>
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index 99e22beea90b..9403c3b36aca 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -94,6 +94,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
> case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
> args->value = 1;
> return 0;
> + case DRM_V3D_PARAM_SUPPORTS_PERFMON:
> + args->value = (v3d->ver >= 40);
> + return 0;
> default:
> DRM_DEBUG("Unknown parameter %d\n", args->param);
> return -EINVAL;
> @@ -121,6 +124,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
> 1, NULL);
> }
>
> + v3d_perfmon_open_file(v3d_priv);
> file->driver_priv = v3d_priv;
>
> return 0;
> @@ -136,6 +140,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
> drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
> }
>
> + v3d_perfmon_close_file(v3d_priv);
> kfree(v3d_priv);
> }
>
> @@ -156,6 +161,9 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
> DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
> + DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
> };
>
> static const struct drm_driver v3d_drm_driver = {
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 8a390738d65b..270134779073 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -37,6 +37,40 @@ struct v3d_queue_state {
> u64 emit_seqno;
> };
>
> +/* Performance monitor object. The perfmon lifetime is controlled by userspace
> + * using perfmon-related ioctls. A perfmon can be attached to a submit_cl
> + * request, and when this is the case, HW perf counters will be activated just
> + * before the submit_cl is submitted to the GPU and disabled when the job is
> + * done. This way, only events related to a specific job will be counted.
> + */
> +struct v3d_perfmon {
> + /* Tracks the number of users of the perfmon; when this counter reaches
> + * zero, the perfmon is destroyed.
> + */
> + refcount_t refcnt;
> +
> + /* Protects perfmon stop, as it can be invoked from multiple places. */
> + struct mutex lock;
> +
> + /* Number of counters activated in this perfmon instance
> + * (must not exceed DRM_V3D_MAX_PERF_COUNTERS).
> + */
> + u8 ncounters;
> +
> + /* Events counted by the HW perf counters. */
> + u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
> +
> + /* Storage for counter values. Counters are incremented by the
> + * HW perf counter values every time the perfmon is attached
> + * to a GPU job. This way, perfmon users don't have to
> + * retrieve the results after each job if they want to track
> + * events covering several submissions. Note that counter
> + * values can't be reset, but you can fake a reset by
> + * destroying the perfmon and creating a new one.
> + */
> + u64 values[];
> +};
> +
> struct v3d_dev {
> struct drm_device drm;
>
> @@ -89,6 +123,9 @@ struct v3d_dev {
> */
> spinlock_t job_lock;
>
> + /* Used to track the active perfmon if any. */
> + struct v3d_perfmon *active_perfmon;
> +
> /* Protects bo_stats */
> struct mutex bo_lock;
>
> @@ -133,6 +170,11 @@ v3d_has_csd(struct v3d_dev *v3d)
> struct v3d_file_priv {
> struct v3d_dev *v3d;
>
> + struct {
> + struct idr idr;
> + struct mutex lock;
> + } perfmon;
> +
> struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
> };
>
> @@ -205,6 +247,11 @@ struct v3d_job {
> */
> struct dma_fence *done_fence;
>
> + /* Pointer to a performance monitor object if the user requested it,
> + * NULL otherwise.
> + */
> + struct v3d_perfmon *perfmon;
> +
> /* Callback for the freeing of the job on refcount going to 0. */
> void (*free)(struct kref *ref);
> };
> @@ -353,3 +400,19 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
> /* v3d_sched.c */
> int v3d_sched_init(struct v3d_dev *v3d);
> void v3d_sched_fini(struct v3d_dev *v3d);
> +
> +/* v3d_perfmon.c */
> +void v3d_perfmon_get(struct v3d_perfmon *perfmon);
> +void v3d_perfmon_put(struct v3d_perfmon *perfmon);
> +void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
> +void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
> + bool capture);
> +struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);
> +void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
> +void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);
> +int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> +int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> +int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 4eb354226972..5689da118197 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -126,6 +126,8 @@ v3d_reset(struct v3d_dev *v3d)
> v3d_mmu_set_page_table(v3d);
> v3d_irq_reset(v3d);
>
> + v3d_perfmon_stop(v3d, v3d->active_perfmon, false);
> +
> trace_v3d_reset_end(dev);
> }
>
> @@ -375,6 +377,9 @@ v3d_job_free(struct kref *ref)
> pm_runtime_mark_last_busy(job->v3d->drm.dev);
> pm_runtime_put_autosuspend(job->v3d->drm.dev);
>
> + if (job->perfmon)
> + v3d_perfmon_put(job->perfmon);
> +
> kfree(job);
> }
>
> @@ -539,6 +544,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>
> trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
>
> + if (args->pad != 0)
> + return -EINVAL;
> +
> if (args->flags != 0 &&
> args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
> DRM_INFO("invalid flags: %d\n", args->flags);
> @@ -611,8 +619,20 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> if (ret)
> goto fail;
>
> + if (args->perfmon_id) {
> + render->base.perfmon = v3d_perfmon_find(v3d_priv,
> + args->perfmon_id);
> +
> + if (!render->base.perfmon) {
> + ret = -ENOENT;
> + goto fail;
> + }
> + }
> +
> mutex_lock(&v3d->sched_lock);
> if (bin) {
> + bin->base.perfmon = render->base.perfmon;
> + v3d_perfmon_get(bin->base.perfmon);
> ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
> if (ret)
> goto fail_unreserve;
> @@ -633,6 +653,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
> if (ret)
> goto fail_unreserve;
> + clean_job->perfmon = render->base.perfmon;
> + v3d_perfmon_get(clean_job->perfmon);
> ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
> if (ret)
> goto fail_unreserve;
> @@ -827,6 +849,15 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
> if (ret)
> goto fail;
>
> + if (args->perfmon_id) {
> + job->base.perfmon = v3d_perfmon_find(v3d_priv,
> + args->perfmon_id);
> + if (!job->base.perfmon) {
> + ret = -ENOENT;
> + goto fail;
> + }
> + }
> +
> mutex_lock(&v3d->sched_lock);
> ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
> if (ret)
> diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
> new file mode 100644
> index 000000000000..0288ef063513
> --- /dev/null
> +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
> @@ -0,0 +1,213 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Raspberry Pi
> + */
> +
> +#include "v3d_drv.h"
> +#include "v3d_regs.h"
> +
> +#define V3D_PERFMONID_MIN 1
> +#define V3D_PERFMONID_MAX U32_MAX
> +
> +void v3d_perfmon_get(struct v3d_perfmon *perfmon)
> +{
> + if (perfmon)
> + refcount_inc(&perfmon->refcnt);
> +}
> +
> +void v3d_perfmon_put(struct v3d_perfmon *perfmon)
> +{
> + if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
> + kfree(perfmon);
> +}
> +
> +void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon)
> +{
> + unsigned int i;
> + u32 mask;
> + u8 ncounters = perfmon->ncounters;
> +
> + if (WARN_ON_ONCE(!perfmon || v3d->active_perfmon))
> + return;
> +
> + mask = GENMASK(ncounters - 1, 0);
> +
> + for (i = 0; i < ncounters; i++) {
> + u32 source = i / 4;
> + u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0);
> +
> + i++;
> + channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
> + V3D_PCTR_S1);
> + i++;
> + channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
> + V3D_PCTR_S2);
> + i++;
> + channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
> + V3D_PCTR_S3);
> + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel);
> + }
> +
> + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_CLR, mask);
> + V3D_CORE_WRITE(0, V3D_PCTR_0_OVERFLOW, mask);
> + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, mask);
> +
> + v3d->active_perfmon = perfmon;
> +}
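
Just to record my reading of the packing loop above: counters get packed
four per source register, so e.g. with ncounters = 6 the writes end up as

  V3D_V4_PCTR_0_SRC_X(0): S0..S3 = counters[0..3]
  V3D_V4_PCTR_0_SRC_X(1): S0 = counters[4], S1 = counters[5], S2 = S3 = 0

which matches the "each src reg muxes four counters" comment in v3d_regs.h.
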
> +
> +void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
> + bool capture)
> +{
> + unsigned int i;
> +
> + if (!perfmon || !v3d->active_perfmon)
> + return;
> +
> + mutex_lock(&perfmon->lock);
> + if (perfmon != v3d->active_perfmon) {
> + mutex_unlock(&perfmon->lock);
> + return;
> + }
> +
> + if (capture)
> + for (i = 0; i < perfmon->ncounters; i++)
> + perfmon->values[i] += V3D_CORE_READ(0, V3D_PCTR_0_PCTRX(i));
> +
> + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, 0);
> +
> + v3d->active_perfmon = NULL;
> + mutex_unlock(&perfmon->lock);
> +}
> +
> +struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id)
> +{
> + struct v3d_perfmon *perfmon;
> +
> + mutex_lock(&v3d_priv->perfmon.lock);
> + perfmon = idr_find(&v3d_priv->perfmon.idr, id);
> + v3d_perfmon_get(perfmon);
> + mutex_unlock(&v3d_priv->perfmon.lock);
> +
> + return perfmon;
> +}
> +
> +void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv)
> +{
> + mutex_init(&v3d_priv->perfmon.lock);
> + idr_init(&v3d_priv->perfmon.idr);
> +}
> +
> +static int v3d_perfmon_idr_del(int id, void *elem, void *data)
> +{
> + struct v3d_perfmon *perfmon = elem;
> +
> + v3d_perfmon_put(perfmon);
> +
> + return 0;
> +}
> +
> +void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
> +{
> + mutex_lock(&v3d_priv->perfmon.lock);
> + idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL);
> + idr_destroy(&v3d_priv->perfmon.idr);
> + mutex_unlock(&v3d_priv->perfmon.lock);
> +}
> +
> +int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> + struct drm_v3d_perfmon_create *req = data;
> + struct v3d_perfmon *perfmon;
> + unsigned int i;
> + int ret;
> +
> + /* Number of monitored counters cannot exceed HW limits. */
> + if (req->ncounters > DRM_V3D_MAX_PERF_COUNTERS ||
> + !req->ncounters)
> + return -EINVAL;
> +
> + /* Make sure all counters are valid. */
> + for (i = 0; i < req->ncounters; i++) {
> + if (req->counters[i] >= V3D_PERFCNT_NUM)
> + return -EINVAL;
> + }
> +
> + perfmon = kzalloc(struct_size(perfmon, values, req->ncounters),
> + GFP_KERNEL);
> + if (!perfmon)
> + return -ENOMEM;
> +
> + for (i = 0; i < req->ncounters; i++)
> + perfmon->counters[i] = req->counters[i];
> +
> + perfmon->ncounters = req->ncounters;
> +
> + refcount_set(&perfmon->refcnt, 1);
> + mutex_init(&perfmon->lock);
> +
> + mutex_lock(&v3d_priv->perfmon.lock);
> + ret = idr_alloc(&v3d_priv->perfmon.idr, perfmon, V3D_PERFMONID_MIN,
> + V3D_PERFMONID_MAX, GFP_KERNEL);
> + mutex_unlock(&v3d_priv->perfmon.lock);
> +
> + if (ret < 0) {
> + kfree(perfmon);
> + return ret;
> + }
> +
> + req->id = ret;
> +
> + return 0;
> +}
> +
> +int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> + struct drm_v3d_perfmon_destroy *req = data;
> + struct v3d_perfmon *perfmon;
> +
> + mutex_lock(&v3d_priv->perfmon.lock);
> + perfmon = idr_remove(&v3d_priv->perfmon.idr, req->id);
> + mutex_unlock(&v3d_priv->perfmon.lock);
> +
> + if (!perfmon)
> + return -EINVAL;
> +
> + v3d_perfmon_put(perfmon);
> +
> + return 0;
> +}
> +
> +int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct v3d_dev *v3d = to_v3d_dev(dev);
> + struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> + struct drm_v3d_perfmon_get_values *req = data;
> + struct v3d_perfmon *perfmon;
> + int ret = 0;
> +
> + if (req->pad != 0)
> + return -EINVAL;
> +
> + mutex_lock(&v3d_priv->perfmon.lock);
> + perfmon = idr_find(&v3d_priv->perfmon.idr, req->id);
> + v3d_perfmon_get(perfmon);
> + mutex_unlock(&v3d_priv->perfmon.lock);
> +
> + if (!perfmon)
> + return -EINVAL;
> +
> + v3d_perfmon_stop(v3d, perfmon, true);
> +
> + if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->values,
> + perfmon->ncounters * sizeof(u64)))
> + ret = -EFAULT;
> +
> + v3d_perfmon_put(perfmon);
> +
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
> index 9bcb57781d31..3663e0d6bf76 100644
> --- a/drivers/gpu/drm/v3d/v3d_regs.h
> +++ b/drivers/gpu/drm/v3d/v3d_regs.h
> @@ -347,6 +347,8 @@
> /* Each src reg muxes four counters each. */
> #define V3D_V4_PCTR_0_SRC_0_3 0x00660
> #define V3D_V4_PCTR_0_SRC_28_31 0x0067c
> +#define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \
> + 4 * (x))
> # define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
> # define V3D_PCTR_S0_SHIFT 0
> # define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 8992480c88fa..c9a5c916d6eb 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -63,6 +63,16 @@ v3d_job_free(struct drm_sched_job *sched_job)
> v3d_job_put(job);
> }
>
> +static void
> +v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
> +{
> + if (job->perfmon != v3d->active_perfmon)
> + v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
> +
> + if (job->perfmon && v3d->active_perfmon != job->perfmon)
> + v3d_perfmon_start(v3d, job->perfmon);
> +}
> +
> /*
> * Returns the fences that the job depends on, one by one.
> *
> @@ -120,6 +130,8 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
> trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
> job->start, job->end);
>
> + v3d_switch_perfmon(v3d, &job->base);
> +
> /* Set the current and end address of the control list.
> * Writing the end register is what starts the job.
> */
> @@ -169,6 +181,8 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
> trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
> job->start, job->end);
>
> + v3d_switch_perfmon(v3d, &job->base);
> +
> /* XXX: Set the QCFG */
>
> /* Set the current and end address of the control list.
> @@ -240,6 +254,8 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
>
> trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
>
> + v3d_switch_perfmon(v3d, &job->base);
> +
> for (i = 1; i <= 6; i++)
> V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
> /* CFG0 write kicks off the job. */
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 1ce746e228d9..4104f22fb3d3 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -38,6 +38,9 @@ extern "C" {
> #define DRM_V3D_GET_BO_OFFSET 0x05
> #define DRM_V3D_SUBMIT_TFU 0x06
> #define DRM_V3D_SUBMIT_CSD 0x07
> +#define DRM_V3D_PERFMON_CREATE 0x08
> +#define DRM_V3D_PERFMON_DESTROY 0x09
> +#define DRM_V3D_PERFMON_GET_VALUES 0x0a
>
> #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
> #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
> @@ -47,6 +50,12 @@ extern "C" {
> #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
> #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
> #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
> +#define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \
> + struct drm_v3d_perfmon_create)
> +#define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \
> + struct drm_v3d_perfmon_destroy)
> +#define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
> + struct drm_v3d_perfmon_get_values)
>
> #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
>
> @@ -127,6 +136,11 @@ struct drm_v3d_submit_cl {
> __u32 bo_handle_count;
>
> __u32 flags;
> +
> + /* ID of the perfmon to attach to this job. 0 means no perfmon. */
> + __u32 perfmon_id;
> +
> + __u32 pad;
> };
>
> /**
> @@ -195,6 +209,7 @@ enum drm_v3d_param {
> DRM_V3D_PARAM_SUPPORTS_TFU,
> DRM_V3D_PARAM_SUPPORTS_CSD,
> DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
> + DRM_V3D_PARAM_SUPPORTS_PERFMON,
> };
>
> struct drm_v3d_get_param {
> @@ -258,6 +273,127 @@ struct drm_v3d_submit_csd {
> __u32 in_sync;
> /* Sync object to signal when the CSD job is done. */
> __u32 out_sync;
> +
> + /* ID of the perfmon to attach to this job. 0 means no perfmon. */
> + __u32 perfmon_id;
> +};
> +
> +enum {
> + V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
> + V3D_PERFCNT_FEP_VALID_PRIMS,
> + V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS,
> + V3D_PERFCNT_FEP_VALID_QUADS,
> + V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL,
> + V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL,
> + V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS,
> + V3D_PERFCNT_TLB_QUADS_ZERO_COV,
> + V3D_PERFCNT_TLB_QUADS_NONZERO_COV,
> + V3D_PERFCNT_TLB_QUADS_WRITTEN,
> + V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD,
> + V3D_PERFCNT_PTB_PRIM_CLIP,
> + V3D_PERFCNT_PTB_PRIM_REV,
> + V3D_PERFCNT_QPU_IDLE_CYCLES,
> + V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER,
> + V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG,
> + V3D_PERFCNT_QPU_CYCLES_VALID_INSTR,
> + V3D_PERFCNT_QPU_CYCLES_TMU_STALL,
> + V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL,
> + V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL,
> + V3D_PERFCNT_QPU_IC_HIT,
> + V3D_PERFCNT_QPU_IC_MISS,
> + V3D_PERFCNT_QPU_UC_HIT,
> + V3D_PERFCNT_QPU_UC_MISS,
> + V3D_PERFCNT_TMU_TCACHE_ACCESS,
> + V3D_PERFCNT_TMU_TCACHE_MISS,
> + V3D_PERFCNT_VPM_VDW_STALL,
> + V3D_PERFCNT_VPM_VCD_STALL,
> + V3D_PERFCNT_BIN_ACTIVE,
> + V3D_PERFCNT_RDR_ACTIVE,
> + V3D_PERFCNT_L2T_HITS,
> + V3D_PERFCNT_L2T_MISSES,
> + V3D_PERFCNT_CYCLE_COUNT,
> + V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER,
> + V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT,
> + V3D_PERFCNT_PTB_PRIMS_BINNED,
> + V3D_PERFCNT_AXI_WRITES_WATCH_0,
> + V3D_PERFCNT_AXI_READS_WATCH_0,
> + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0,
> + V3D_PERFCNT_AXI_READ_STALLS_WATCH_0,
> + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0,
> + V3D_PERFCNT_AXI_READ_BYTES_WATCH_0,
> + V3D_PERFCNT_AXI_WRITES_WATCH_1,
> + V3D_PERFCNT_AXI_READS_WATCH_1,
> + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1,
> + V3D_PERFCNT_AXI_READ_STALLS_WATCH_1,
> + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1,
> + V3D_PERFCNT_AXI_READ_BYTES_WATCH_1,
> + V3D_PERFCNT_TLB_PARTIAL_QUADS,
> + V3D_PERFCNT_TMU_CONFIG_ACCESSES,
> + V3D_PERFCNT_L2T_NO_ID_STALL,
> + V3D_PERFCNT_L2T_COM_QUE_STALL,
> + V3D_PERFCNT_L2T_TMU_WRITES,
> + V3D_PERFCNT_TMU_ACTIVE_CYCLES,
> + V3D_PERFCNT_TMU_STALLED_CYCLES,
> + V3D_PERFCNT_CLE_ACTIVE,
> + V3D_PERFCNT_L2T_TMU_READS,
> + V3D_PERFCNT_L2T_CLE_READS,
> + V3D_PERFCNT_L2T_VCD_READS,
> + V3D_PERFCNT_L2T_TMUCFG_READS,
> + V3D_PERFCNT_L2T_SLC0_READS,
> + V3D_PERFCNT_L2T_SLC1_READS,
> + V3D_PERFCNT_L2T_SLC2_READS,
> + V3D_PERFCNT_L2T_TMU_W_MISSES,
> + V3D_PERFCNT_L2T_TMU_R_MISSES,
> + V3D_PERFCNT_L2T_CLE_MISSES,
> + V3D_PERFCNT_L2T_VCD_MISSES,
> + V3D_PERFCNT_L2T_TMUCFG_MISSES,
> + V3D_PERFCNT_L2T_SLC0_MISSES,
> + V3D_PERFCNT_L2T_SLC1_MISSES,
> + V3D_PERFCNT_L2T_SLC2_MISSES,
> + V3D_PERFCNT_CORE_MEM_WRITES,
> + V3D_PERFCNT_L2T_MEM_WRITES,
> + V3D_PERFCNT_PTB_MEM_WRITES,
> + V3D_PERFCNT_TLB_MEM_WRITES,
> + V3D_PERFCNT_CORE_MEM_READS,
> + V3D_PERFCNT_L2T_MEM_READS,
> + V3D_PERFCNT_PTB_MEM_READS,
> + V3D_PERFCNT_PSE_MEM_READS,
> + V3D_PERFCNT_TLB_MEM_READS,
> + V3D_PERFCNT_GMP_MEM_READS,
> + V3D_PERFCNT_PTB_W_MEM_WORDS,
> + V3D_PERFCNT_TLB_W_MEM_WORDS,
> + V3D_PERFCNT_PSE_R_MEM_WORDS,
> + V3D_PERFCNT_TLB_R_MEM_WORDS,
> + V3D_PERFCNT_TMU_MRU_HITS,
> + V3D_PERFCNT_COMPUTE_ACTIVE,
> + V3D_PERFCNT_NUM,
> +};
> +
> +#define DRM_V3D_MAX_PERF_COUNTERS 32
> +
> +struct drm_v3d_perfmon_create {
> + __u32 id;
> + __u32 ncounters;
> + __u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
> +};
> +
> +struct drm_v3d_perfmon_destroy {
> + __u32 id;
> +};
> +
> +/*
> + * Returns the values of the performance counters tracked by this
> + * perfmon (as an array of ncounters u64 values).
> + *
> + * No implicit synchronization is performed, so the user has to
> + * guarantee that any jobs using this perfmon have already been
> + * completed (e.g. by waiting on the out_sync of the last
> + * submission that used the perfmon).
> + */
> +struct drm_v3d_perfmon_get_values {
> + __u32 id;
> + __u32 pad;
> + __u64 values_ptr;
> };
>
> #if defined(__cplusplus)
> --
> 2.25.1
>