[PATCH v2 07/17] drm/v3d: Add a CPU job submission
Iago Toral
itoral at igalia.com
Mon Nov 27 08:45:04 UTC 2023
El jue, 23-11-2023 a las 21:47 -0300, Maíra Canal escribió:
> From: Melissa Wen <mwen at igalia.com>
>
> Create a new type of job, a CPU job. A CPU job is a type of job that
> performs operations that require CPU intervention. The overall idea
> is
> to use user extensions to enable different types of CPU job, allowing
> the
> CPU job to perform different operations according to the type of user
> externsion. The user extension ID identifies the type of CPU job that
s/externsion/extension
Iago
> must
> be dealt with.
>
> Having a CPU job is interesting for synchronization purposes as a CPU
> job has a queue like any other V3D job and can be synchronized by
> the
> multisync extension.
>
> Signed-off-by: Melissa Wen <mwen at igalia.com>
> Co-developed-by: Maíra Canal <mcanal at igalia.com>
> Signed-off-by: Maíra Canal <mcanal at igalia.com>
> ---
> drivers/gpu/drm/v3d/v3d_drv.c | 4 ++
> drivers/gpu/drm/v3d/v3d_drv.h | 14 +++++-
> drivers/gpu/drm/v3d/v3d_sched.c | 57 +++++++++++++++++++++++
> drivers/gpu/drm/v3d/v3d_submit.c | 79
> ++++++++++++++++++++++++++++++++
> include/uapi/drm/v3d_drm.h | 17 +++++++
> 5 files changed, 170 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> b/drivers/gpu/drm/v3d/v3d_drv.c
> index 44a1ca57d6a4..3debf37e7d9b 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -91,6 +91,9 @@ static int v3d_get_param_ioctl(struct drm_device
> *dev, void *data,
> case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> args->value = 1;
> return 0;
> + case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE:
> + args->value = 1;
> + return 0;
> default:
> DRM_DEBUG("Unknown parameter %d\n", args->param);
> return -EINVAL;
> @@ -189,6 +192,7 @@ static const struct drm_ioctl_desc
> v3d_drm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE,
> v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY,
> v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES,
> v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl,
> DRM_RENDER_ALLOW | DRM_AUTH),
> };
>
> static const struct drm_driver v3d_drm_driver = {
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> b/drivers/gpu/drm/v3d/v3d_drv.h
> index 4db9ace66024..010b146140ad 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -19,7 +19,7 @@ struct reset_control;
>
> #define GMP_GRANULARITY (128 * 1024)
>
> -#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
> +#define V3D_MAX_QUEUES (V3D_CPU + 1)
>
> static inline char *v3d_queue_to_string(enum v3d_queue queue)
> {
> @@ -29,6 +29,7 @@ static inline char *v3d_queue_to_string(enum
> v3d_queue queue)
> case V3D_TFU: return "tfu";
> case V3D_CSD: return "csd";
> case V3D_CACHE_CLEAN: return "cache_clean";
> + case V3D_CPU: return "cpu";
> }
> return "UNKNOWN";
> }
> @@ -122,6 +123,7 @@ struct v3d_dev {
> struct v3d_render_job *render_job;
> struct v3d_tfu_job *tfu_job;
> struct v3d_csd_job *csd_job;
> + struct v3d_cpu_job *cpu_job;
>
> struct v3d_queue_state queue[V3D_MAX_QUEUES];
>
> @@ -312,6 +314,14 @@ struct v3d_csd_job {
> struct drm_v3d_submit_csd args;
> };
>
> +enum v3d_cpu_job_type {};
> +
> +struct v3d_cpu_job {
> + struct v3d_job base;
> +
> + enum v3d_cpu_job_type job_type;
> +};
> +
> struct v3d_submit_outsync {
> struct drm_syncobj *syncobj;
> };
> @@ -414,6 +424,8 @@ int v3d_submit_tfu_ioctl(struct drm_device *dev,
> void *data,
> struct drm_file *file_priv);
> int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +int v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
>
> /* v3d_irq.c */
> int v3d_irq_init(struct v3d_dev *v3d);
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
> b/drivers/gpu/drm/v3d/v3d_sched.c
> index fccbea2a5f2e..a32c91b94898 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -55,6 +55,12 @@ to_csd_job(struct drm_sched_job *sched_job)
> return container_of(sched_job, struct v3d_csd_job,
> base.base);
> }
>
> +static struct v3d_cpu_job *
> +to_cpu_job(struct drm_sched_job *sched_job)
> +{
> + return container_of(sched_job, struct v3d_cpu_job,
> base.base);
> +}
> +
> static void
> v3d_sched_job_free(struct drm_sched_job *sched_job)
> {
> @@ -262,6 +268,42 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
> return fence;
> }
>
> +static struct dma_fence *
> +v3d_cpu_job_run(struct drm_sched_job *sched_job)
> +{
> + struct v3d_cpu_job *job = to_cpu_job(sched_job);
> + struct v3d_dev *v3d = job->base.v3d;
> + struct v3d_file_priv *file = job->base.file->driver_priv;
> + u64 runtime;
> +
> + void (*v3d_cpu_job_fn[])(struct v3d_cpu_job *job) = { };
> +
> + v3d->cpu_job = job;
> +
> + if (job->job_type >= ARRAY_SIZE(v3d_cpu_job_fn)) {
> + DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job-
> >job_type);
> + return NULL;
> + }
> +
> + file->start_ns[V3D_CPU] = local_clock();
> + v3d->queue[V3D_CPU].start_ns = file->start_ns[V3D_CPU];
> +
> + v3d_cpu_job_fn[job->job_type](job);
> +
> + runtime = local_clock() - file->start_ns[V3D_CPU];
> +
> + file->enabled_ns[V3D_CPU] += runtime;
> + v3d->queue[V3D_CPU].enabled_ns += runtime;
> +
> + file->jobs_sent[V3D_CPU]++;
> + v3d->queue[V3D_CPU].jobs_sent++;
> +
> + file->start_ns[V3D_CPU] = 0;
> + v3d->queue[V3D_CPU].start_ns = 0;
> +
> + return NULL;
> +}
> +
> static struct dma_fence *
> v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
> {
> @@ -416,6 +458,12 @@ static const struct drm_sched_backend_ops
> v3d_cache_clean_sched_ops = {
> .free_job = v3d_sched_job_free
> };
>
> +static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
> + .run_job = v3d_cpu_job_run,
> + .timedout_job = v3d_generic_job_timedout,
> + .free_job = v3d_sched_job_free
> +};
> +
> int
> v3d_sched_init(struct v3d_dev *v3d)
> {
> @@ -471,6 +519,15 @@ v3d_sched_init(struct v3d_dev *v3d)
> goto fail;
> }
>
> + ret = drm_sched_init(&v3d->queue[V3D_CPU].sched,
> + &v3d_cpu_sched_ops, NULL,
> + DRM_SCHED_PRIORITY_COUNT,
> + 1, job_hang_limit,
> + msecs_to_jiffies(hang_limit_ms), NULL,
> + NULL, "v3d_cpu", v3d->drm.dev);
> + if (ret)
> + goto fail;
> +
> return 0;
>
> fail:
> diff --git a/drivers/gpu/drm/v3d/v3d_submit.c
> b/drivers/gpu/drm/v3d/v3d_submit.c
> index ed1a310bbd2f..d4b85ab16345 100644
> --- a/drivers/gpu/drm/v3d/v3d_submit.c
> +++ b/drivers/gpu/drm/v3d/v3d_submit.c
> @@ -761,3 +761,82 @@ v3d_submit_csd_ioctl(struct drm_device *dev,
> void *data,
>
> return ret;
> }
> +
> +/**
> + * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
> + * @dev: DRM device
> + * @data: ioctl argument
> + * @file_priv: DRM file for this fd
> + *
> + * Userspace specifies the CPU job type and data required to perform
> its
> + * operations through the drm_v3d_extension struct.
> + */
> +int
> +v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct v3d_dev *v3d = to_v3d_dev(dev);
> + struct drm_v3d_submit_cpu *args = data;
> + struct v3d_submit_ext se = {0};
> + struct v3d_cpu_job *cpu_job = NULL;
> + struct ww_acquire_ctx acquire_ctx;
> + int ret;
> +
> + if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION))
> {
> + DRM_INFO("invalid flags: %d\n", args->flags);
> + return -EINVAL;
> + }
> +
> + ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
> + if (ret)
> + return ret;
> +
> + if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
> + ret = v3d_get_extensions(file_priv, args->extensions,
> &se);
> + if (ret) {
> + DRM_DEBUG("Failed to get extensions.\n");
> + goto fail;
> + }
> + }
> +
> + /* Every CPU job must have a CPU job user extension */
> + if (!cpu_job->job_type) {
> + DRM_DEBUG("CPU job must have a CPU job user
> extension.\n");
> + goto fail;
> + }
> +
> + ret = v3d_job_init(v3d, file_priv, (void *)&cpu_job,
> sizeof(*cpu_job),
> + v3d_job_free, 0, &se, V3D_CPU);
> + if (ret)
> + goto fail;
> +
> + if (args->bo_handle_count) {
> + ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
> + args->bo_handles, args-
> >bo_handle_count);
> + if (ret)
> + goto fail;
> +
> + ret = v3d_lock_bo_reservations(&cpu_job->base,
> &acquire_ctx);
> + if (ret)
> + goto fail;
> + }
> +
> + mutex_lock(&v3d->sched_lock);
> + v3d_push_job(&cpu_job->base);
> + mutex_unlock(&v3d->sched_lock);
> +
> + v3d_attach_fences_and_unlock_reservation(file_priv,
> + &cpu_job->base,
> + &acquire_ctx, 0,
> + NULL, cpu_job-
> >base.done_fence);
> +
> + v3d_job_put((void *)cpu_job);
> +
> + return 0;
> +
> +fail:
> + v3d_job_cleanup((void *)cpu_job);
> + v3d_put_multisync_post_deps(&se);
> +
> + return ret;
> +}
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 1a7d7a689de3..00abef9d0db7 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -41,6 +41,7 @@ extern "C" {
> #define DRM_V3D_PERFMON_CREATE 0x08
> #define DRM_V3D_PERFMON_DESTROY 0x09
> #define DRM_V3D_PERFMON_GET_VALUES 0x0a
> +#define DRM_V3D_SUBMIT_CPU 0x0b
>
> #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE
> + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
> #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE
> + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
> @@ -56,6 +57,7 @@ extern "C" {
> struct
> drm_v3d_perfmon_destroy)
> #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE
> + DRM_V3D_PERFMON_GET_VALUES, \
> struct
> drm_v3d_perfmon_get_values)
> +#define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE +
> DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
>
> #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
> #define DRM_V3D_SUBMIT_EXTENSION 0x02
> @@ -93,6 +95,7 @@ enum v3d_queue {
> V3D_TFU,
> V3D_CSD,
> V3D_CACHE_CLEAN,
> + V3D_CPU,
> };
>
> /**
> @@ -276,6 +279,7 @@ enum drm_v3d_param {
> DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
> DRM_V3D_PARAM_SUPPORTS_PERFMON,
> DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
> + DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE,
> };
>
> struct drm_v3d_get_param {
> @@ -361,6 +365,19 @@ struct drm_v3d_submit_csd {
> __u32 pad;
> };
>
> +struct drm_v3d_submit_cpu {
> + /* Pointer to a u32 array of the BOs that are referenced by
> the job. */
> + __u64 bo_handles;
> +
> + /* Number of BO handles passed in (size is that times 4). */
> + __u32 bo_handle_count;
> +
> + __u32 flags;
> +
> + /* Pointer to an array of ioctl extensions*/
> + __u64 extensions;
> +};
> +
> enum {
> V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
> V3D_PERFCNT_FEP_VALID_PRIMS,
More information about the dri-devel
mailing list