[PATCH v2] drm/v3d: clean caches at the end of render jobs on request from user space
Iago Toral
itoral at igalia.com
Wed Sep 18 09:30:37 UTC 2019
Eric, could you push this for me? I don't have push rights to DRM yet.
Iago
On Wed, 2019-09-18 at 11:15 +0200, Iago Toral Quiroga wrote:
> Extends the user space ioctl for CL submissions so it can include a
> request
> to flush the cache once the CL execution has completed. Fixes memory
> write violation messages reported by the kernel in workloads
> involving
> shader memory writes (SSBOs, shader images, scratch, etc) which
> sometimes
> also lead to GPU resets during Piglit and CTS workloads.
>
> v2: if v3d_job_init() fails we need to kfree() the job instead of
> v3d_job_put() it (Eric Anholt).
>
> Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
> Reviewed-by: Eric Anholt <eric at anholt.net>
> Link:
> https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-itoral@igalia.com
> ---
> drivers/gpu/drm/v3d/v3d_gem.c | 54 +++++++++++++++++++++++++++++--
> ----
> include/uapi/drm/v3d_drm.h | 7 +++--
> 2 files changed, 50 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> b/drivers/gpu/drm/v3d/v3d_gem.c
> index 5d80507b539b..d46d91346d09 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
> struct drm_v3d_submit_cl *args = data;
> struct v3d_bin_job *bin = NULL;
> struct v3d_render_job *render;
> + struct v3d_job *clean_job = NULL;
> + struct v3d_job *last_job;
> struct ww_acquire_ctx acquire_ctx;
> int ret = 0;
>
> trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args-
> >rcl_end);
>
> - if (args->pad != 0) {
> - DRM_INFO("pad must be zero: %d\n", args->pad);
> + if (args->flags != 0 &&
> + args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> + DRM_INFO("invalid flags: %d\n", args->flags);
> return -EINVAL;
> }
>
> @@ -575,12 +578,31 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
> bin->render = render;
> }
>
> - ret = v3d_lookup_bos(dev, file_priv, &render->base,
> + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
> + if (!clean_job) {
> + ret = -ENOMEM;
> + goto fail;
> + }
> +
> + ret = v3d_job_init(v3d, file_priv, clean_job,
> v3d_job_free, 0);
> + if (ret) {
> + kfree(clean_job);
> + clean_job = NULL;
> + goto fail;
> + }
> +
> + last_job = clean_job;
> + } else {
> + last_job = &render->base;
> + }
> +
> + ret = v3d_lookup_bos(dev, file_priv, last_job,
> args->bo_handles, args->bo_handle_count);
> if (ret)
> goto fail;
>
> - ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
> + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
> if (ret)
> goto fail;
>
> @@ -599,28 +621,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
> ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
> if (ret)
> goto fail_unreserve;
> +
> + if (clean_job) {
> + struct dma_fence *render_fence =
> + dma_fence_get(render->base.done_fence);
> + ret = drm_gem_fence_array_add(&clean_job->deps,
> render_fence);
> + if (ret)
> + goto fail_unreserve;
> + ret = v3d_push_job(v3d_priv, clean_job,
> V3D_CACHE_CLEAN);
> + if (ret)
> + goto fail_unreserve;
> + }
> +
> mutex_unlock(&v3d->sched_lock);
>
> v3d_attach_fences_and_unlock_reservation(file_priv,
> - &render->base,
> + last_job,
> &acquire_ctx,
> args->out_sync,
> - render-
> >base.done_fence);
> + last_job->done_fence);
>
> if (bin)
> v3d_job_put(&bin->base);
> v3d_job_put(&render->base);
> + if (clean_job)
> + v3d_job_put(clean_job);
>
> return 0;
>
> fail_unreserve:
> mutex_unlock(&v3d->sched_lock);
> - drm_gem_unlock_reservations(render->base.bo,
> - render->base.bo_count,
> &acquire_ctx);
> + drm_gem_unlock_reservations(last_job->bo,
> + last_job->bo_count, &acquire_ctx);
> fail:
> if (bin)
> v3d_job_put(&bin->base);
> v3d_job_put(&render->base);
> + if (clean_job)
> + v3d_job_put(clean_job);
>
> return ret;
> }
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 58fbe48c91e9..58d2040ea48c 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -48,6 +48,8 @@ extern "C" {
> #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE +
> DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
> #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE +
> DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
>
> +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG 0x01
> +
> /**
> * struct drm_v3d_submit_cl - ioctl argument for submitting commands
> to the 3D
> * engine.
> @@ -61,7 +63,7 @@ extern "C" {
> * flushed by the time the render done IRQ happens, which is the
> * trigger for out_sync. Any dirtying of cachelines by the job
> (only
> * possible using TMU writes) must be flushed by the caller using
> the
> - * CL's cache flush commands.
> + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag.
> */
> struct drm_v3d_submit_cl {
> /* Pointer to the binner command list.
> @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl {
> /* Number of BO handles passed in (size is that times 4). */
> __u32 bo_handle_count;
>
> - /* Pad, must be zero-filled. */
> - __u32 pad;
> + __u32 flags;
> };
>
> /**
More information about the dri-devel
mailing list