[PATCH] drm/v3d: clean caches at the end of render jobs on request from user space
Iago Toral
itoral at igalia.com
Fri Sep 13 06:29:10 UTC 2019
On Thu, 2019-09-12 at 10:25 -0700, Eric Anholt wrote:
> Iago Toral Quiroga <itoral at igalia.com> writes:
>
> > Extends the user space ioctl for CL submissions so it can include a
> > request
> > to flush the cache once the CL execution has completed. Fixes
> > memory
> > write violation messages reported by the kernel in workloads
> > involving
> > shader memory writes (SSBOs, shader images, scratch, etc) which
> > sometimes
> > also lead to GPU resets during Piglit and CTS workloads.
>
> Some context for any other reviewers: This patch is the interface
> change
> necessary to expose GLES 3.1 on V3D. It turns out the HW packets for
> flushing the caches were broken in multiple ways.
>
> > Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
> > ---
> > drivers/gpu/drm/v3d/v3d_gem.c | 51 +++++++++++++++++++++++++++++
> > ------
> > include/uapi/drm/v3d_drm.h | 7 ++---
> > 2 files changed, 47 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> > b/drivers/gpu/drm/v3d/v3d_gem.c
> > index 5d80507b539b..530fe9d9d5bd 100644
> > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> > void *data,
> > struct drm_v3d_submit_cl *args = data;
> > struct v3d_bin_job *bin = NULL;
> > struct v3d_render_job *render;
> > + struct v3d_job *clean_job = NULL;
> > + struct v3d_job *last_job;
> > struct ww_acquire_ctx acquire_ctx;
> > int ret = 0;
> >
> > trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args-
> > >rcl_end);
> >
> > - if (args->pad != 0) {
> > - DRM_INFO("pad must be zero: %d\n", args->pad);
> > + if (args->flags != 0 &&
> > + args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> > + DRM_INFO("invalid flags: %d\n", args->flags);
> > return -EINVAL;
> > }
> >
> > @@ -575,12 +578,28 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> > void *data,
> > bin->render = render;
> > }
> >
> > - ret = v3d_lookup_bos(dev, file_priv, &render->base,
> > + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> > + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
> > + if (!clean_job) {
> > + ret = -ENOMEM;
> > + goto fail;
> > + }
> > +
> > + ret = v3d_job_init(v3d, file_priv, clean_job,
> > v3d_job_free, 0);
> > + if (ret)
> > + goto fail;
>
> Only issue I see: If v3d_job_init() fails, we need to not
> v3d_job_put()
> it. I'm fine with either kfree() it and NULL the ptr before jumping
> to
> fail, or open code the bin/render puts.
It seems we also call v3d_job_put() for the bin job when v3d_job_init()
fails, and in that case the code also returns immediately instead of
jumping to fail so it can v3d_job_put() the render job — so I guess we
need the same treatment there. Shall I fix that in this patch too, or
would you rather see a separate patch sent for that?
> With that,
>
> Reviewed-by: Eric Anholt <eric at anholt.net>
>
> > +
> > + last_job = clean_job;
> > + } else {
> > + last_job = &render->base;
> > + }
> > +
> > + ret = v3d_lookup_bos(dev, file_priv, last_job,
> > args->bo_handles, args->bo_handle_count);
> > if (ret)
> > goto fail;
> >
> > - ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
> > + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
> > if (ret)
> > goto fail;
> >
> > @@ -599,28 +618,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> > void *data,
> > ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
> > if (ret)
> > goto fail_unreserve;
> > +
> > + if (clean_job) {
> > + struct dma_fence *render_fence =
> > + dma_fence_get(render->base.done_fence);
> > + ret = drm_gem_fence_array_add(&clean_job->deps,
> > render_fence);
> > + if (ret)
> > + goto fail_unreserve;
> > + ret = v3d_push_job(v3d_priv, clean_job,
> > V3D_CACHE_CLEAN);
> > + if (ret)
> > + goto fail_unreserve;
> > + }
> > +
> > mutex_unlock(&v3d->sched_lock);
> >
> > v3d_attach_fences_and_unlock_reservation(file_priv,
> > - &render->base,
> > + last_job,
> > &acquire_ctx,
> > args->out_sync,
> > - render-
> > >base.done_fence);
> > + last_job->done_fence);
> >
> > if (bin)
> > v3d_job_put(&bin->base);
> > v3d_job_put(&render->base);
> > + if (clean_job)
> > + v3d_job_put(clean_job);
> >
> > return 0;
> >
> > fail_unreserve:
> > mutex_unlock(&v3d->sched_lock);
> > - drm_gem_unlock_reservations(render->base.bo,
> > - render->base.bo_count,
> > &acquire_ctx);
> > + drm_gem_unlock_reservations(last_job->bo,
> > + last_job->bo_count, &acquire_ctx);
> > fail:
> > if (bin)
> > v3d_job_put(&bin->base);
> > v3d_job_put(&render->base);
> > + if (clean_job)
> > + v3d_job_put(clean_job);
> >
> > return ret;
> > }
> > diff --git a/include/uapi/drm/v3d_drm.h
> > b/include/uapi/drm/v3d_drm.h
> > index 58fbe48c91e9..58d2040ea48c 100644
> > --- a/include/uapi/drm/v3d_drm.h
> > +++ b/include/uapi/drm/v3d_drm.h
> > @@ -48,6 +48,8 @@ extern "C" {
> > #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE
> > + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
> > #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE
> > + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
> >
> > +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG 0x01
> > +
> > /**
> > * struct drm_v3d_submit_cl - ioctl argument for submitting
> > commands to the 3D
> > * engine.
> > @@ -61,7 +63,7 @@ extern "C" {
> > * flushed by the time the render done IRQ happens, which is the
> > * trigger for out_sync. Any dirtying of cachelines by the job
> > (only
> > * possible using TMU writes) must be flushed by the caller using
> > the
> > - * CL's cache flush commands.
> > + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag.
> > */
> > struct drm_v3d_submit_cl {
> > /* Pointer to the binner command list.
> > @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl {
> > /* Number of BO handles passed in (size is that times 4). */
> > __u32 bo_handle_count;
> >
> > - /* Pad, must be zero-filled. */
> > - __u32 pad;
> > + __u32 flags;
> > };
> >
> > /**
> > --
> > 2.17.1
More information about the dri-devel
mailing list