[PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job

Steven Price steven.price at arm.com
Fri Sep 13 13:44:14 UTC 2019


On 13/09/2019 12:17, Boris Brezillon wrote:
> The READ/WRITE flags are particularly useful if we want to avoid
> serialization of jobs that read from the same BO but never write to it.
> The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> shared but jobs are using different portions of the buffer.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>

Good feature - but we could do with an (easy) way for the user driver to
detect this - so it might be worth bumping the driver version?
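Something like the sketch below is what I have in mind for the userspace
side (purely illustrative - the 1.1 version number is an assumption,
whatever minor we actually bump to would go there):

	#include <stdbool.h>
	#include <xf86drm.h>

	static bool panfrost_has_bo_descs(int fd)
	{
		drmVersionPtr v = drmGetVersion(fd);
		bool ret;

		if (!v)
			return false;

		/* Assume the feature lands in driver version 1.1. */
		ret = v->version_major > 1 ||
		      (v->version_major == 1 && v->version_minor >= 1);
		drmFreeVersion(v);
		return ret;
	}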

Some more comments below.

> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
>  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
>  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
>  include/uapi/drm/panfrost_drm.h         |  41 ++++++
>  4 files changed, 247 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index d74442d71048..08082fd557c3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
>  		  struct drm_panfrost_submit *args,
>  		  struct panfrost_job *job)
>  {
> -	job->bo_count = args->bo_handle_count;
> +	struct drm_panfrost_submit_bo *bo_descs = NULL;
> +	u32 *handles = NULL;
> +	u32 i, bo_count;
> +	int ret = 0;
>  
> -	if (!job->bo_count)
> +	bo_count = args->bo_desc_count ?
> +		   args->bo_desc_count : args->bo_handle_count;
> +	if (!bo_count)
>  		return 0;
>  
> -	job->implicit_fences = kvmalloc_array(job->bo_count,
> -				  sizeof(struct dma_fence *),
> +	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
>  				  GFP_KERNEL | __GFP_ZERO);
> -	if (!job->implicit_fences)
> +	if (!job->bos)
>  		return -ENOMEM;
>  
> -	return drm_gem_objects_lookup(file_priv,
> -				      (void __user *)(uintptr_t)args->bo_handles,
> -				      job->bo_count, &job->bos);
> +	job->bo_count = bo_count;
> +	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> +				  GFP_KERNEL | __GFP_ZERO);
> +	if (!bo_descs) {
> +		ret = -ENOMEM;
> +		goto out;

This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
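i.e. just:

	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
				  GFP_KERNEL | __GFP_ZERO);
	if (!bo_descs)
		return -ENOMEM;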

> +	}
> +
> +	if (!args->bo_desc_count) {
> +		handles = kvmalloc_array(bo_count, sizeof(*handles),
> +					 GFP_KERNEL);
> +		if (!handles) {
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +
> +		if (copy_from_user(handles,
> +				   (void __user *)(uintptr_t)args->bo_handles,
> +				   job->bo_count * sizeof(*handles))) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < job->bo_count; i++) {
> +			bo_descs[i].handle = handles[i];
> +			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> +					    PANFROST_SUBMIT_BO_READ;

You can use PANFROST_SUBMIT_BO_RW here.
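i.e.:

			bo_descs[i].flags = PANFROST_SUBMIT_BO_RW;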

> +		}
> +	} else if (copy_from_user(bo_descs,
> +				  (void __user *)(uintptr_t)args->bo_descs,
> +				  job->bo_count * sizeof(*bo_descs))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> +		    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		job->bos[i].flags = bo_descs[i].flags;
> +		job->bos[i].obj = drm_gem_object_lookup(file_priv,
> +							bo_descs[i].handle);
> +		if (!job->bos[i].obj) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	kvfree(handles);
> +	kvfree(bo_descs);
> +	return ret;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 05c85f45a0de..e4b74fde9339 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>  
> -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> -					   int bo_count,
> -					   struct dma_fence **implicit_fences)
> +static int panfrost_acquire_object_fences(struct panfrost_job *job)
>  {
> -	int i;
> +	int i, ret;
>  
> -	for (i = 0; i < bo_count; i++)
> -		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct panfrost_job_bo_desc *bo = &job->bos[i];
> +		struct dma_resv *robj = bo->obj->resv;
> +
> +		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> +			ret = dma_resv_reserve_shared(robj, 1);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +			continue;
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> +			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> +						      &bo->shared_count,
> +						      &bo->shared);
> +			if (ret)
> +				return ret;
> +		} else {
> +			bo->excl = dma_resv_get_excl_rcu(robj);
> +		}

The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ
| NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't
understand why.

> +	}
> +
> +	return 0;
>  }
>  
> -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> -					  int bo_count,
> -					  struct dma_fence *fence)
> +static void panfrost_attach_object_fences(struct panfrost_job *job)
>  {
>  	int i;
>  
> -	for (i = 0; i < bo_count; i++)
> -		dma_resv_add_excl_fence(bos[i]->resv, fence);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct drm_gem_object *obj = job->bos[i].obj;
> +
> +		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> +			dma_resv_add_excl_fence(obj->resv,
> +						job->render_done_fence);
> +		else
> +			dma_resv_add_shared_fence(obj->resv,
> +						  job->render_done_fence);
> +	}
> +}
> +
> +static int panfrost_job_lock_bos(struct panfrost_job *job,
> +				 struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int contended = -1;
> +	int i, ret;
> +
> +	ww_acquire_init(acquire_ctx, &reservation_ww_class);
> +
> +retry:
> +	if (contended != -1) {
> +		struct drm_gem_object *obj = job->bos[contended].obj;
> +
> +		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> +						       acquire_ctx);

dma_resv_lock_slow_interruptible()?
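i.e. (untested):

		ret = dma_resv_lock_slow_interruptible(obj->resv,
						       acquire_ctx);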

> +		if (ret) {
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if (i == contended)
> +			continue;
> +
> +		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> +						  acquire_ctx);

dma_resv_lock_interruptible()?
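i.e. (untested):

		ret = dma_resv_lock_interruptible(job->bos[i].obj->resv,
						  acquire_ctx);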

> +		if (ret) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> +
> +			if (contended != -1 && contended >= i) {
> +				struct drm_gem_object *contended_obj;
> +
> +				contended_obj = job->bos[contended].obj;
> +				ww_mutex_unlock(&contended_obj->resv->lock);
> +			}
> +
> +			if (ret == -EDEADLK) {
> +				contended = i;
> +				goto retry;
> +			}
> +
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	ww_acquire_done(acquire_ctx);
> +
> +	return 0;
> +}

This looks like a copy of drm_gem_lock_reservations(). The only reason
for it as far as I can see is because we now have an array of struct
panfrost_job_bo_desc rather than a direct array of struct
drm_gem_object. I'm not sure having everything neatly in one structure
is worth this cost?
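For instance, a temporary pointer array would let the generic helper be
reused - an untested sketch:

	static int panfrost_job_lock_bos(struct panfrost_job *job,
					 struct ww_acquire_ctx *acquire_ctx)
	{
		struct drm_gem_object **objs;
		int i, ret;

		objs = kvmalloc_array(job->bo_count, sizeof(*objs),
				      GFP_KERNEL);
		if (!objs)
			return -ENOMEM;

		for (i = 0; i < job->bo_count; i++)
			objs[i] = job->bos[i].obj;

		ret = drm_gem_lock_reservations(objs, job->bo_count,
						acquire_ctx);
		kvfree(objs);
		return ret;
	}

Or simply keep a separate struct drm_gem_object *[] in panfrost_job
alongside the descriptor array, so both the lock and unlock paths can
call the existing helpers directly.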

> +
> +static void panfrost_job_unlock_bos(struct panfrost_job *job,
> +				    struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int i;
> +
> +	for (i = 0; i < job->bo_count; i++)
> +		ww_mutex_unlock(&job->bos[i].obj->resv->lock);
> +
> +	ww_acquire_fini(acquire_ctx);
>  }
> 
>  int panfrost_job_push(struct panfrost_job *job)
> @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	mutex_lock(&pfdev->sched_lock);
>  
> -	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
> -					    &acquire_ctx);
> +	ret = panfrost_job_lock_bos(job, &acquire_ctx);
>  	if (ret) {
>  		mutex_unlock(&pfdev->sched_lock);
>  		return ret;
> @@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>  
> -	panfrost_acquire_object_fences(job->bos, job->bo_count,
> -				       job->implicit_fences);
> +	panfrost_acquire_object_fences(job);
>  
>  	drm_sched_entity_push_job(&job->base, entity);
>  
>  	mutex_unlock(&pfdev->sched_lock);
>  
> -	panfrost_attach_object_fences(job->bos, job->bo_count,
> -				      job->render_done_fence);
> +	panfrost_attach_object_fences(job);
>  
>  unlock:
> -	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
> +	panfrost_job_unlock_bos(job, &acquire_ctx);
>  
>  	return ret;
>  }
> @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
>  			dma_fence_put(job->in_fences[i]);
>  		kvfree(job->in_fences);
>  	}
> -	if (job->implicit_fences) {
> -		for (i = 0; i < job->bo_count; i++)
> -			dma_fence_put(job->implicit_fences[i]);
> -		kvfree(job->implicit_fences);
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		unsigned int j;
> +
> +		dma_fence_put(job->bos[i].excl);
> +		for (j = 0; j < job->bos[i].shared_count; j++)
> +			dma_fence_put(job->bos[i].shared[j]);
>  	}
> +
>  	dma_fence_put(job->done_fence);
>  	dma_fence_put(job->render_done_fence);
>  
> -	if (job->bos) {
> -		for (i = 0; i < job->bo_count; i++)
> -			drm_gem_object_put_unlocked(job->bos[i]);
> -		kvfree(job->bos);
> -	}
> +	for (i = 0; i < job->bo_count; i++)
> +		drm_gem_object_put_unlocked(job->bos[i].obj);
>  
> +	kvfree(job->bos);
>  	kfree(job);
>  }
>  
> @@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job
>  		}
>  	}
>  
> -	/* Implicit fences, max. one per BO */
> +	/* Implicit fences */
>  	for (i = 0; i < job->bo_count; i++) {
> -		if (job->implicit_fences[i]) {
> -			fence = job->implicit_fences[i];
> -			job->implicit_fences[i] = NULL;
> +		unsigned int j;
> +
> +		if (job->bos[i].excl) {
> +			fence = job->bos[i].excl;
> +			job->bos[i].excl = NULL;
> +			return fence;
> +		}
> +
> +		for (j = 0; j < job->bos[i].shared_count; j++) {
> +			if (!job->bos[i].shared[j])
> +				continue;
> +
> +			fence = job->bos[i].shared[j];
> +			job->bos[i].shared[j] = NULL;
>  			return fence;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 62454128a792..53440640a6e3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -11,6 +11,14 @@ struct panfrost_device;
>  struct panfrost_gem_object;
>  struct panfrost_file_priv;
>  
> +struct panfrost_job_bo_desc {
> +	struct drm_gem_object *obj;
> +	struct dma_fence *excl;
> +	struct dma_fence **shared;
> +	unsigned int shared_count;
> +	u32 flags;
> +};
> +
>  struct panfrost_job {
>  	struct drm_sched_job base;
>  
> @@ -31,8 +39,7 @@ struct panfrost_job {
>  	__u32 flush_id;
>  
>  	/* Exclusive fences we have taken from the BOs to wait for */

Comment needs updating
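e.g. something like:

	/* BO descriptors, including the fences taken from each BO */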

> -	struct dma_fence **implicit_fences;
> -	struct drm_gem_object **bos;
> +	struct panfrost_job_bo_desc *bos;
>  	u32 bo_count;
>  
>  	/* Fence to be signaled by drm-sched once its done with the job */
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index ec19db1eead8..029c6ae1b1f0 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -40,6 +40,28 @@ extern "C" {
>  #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
>  
>  #define PANFROST_JD_REQ_FS (1 << 0)
> +
> +#define PANFROST_SUBMIT_BO_READ			(1 << 0)
> +#define PANFROST_SUBMIT_BO_WRITE		(1 << 1)
> +#define PANFROST_SUBMIT_BO_RW			(PANFROST_SUBMIT_BO_READ | \
> +						 PANFROST_SUBMIT_BO_WRITE)
> +#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE	(1 << 2)
> +#define PANFROST_SUBMIT_BO_VALID_FLAGS		\
> +	(PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +
> +/**
> + * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
> + *
> + * Useful to give detailed information about BOs used by a given job.
> + *
> + * @handle: the GEM handle
> + * @flags: a combination of PANFROST_SUBMIT_BO_*
> + */
> +struct drm_panfrost_submit_bo {
> +	__u32 handle;
> +	__u32 flags;
> +};
> +
>  /**
>   * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
>   * engine.
> @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
>  
>  	/** A combination of PANFROST_JD_REQ_* */
>  	__u32 requirements;
> +
> +	/**
> +	 * Pointer to an array of &drm_panfrost_submit_bo objects. This
> +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> +	 * not provide enough information to relax synchronization between
> +	 * jobs that only read the BO they share. When both
> +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> +	 */
> +	__u64 bo_descs;
> +
> +	/**
> +	 * Number of BO descriptors passed in (size is that times
> +	 * sizeof(struct drm_panfrost_submit_bo)).
> +	 */
> +	__u32 bo_desc_count;

We don't really need another count field - bo_handle_count could be
re-used. Indeed this could even be handled with just a new flag
specifying that bo_handles no longer points to handles but to bo_desc
objects instead.
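A rough sketch of what I mean (reusing the existing requirements field
here, though a dedicated flags field would work equally well - the flag
name and value are illustrative only):

	#define PANFROST_JD_REQ_BO_DESCS	(1 << 1)

and then in panfrost_lookup_bos():

	if (args->requirements & PANFROST_JD_REQ_BO_DESCS) {
		/* bo_handles points to struct drm_panfrost_submit_bo
		 * descriptors and bo_handle_count counts them. */
		if (copy_from_user(bo_descs,
				   (void __user *)(uintptr_t)args->bo_handles,
				   bo_count * sizeof(*bo_descs)))
			return -EFAULT;
	} else {
		/* legacy layout: copy u32 handles as before */
	}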

Steve

> +
> +	/** Padding, must be 0. */
> +	__u32 pad;
>  };
>  
>  /**
> 