[PATCH 2/2] drm/amdgpu: add the interface of waiting multiple fences (v4)

Gustavo Padovan gustavo at padovan.org
Tue Nov 8 00:36:40 UTC 2016


2016-11-07 Christian König <deathsimple at vodafone.de>:

> Am 07.11.2016 um 02:10 schrieb Gustavo Padovan:
> > Hi Alex,
> > 
> > 2016-11-04 Alex Deucher <alexdeucher at gmail.com>:
> > 
> > > From: Junwei Zhang <Jerry.Zhang at amd.com>
> > > 
> > > v2: agd: rebase and squash in all the previous optimizations and
> > > changes so everything compiles.
> > > v3: squash in Slava's 32bit build fix
> > > v4: rebase on drm-next (fence -> dma_fence),
> > >      squash in Monk's ioctl update patch
> > > 
> > > Signed-off-by: Junwei Zhang <Jerry.Zhang at amd.com>
> > > Reviewed-by: Monk Liu <monk.liu at amd.com>
> > > Reviewed-by: Jammy Zhou <Jammy.Zhou at amd.com>
> > > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   2 +
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 173 ++++++++++++++++++++++++++++++++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c  |   2 +-
> > >   include/uapi/drm/amdgpu_drm.h           |  28 ++++++
> > >   5 files changed, 205 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > index dc98ceb..7a94a3c 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > @@ -1212,6 +1212,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> > >   			struct drm_file *filp);
> > >   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> > >   int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> > > +int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
> > > +				struct drm_file *filp);
> > >   int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> > >   				struct drm_file *filp);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > index 2728805..2004836 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > @@ -1130,6 +1130,179 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
> > >   }
> > >   /**
> > > + * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
> > > + *
> > > + * @adev: amdgpu device
> > > + * @filp: file private
> > > + * @user: drm_amdgpu_fence copied from user space
> > > + */
> > > +static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
> > > +					     struct drm_file *filp,
> > > +					     struct drm_amdgpu_fence *user)
> > > +{
> > > +	struct amdgpu_ring *ring;
> > > +	struct amdgpu_ctx *ctx;
> > > +	struct dma_fence *fence;
> > > +	int r;
> > > +
> > > +	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
> > > +			       user->ring, &ring);
> > > +	if (r)
> > > +		return ERR_PTR(r);
> > > +
> > > +	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
> > > +	if (ctx == NULL)
> > > +		return ERR_PTR(-EINVAL);
> > > +
> > > +	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
> > > +	amdgpu_ctx_put(ctx);
> > > +
> > > +	return fence;
> > > +}
> > > +
> > > +/**
> > > + * amdgpu_cs_wait_all_fence - wait on all fences to signal
> > > + *
> > > + * @adev: amdgpu device
> > > + * @filp: file private
> > > + * @wait: wait parameters
> > > + * @fences: array of drm_amdgpu_fence
> > > + */
> > > +static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
> > > +				     struct drm_file *filp,
> > > +				     union drm_amdgpu_wait_fences *wait,
> > > +				     struct drm_amdgpu_fence *fences)
> > > +{
> > > +	uint32_t fence_count = wait->in.fence_count;
> > > +	unsigned i;
> > > +	long r = 1;
> > > +
> > > +	for (i = 0; i < fence_count; i++) {
> > > +		struct dma_fence *fence;
> > > +		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
> > > +
> > > +		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
> > > +		if (IS_ERR(fence))
> > > +			return PTR_ERR(fence);
> > > +		else if (!fence)
> > > +			continue;
> > > +
> > > +		r = dma_fence_wait_timeout(fence, true, timeout);
> > > +		if (r < 0)
> > > +			return r;
> > > +
> > > +		if (r == 0)
> > > +			break;
> > > +	}
> > > +
> > > +	memset(wait, 0, sizeof(*wait));
> > > +	wait->out.status = (r > 0);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/**
> > > + * amdgpu_cs_wait_any_fence - wait on any fence to signal
> > > + *
> > > + * @adev: amdgpu device
> > > + * @filp: file private
> > > + * @wait: wait parameters
> > > + * @fences: array of drm_amdgpu_fence
> > > + */
> > > +static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
> > > +				    struct drm_file *filp,
> > > +				    union drm_amdgpu_wait_fences *wait,
> > > +				    struct drm_amdgpu_fence *fences)
> > > +{
> > > +	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
> > > +	uint32_t fence_count = wait->in.fence_count;
> > > +	uint32_t first = ~0;
> > > +	struct dma_fence **array;
> > > +	unsigned i;
> > > +	long r;
> > > +
> > > +	/* Prepare the fence array */
> > > +	array = (struct dma_fence **)kcalloc(fence_count, sizeof(struct dma_fence *),
> > > +			GFP_KERNEL);
> > > +	if (array == NULL)
> > > +		return -ENOMEM;
> > > +
> > > +	for (i = 0; i < fence_count; i++) {
> > > +		struct dma_fence *fence;
> > > +
> > > +		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
> > > +		if (IS_ERR(fence)) {
> > > +			r = PTR_ERR(fence);
> > > +			goto err_free_fence_array;
> > > +		} else if (fence) {
> > > +			array[i] = fence;
> > > +		} else { /* NULL, the fence has been already signaled */
> > > +			r = 1;
> > > +			goto out;
> > > +		}
> > > +	}
> > > +
> > > +	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout, &first);
> > > +	if (r < 0)
> > > +		goto err_free_fence_array;
> > > +
> > > +out:
> > > +	memset(wait, 0, sizeof(*wait));
> > > +	wait->out.status = (r > 0);
> > > +	wait->out.first_signaled = first;
> > > +	/* set return value 0 to indicate success */
> > > +	r = 0;
> > > +
> > > +err_free_fence_array:
> > > +	for (i = 0; i < fence_count; i++)
> > > +		dma_fence_put(array[i]);
> > > +	kfree(array);
> > > +
> > > +	return r;
> > > +}
> > > +
> > > +/**
> > > + * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
> > > + *
> > > + * @dev: drm device
> > > + * @data: data from userspace
> > > + * @filp: file private
> > > + */
> > > +int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
> > > +				struct drm_file *filp)
> > > +{
> > > +	struct amdgpu_device *adev = dev->dev_private;
> > > +	union drm_amdgpu_wait_fences *wait = data;
> > > +	uint32_t fence_count = wait->in.fence_count;
> > > +	struct drm_amdgpu_fence *fences_user;
> > > +	struct drm_amdgpu_fence *fences;
> > > +	int r;
> > > +
> > > +	/* Get the fences from userspace */
> > > +	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
> > > +			GFP_KERNEL);
> > > +	if (fences == NULL)
> > > +		return -ENOMEM;
> > > +
> > > +	fences_user = (void __user *)(unsigned long)(wait->in.fences);
> > > +	if (copy_from_user(fences, fences_user,
> > > +		sizeof(struct drm_amdgpu_fence) * fence_count)) {
> > > +		r = -EFAULT;
> > > +		goto err_free_fences;
> > > +	}
> > > +
> > > +	if (wait->in.wait_all)
> > > +		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
> > > +	else
> > > +		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
> > I wonder if it wouldn't be better if we use fence_array here and
> > register callbacks to get notfied of the first signaled fence the "any" case.
> > It seems to me that we could simplify this code by using a fence_array.
> 
> I had this code in mind as well when working on the fence_array.
> 
> But this code actually precedes the fence_array implementation, so I would
> like to push it upstream unchanged and then clean it up to use the fence
> array.
> 
> That would make our backporting efforts a bit easier and shouldn't affect
> upstream to much in any way.

That sounds good to me. Should add an extra patch to this
patchset to do the conversion right away?

Gustavo



More information about the dri-devel mailing list