[Intel-gfx] [PATCH 2/2] drm/i915: Remove last traces of exec-id (GEM_BUSY)

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Mar 5 14:47:51 UTC 2019


On 02/03/2019 10:06, Chris Wilson wrote:
> As we allow per-context engine allows the legacy concept of
> I915_EXEC_RING no longer applies universally. We are still exposing the
> unrelated exec-id in GEM_BUSY, so transition this ioctl (once more
> slightly changing its ABI, but no one cares) over to only reporting the
> uabi-class (not instance as we can not foreseeably fit those into the
> small bitmask).
> 
> The only user of the extended ring information from GEM_BUSY is ddx/sna,
> which tries to use the non-rcs business information to guide which
> engine to use for subsequent operations on foreign bo. All that matters
> for it is the decision between rcs and !rcs, so it is unaffected by the
> change in higher bits.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c         | 32 +++++++++++++------------
>   drivers/gpu/drm/i915/intel_engine_cs.c  | 10 --------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
>   include/uapi/drm/i915_drm.h             | 32 +++++++++++++------------
>   4 files changed, 34 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a1ad5e137a97..69413f99ed04 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3825,20 +3825,17 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   
>   static __always_inline unsigned int __busy_read_flag(unsigned int id)
>   {
> -	/* Note that we could alias engines in the execbuf API, but
> -	 * that would be very unwise as it prevents userspace from
> -	 * fine control over engine selection. Ahem.
> -	 *
> -	 * This should be something like EXEC_MAX_ENGINE instead of
> -	 * I915_NUM_ENGINES.
> -	 */
> -	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
> +	if (id == I915_ENGINE_CLASS_INVALID)
> +		return 0xffff0000;
> +
> +	GEM_BUG_ON(id >= 16);
>   	return 0x10000 << id;
>   }
>   
>   static __always_inline unsigned int __busy_write_id(unsigned int id)
>   {
> -	/* The uABI guarantees an active writer is also amongst the read
> +	/*
> +	 * The uABI guarantees an active writer is also amongst the read
>   	 * engines. This would be true if we accessed the activity tracking
>   	 * under the lock, but as we perform the lookup of the object and
>   	 * its activity locklessly we can not guarantee that the last_write
> @@ -3846,16 +3843,20 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
>   	 * last_read - hence we always set both read and write busy for
>   	 * last_write.
>   	 */
> -	return id | __busy_read_flag(id);
> +	if (id == I915_ENGINE_CLASS_INVALID)
> +		return 0xffffffff;
> +
> +	return (id + 1) | __busy_read_flag(id);
>   }
>   
>   static __always_inline unsigned int
>   __busy_set_if_active(const struct dma_fence *fence,
>   		     unsigned int (*flag)(unsigned int id))
>   {
> -	struct i915_request *rq;
> +	const struct i915_request *rq;
>   
> -	/* We have to check the current hw status of the fence as the uABI
> +	/*
> +	 * We have to check the current hw status of the fence as the uABI
>   	 * guarantees forward progress. We could rely on the idle worker
>   	 * to eventually flush us, but to minimise latency just ask the
>   	 * hardware.
> @@ -3866,11 +3867,11 @@ __busy_set_if_active(const struct dma_fence *fence,
>   		return 0;
>   
>   	/* opencode to_request() in order to avoid const warnings */
> -	rq = container_of(fence, struct i915_request, fence);
> +	rq = container_of(fence, const struct i915_request, fence);
>   	if (i915_request_completed(rq))
>   		return 0;
>   
> -	return flag(rq->engine->uabi_id);
> +	return flag(rq->engine->uabi_class);
>   }
>   
>   static __always_inline unsigned int
> @@ -3904,7 +3905,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>   	if (!obj)
>   		goto out;
>   
> -	/* A discrepancy here is that we do not report the status of
> +	/*
> +	 * A discrepancy here is that we do not report the status of
>   	 * non-i915 fences, i.e. even though we may report the object as idle,
>   	 * a call to set-domain may still stall waiting for foreign rendering.
>   	 * This also means that wait-ioctl may report an object as busy,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2f7b6c7aeb78..62a2bbbbcc64 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -84,7 +84,6 @@ static const struct engine_class_info intel_engine_classes[] = {
>   #define MAX_MMIO_BASES 3
>   struct engine_info {
>   	unsigned int hw_id;
> -	unsigned int uabi_id;
>   	u8 class;
>   	u8 instance;
>   	/* mmio bases table *must* be sorted in reverse gen order */
> @@ -97,7 +96,6 @@ struct engine_info {
>   static const struct engine_info intel_engines[] = {
>   	[RCS0] = {
>   		.hw_id = RCS0_HW,
> -		.uabi_id = I915_EXEC_RENDER,
>   		.class = RENDER_CLASS,
>   		.instance = 0,
>   		.mmio_bases = {
> @@ -106,7 +104,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[BCS0] = {
>   		.hw_id = BCS0_HW,
> -		.uabi_id = I915_EXEC_BLT,
>   		.class = COPY_ENGINE_CLASS,
>   		.instance = 0,
>   		.mmio_bases = {
> @@ -115,7 +112,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VCS0] = {
>   		.hw_id = VCS0_HW,
> -		.uabi_id = I915_EXEC_BSD,
>   		.class = VIDEO_DECODE_CLASS,
>   		.instance = 0,
>   		.mmio_bases = {
> @@ -126,7 +122,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VCS1] = {
>   		.hw_id = VCS1_HW,
> -		.uabi_id = I915_EXEC_BSD,
>   		.class = VIDEO_DECODE_CLASS,
>   		.instance = 1,
>   		.mmio_bases = {
> @@ -136,7 +131,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VCS2] = {
>   		.hw_id = VCS2_HW,
> -		.uabi_id = I915_EXEC_BSD,
>   		.class = VIDEO_DECODE_CLASS,
>   		.instance = 2,
>   		.mmio_bases = {
> @@ -145,7 +139,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VCS3] = {
>   		.hw_id = VCS3_HW,
> -		.uabi_id = I915_EXEC_BSD,
>   		.class = VIDEO_DECODE_CLASS,
>   		.instance = 3,
>   		.mmio_bases = {
> @@ -154,7 +147,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VECS0] = {
>   		.hw_id = VECS0_HW,
> -		.uabi_id = I915_EXEC_VEBOX,
>   		.class = VIDEO_ENHANCEMENT_CLASS,
>   		.instance = 0,
>   		.mmio_bases = {
> @@ -164,7 +156,6 @@ static const struct engine_info intel_engines[] = {
>   	},
>   	[VECS1] = {
>   		.hw_id = VECS1_HW,
> -		.uabi_id = I915_EXEC_VEBOX,
>   		.class = VIDEO_ENHANCEMENT_CLASS,
>   		.instance = 1,
>   		.mmio_bases = {
> @@ -324,7 +315,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   
> -	engine->uabi_id = info->uabi_id;
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>   
>   	engine->context_size = __intel_engine_context_size(dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index ec968140b46f..18e865ff6637 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -336,7 +336,6 @@ struct intel_engine_cs {
>   	unsigned int guc_id;
>   	intel_engine_mask_t mask;
>   
> -	u8 uabi_id;
>   	u8 uabi_class;
>   
>   	u8 class;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 1a60642c1d61..aa2d4c73a97d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1127,32 +1127,34 @@ struct drm_i915_gem_busy {
>   	 * as busy may become idle before the ioctl is completed.
>   	 *
>   	 * Furthermore, if the object is busy, which engine is busy is only
> -	 * provided as a guide. There are race conditions which prevent the
> -	 * report of which engines are busy from being always accurate.
> -	 * However, the converse is not true. If the object is idle, the
> -	 * result of the ioctl, that all engines are idle, is accurate.
> +	 * provided as a guide and only indirectly by reporting its class
> +	 * (there may be more than one engine in each class). There are race
> +	 * conditions which prevent the report of which engines are busy from
> +	 * being always accurate.  However, the converse is not true. If the
> +	 * object is idle, the result of the ioctl, that all engines are idle,
> +	 * is accurate.
>   	 *
>   	 * The returned dword is split into two fields to indicate both
> -	 * the engines on which the object is being read, and the
> -	 * engine on which it is currently being written (if any).
> +	 * the engine classess on which the object is being read, and the
> +	 * engine class on which it is currently being written (if any).
>   	 *
>   	 * The low word (bits 0:15) indicate if the object is being written
>   	 * to by any engine (there can only be one, as the GEM implicit
>   	 * synchronisation rules force writes to be serialised). Only the
> -	 * engine for the last write is reported.
> +	 * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
> +	 * 1 not 0 etc) for the last write is reported.
>   	 *
> -	 * The high word (bits 16:31) are a bitmask of which engines are
> -	 * currently reading from the object. Multiple engines may be
> +	 * The high word (bits 16:31) are a bitmask of which engines classes
> +	 * are currently reading from the object. Multiple engines may be
>   	 * reading from the object simultaneously.
>   	 *
> -	 * The value of each engine is the same as specified in the
> -	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
> -	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
> -	 * the I915_EXEC_RENDER engine for execution, and so it is never
> +	 * The value of each engine class is the same as specified in the
> +	 * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
> +	 * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
>   	 * reported as active itself. Some hardware may have parallel
>   	 * execution engines, e.g. multiple media engines, which are
> -	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
> -	 * so are not separately reported for busyness.
> +	 * mapped to the same class identifier and so are not separately
> +	 * reported for busyness.
>   	 *
>   	 * Caveat emptor:
>   	 * Only the boolean result of this query is reliable; that is whether
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list