[Intel-gfx] [PATCH 2/2] drm/i915: Remove last traces of exec-id (GEM_BUSY)
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Mar 5 14:47:51 UTC 2019
On 02/03/2019 10:06, Chris Wilson wrote:
> As we allow per-context engine allows the legacy concept of
> I915_EXEC_RING no longer applies universally. We are still exposing the
> unrelated exec-id in GEM_BUSY, so transition this ioctl (once more
> slightly changing its ABI, but no one cares) over to only reporting the
> uabi-class (not instance as we can not foreseeably fit those into the
> small bitmask).
>
> The only user of the extended ring information from GEM_BUSY is ddx/sna,
> which tries to use the non-rcs business information to guide which
> engine to use for subsequent operations on foreign bo. All that matters
> for it is the decision between rcs and !rcs, so it is unaffected by the
> change in higher bits.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++------------
> drivers/gpu/drm/i915/intel_engine_cs.c | 10 --------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> include/uapi/drm/i915_drm.h | 32 +++++++++++++------------
> 4 files changed, 34 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a1ad5e137a97..69413f99ed04 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3825,20 +3825,17 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>
> static __always_inline unsigned int __busy_read_flag(unsigned int id)
> {
> - /* Note that we could alias engines in the execbuf API, but
> - * that would be very unwise as it prevents userspace from
> - * fine control over engine selection. Ahem.
> - *
> - * This should be something like EXEC_MAX_ENGINE instead of
> - * I915_NUM_ENGINES.
> - */
> - BUILD_BUG_ON(I915_NUM_ENGINES > 16);
> + if (id == I915_ENGINE_CLASS_INVALID)
> + return 0xffff0000;
> +
> + GEM_BUG_ON(id >= 16);
> return 0x10000 << id;
> }
>
> static __always_inline unsigned int __busy_write_id(unsigned int id)
> {
> - /* The uABI guarantees an active writer is also amongst the read
> + /*
> + * The uABI guarantees an active writer is also amongst the read
> * engines. This would be true if we accessed the activity tracking
> * under the lock, but as we perform the lookup of the object and
> * its activity locklessly we can not guarantee that the last_write
> @@ -3846,16 +3843,20 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
> * last_read - hence we always set both read and write busy for
> * last_write.
> */
> - return id | __busy_read_flag(id);
> + if (id == I915_ENGINE_CLASS_INVALID)
> + return 0xffffffff;
> +
> + return (id + 1) | __busy_read_flag(id);
> }
>
> static __always_inline unsigned int
> __busy_set_if_active(const struct dma_fence *fence,
> unsigned int (*flag)(unsigned int id))
> {
> - struct i915_request *rq;
> + const struct i915_request *rq;
>
> - /* We have to check the current hw status of the fence as the uABI
> + /*
> + * We have to check the current hw status of the fence as the uABI
> * guarantees forward progress. We could rely on the idle worker
> * to eventually flush us, but to minimise latency just ask the
> * hardware.
> @@ -3866,11 +3867,11 @@ __busy_set_if_active(const struct dma_fence *fence,
> return 0;
>
> /* opencode to_request() in order to avoid const warnings */
> - rq = container_of(fence, struct i915_request, fence);
> + rq = container_of(fence, const struct i915_request, fence);
> if (i915_request_completed(rq))
> return 0;
>
> - return flag(rq->engine->uabi_id);
> + return flag(rq->engine->uabi_class);
> }
>
> static __always_inline unsigned int
> @@ -3904,7 +3905,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
> if (!obj)
> goto out;
>
> - /* A discrepancy here is that we do not report the status of
> + /*
> + * A discrepancy here is that we do not report the status of
> * non-i915 fences, i.e. even though we may report the object as idle,
> * a call to set-domain may still stall waiting for foreign rendering.
> * This also means that wait-ioctl may report an object as busy,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2f7b6c7aeb78..62a2bbbbcc64 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -84,7 +84,6 @@ static const struct engine_class_info intel_engine_classes[] = {
> #define MAX_MMIO_BASES 3
> struct engine_info {
> unsigned int hw_id;
> - unsigned int uabi_id;
> u8 class;
> u8 instance;
> /* mmio bases table *must* be sorted in reverse gen order */
> @@ -97,7 +96,6 @@ struct engine_info {
> static const struct engine_info intel_engines[] = {
> [RCS0] = {
> .hw_id = RCS0_HW,
> - .uabi_id = I915_EXEC_RENDER,
> .class = RENDER_CLASS,
> .instance = 0,
> .mmio_bases = {
> @@ -106,7 +104,6 @@ static const struct engine_info intel_engines[] = {
> },
> [BCS0] = {
> .hw_id = BCS0_HW,
> - .uabi_id = I915_EXEC_BLT,
> .class = COPY_ENGINE_CLASS,
> .instance = 0,
> .mmio_bases = {
> @@ -115,7 +112,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VCS0] = {
> .hw_id = VCS0_HW,
> - .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 0,
> .mmio_bases = {
> @@ -126,7 +122,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VCS1] = {
> .hw_id = VCS1_HW,
> - .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 1,
> .mmio_bases = {
> @@ -136,7 +131,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VCS2] = {
> .hw_id = VCS2_HW,
> - .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 2,
> .mmio_bases = {
> @@ -145,7 +139,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VCS3] = {
> .hw_id = VCS3_HW,
> - .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 3,
> .mmio_bases = {
> @@ -154,7 +147,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VECS0] = {
> .hw_id = VECS0_HW,
> - .uabi_id = I915_EXEC_VEBOX,
> .class = VIDEO_ENHANCEMENT_CLASS,
> .instance = 0,
> .mmio_bases = {
> @@ -164,7 +156,6 @@ static const struct engine_info intel_engines[] = {
> },
> [VECS1] = {
> .hw_id = VECS1_HW,
> - .uabi_id = I915_EXEC_VEBOX,
> .class = VIDEO_ENHANCEMENT_CLASS,
> .instance = 1,
> .mmio_bases = {
> @@ -324,7 +315,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
> engine->class = info->class;
> engine->instance = info->instance;
>
> - engine->uabi_id = info->uabi_id;
> engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>
> engine->context_size = __intel_engine_context_size(dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index ec968140b46f..18e865ff6637 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -336,7 +336,6 @@ struct intel_engine_cs {
> unsigned int guc_id;
> intel_engine_mask_t mask;
>
> - u8 uabi_id;
> u8 uabi_class;
>
> u8 class;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 1a60642c1d61..aa2d4c73a97d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1127,32 +1127,34 @@ struct drm_i915_gem_busy {
> * as busy may become idle before the ioctl is completed.
> *
> * Furthermore, if the object is busy, which engine is busy is only
> - * provided as a guide. There are race conditions which prevent the
> - * report of which engines are busy from being always accurate.
> - * However, the converse is not true. If the object is idle, the
> - * result of the ioctl, that all engines are idle, is accurate.
> + * provided as a guide and only indirectly by reporting its class
> + * (there may be more than one engine in each class). There are race
> + * conditions which prevent the report of which engines are busy from
> + * being always accurate. However, the converse is not true. If the
> + * object is idle, the result of the ioctl, that all engines are idle,
> + * is accurate.
> *
> * The returned dword is split into two fields to indicate both
> - * the engines on which the object is being read, and the
> - * engine on which it is currently being written (if any).
> + * the engine classess on which the object is being read, and the
> + * engine class on which it is currently being written (if any).
> *
> * The low word (bits 0:15) indicate if the object is being written
> * to by any engine (there can only be one, as the GEM implicit
> * synchronisation rules force writes to be serialised). Only the
> - * engine for the last write is reported.
> + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
> + * 1 not 0 etc) for the last write is reported.
> *
> - * The high word (bits 16:31) are a bitmask of which engines are
> - * currently reading from the object. Multiple engines may be
> + * The high word (bits 16:31) are a bitmask of which engines classes
> + * are currently reading from the object. Multiple engines may be
> * reading from the object simultaneously.
> *
> - * The value of each engine is the same as specified in the
> - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
> - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
> - * the I915_EXEC_RENDER engine for execution, and so it is never
> + * The value of each engine class is the same as specified in the
> + * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
> + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
> * reported as active itself. Some hardware may have parallel
> * execution engines, e.g. multiple media engines, which are
> - * mapped to the same identifier in the EXECBUFFER2 ioctl and
> - * so are not separately reported for busyness.
> + * mapped to the same class identifier and so are not separately
> + * reported for busyness.
> *
> * Caveat emptor:
> * Only the boolean result of this query is reliable; that is whether
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list