[Intel-gfx] [PATCH 05/18] drm/i915: Move GEM activity tracking into a common struct reservation_object

Wed Sep 14 09:44:04 UTC 2016

On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
> In preparation to support many distinct timelines, we need to expand the
> activity tracking on the GEM object to handle more than just a request
> per engine. We already use the struct reservation_object on the dma-buf
> to handle many fence contexts, so integrating that into the GEM object
> itself is the preferred solution. (For example, we can now share the same
> reservation_object between every consumer/producer using this buffer and
> skip the manual import/export via dma-buf.)
> 
> Caveats:

For each caveat, I'd add a comment noting which patch in the series addresses
the problem it introduces, which problems are fixable in the future, and which
are accepted as "a permanent hit" for achieving multiple timelines, with a bit
of reasoning for each (currently only a few points include some of this).

>  static inline struct drm_i915_gem_object *
> @@ -2347,35 +2341,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
>  	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
>  }
>  
> -static inline unsigned long
> -i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
> -{
> -	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
> -}
> -
>  static inline bool
>  i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
>  {
> -	return i915_gem_object_get_active(obj);
> -}
> -
> -static inline void
> -i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> -{
> -	obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
> -}
> -
> -static inline void
> -i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
> -{
> -	obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
> -}
> -
> -static inline bool
> -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> -				  int engine)
> -{
> -	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
> +	return obj->active_count;

The return type is bool, so this should be: return !!obj->active_count;

>  }
> 

<SNIP>

> 
>  static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned int other_rings = eb_other_engines(req);
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		struct reservation_object *resv;
> -
> -		if (obj->flags & other_rings) {
> -			ret = i915_gem_request_await_object
> -				(req, obj, obj->base.pending_write_domain);
> -			if (ret)
> -				return ret;
> -		}
>  
> -		resv = i915_gem_object_get_dmabuf_resv(obj);
> -		if (resv) {
> -			ret = i915_sw_fence_await_reservation
> -				(&req->submit, resv, &i915_fence_ops,
> -				 obj->base.pending_write_domain, 10*HZ,
> -				 GFP_KERNEL | __GFP_NOWARN);
> -			if (ret < 0)
> -				return ret;
> -		}
> +		ret = i915_gem_request_await_object
> +			(req, obj, obj->base.pending_write_domain);

I know it was previously like this, but I'm not sure I agree with this
call-wrapping style at all.

> @@ -11935,17 +11932,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
>  
>  	if (i915.use_mmio_flip < 0)
>  		return false;
> -	else if (i915.use_mmio_flip > 0)
> -		return true;
> -	else if (i915.enable_execlists)
> -		return true;
>  
> -	resv = i915_gem_object_get_dmabuf_resv(obj);
> -	if (resv && !reservation_object_test_signaled_rcu(resv, false))
> -		return true;
> -
> -	return engine != i915_gem_active_get_engine(&obj->last_write,
> -						    &obj->base.dev->struct_mutex);
> +	return true;

	return i915.use_mmio_flip >= 0; // ?

> @@ -860,39 +860,6 @@ struct drm_i915_gem_busy {
>  	 * long as no new GPU commands are executed upon it). Due to the
>  	 * asynchronous nature of the hardware, an object reported
>  	 * as busy may become idle before the ioctl is completed.
> -	 *
> -	 * Furthermore, if the object is busy, which engine is busy is only
> -	 * provided as a guide. There are race conditions which prevent the
> -	 * report of which engines are busy from being always accurate.
> -	 * However, the converse is not true. If the object is idle, the
> -	 * result of the ioctl, that all engines are idle, is accurate.
> -	 *
> -	 * The returned dword is split into two fields to indicate both
> -	 * the engines on which the object is being read, and the
> -	 * engine on which it is currently being written (if any).
> -	 *
> -	 * The low word (bits 0:15) indicate if the object is being written
> -	 * to by any engine (there can only be one, as the GEM implicit
> -	 * synchronisation rules force writes to be serialised). Only the
> -	 * engine for the last write is reported.
> -	 *
> -	 * The high word (bits 16:31) are a bitmask of which engines are
> -	 * currently reading from the object. Multiple engines may be
> -	 * reading from the object simultaneously.
> -	 *
> -	 * The value of each engine is the same as specified in the
> -	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
> -	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
> -	 * the I915_EXEC_RENDER engine for execution, and so it is never
> -	 * reported as active itself. Some hardware may have parallel
> -	 * execution engines, e.g. multiple media engines, which are
> -	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
> -	 * so are not separately reported for busyness.
> -	 *
> -	 * Caveat emptor:
> -	 * Only the boolean result of this query is reliable; that is whether
> -	 * the object is idle or busy. The report of which engines are busy
> -	 * should be only used as a heuristic.
>  	 */

Daniel to Ack this ABI change.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

A double check of the plane code by somebody familiar with it would not hurt.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation