[Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

Daniel Vetter daniel at ffwll.ch
Thu Mar 29 20:47:47 CEST 2012


On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
> Implement the context switch code as well as the interfaces to do the
> context switch. This patch also doesn't match 1:1 with the RFC patches.
> The main difference is that from Daniel's responses the last context
> object is now stored instead of the last context. This aids in allows us
> to free the context data structure, and context object independently.
> 
> There is room for optimization: this code will pin the context object
> until the next context is active. The optimal way to do it is to
> actually pin the object, move it to the active list, do the context
> switch, and then unpin it. This allows the eviction code to actually
> evict the context object if needed.
> 
> The context switch code is missing workarounds, they will be implemented
> in future patches.
> 
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>

Meh, I've forgotten half of the review, some more comments below.

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |    5 ++
>  drivers/gpu/drm/i915/i915_gem_context.c |  118 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   26 +++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    5 ++
>  4 files changed, 153 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f458a8f..c6c2ada 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1303,10 +1303,15 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  				    enum i915_cache_level cache_level);
>  
>  /* i915_gem_context.c */
> +#define I915_CONTEXT_FORCED_SWITCH (1<<0)
> +#define I915_CONTEXT_INITIAL_SWITCH (1<<1)
>  void i915_gem_context_load(struct drm_device *dev);
>  void i915_gem_context_unload(struct drm_device *dev);
>  void i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
>  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
> +int i915_switch_context(struct intel_ring_buffer *ring,
> +			struct drm_file *file,
> +			int to_id, u32 seqno, u32 flags);
>  
>  /* i915_gem_gtt.c */
>  int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 321bafd..cc508d5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -262,7 +262,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
>  	mutex_unlock(&dev->struct_mutex);
>  }
>  
> -static __used struct i915_hw_context *
> +static struct i915_hw_context *
>  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
>  {
>  	struct i915_hw_context *ctx = NULL;
> @@ -279,3 +279,119 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
>  
>  	return ctx;
>  }
> +
> +static int do_switch(struct drm_i915_gem_object *from_obj,
> +		     struct i915_hw_context *to,
> +		     u32 seqno, u32 flags)
> +{
> +	bool initial_switch = (flags & I915_CONTEXT_INITIAL_SWITCH) ? true : false;
> +	bool force =  (flags & I915_CONTEXT_FORCED_SWITCH) ? true : false;

I don't like this flag passing. force seems to be totally unused. And imo
initial_switch would work much better if we tracked this in
i915_hw_context->initialized (which will be false on context create) and
checked it before doing the context switch.

That way you'd also lose the complexity of having to initialize a context
explicitly after create - it would just magically happen.
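
Totally untested sketch of what I have in mind (is_initialized is a made-up
field name, and this assumes mi_set_context grows the bool argument I'm
suggesting below):

	/* i915_hw_context gains a flag, cleared on create */
	bool is_initialized;

	/* in do_switch(), no flags argument needed any more */
	bool first_use = !to->is_initialized;

	ret = intel_ring_mi_set_context(ring, to, first_use);
	if (ret) {
		i915_gem_object_unpin(to->obj);
		return ret;
	}
	to->is_initialized = true;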

> +	struct intel_ring_buffer *ring = NULL;
> +	u32 hw_flags = 0;
> +	int ret;
> +
> +	BUG_ON(to == NULL);
> +	BUG_ON(from_obj != NULL && from_obj->pin_count == 0);
> +	BUG_ON((from_obj != NULL && from_obj->context_id < 0) || to->obj->context_id < 0);
> +
> +	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false);
> +	if (ret)
> +		return ret;
> +
> +	if (initial_switch)
> +		hw_flags |= MI_RESTORE_INHIBIT;
> +	if (force)
> +		hw_flags |= MI_FORCE_RESTORE;

Please move this into the function that actually emits the MI_ command -
it's imo hard to read things if it's distributed all over the place. And
because we don't seem to need force, a simple (and descriptive) bool
inhibit_restore would be sufficient.
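
I.e. something like this, with everything MI_ related kept in one place
(the bool instead of hw_flags is just a suggestion):

int intel_ring_mi_set_context(struct intel_ring_buffer *ring,
			      struct i915_hw_context *new_context,
			      bool inhibit_restore)
{
	u32 hw_flags = MI_MM_SPACE_GTT |
		       MI_SAVE_EXT_STATE_EN |
		       MI_RESTORE_EXT_STATE_EN;
	int ret;

	if (inhibit_restore)
		hw_flags |= MI_RESTORE_INHIBIT;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_SET_CONTEXT);
	intel_ring_emit(ring, new_context->obj->gtt_offset | hw_flags);
	/* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}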

> +
> +	ring = to->ring;
> +	ret = intel_ring_mi_set_context(ring, to, hw_flags);
> +	if (ret) {
> +		i915_gem_object_unpin(to->obj);
> +		return ret;
> +	}
> +
> +	/* The backing object for the context is done after switching to the
> +	 * *next* context. Therefore we cannot retire the previous context until
> +	 * the next context has already started running. In fact, the below code
> +	 * is a bit suboptimal because the retiring can occur simply after the
> +	 * MI_SET_CONTEXT instead of when the next seqno has completed.
> +	 */
> +	if (from_obj != NULL) {
> +		i915_gem_object_move_to_active(from_obj, ring, seqno);

You can use the outstanding lazy request here to ditch the seqno argument
from do_switch and i915_switch_context.
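
Rough sketch (assuming i915_gem_next_request_seqno still has the signature
I remember):

	/* in do_switch(), no seqno parameter needed */
	if (from_obj != NULL) {
		u32 seqno = i915_gem_next_request_seqno(ring);

		i915_gem_object_move_to_active(from_obj, ring, seqno);
	}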

> +		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
> +		 * whole damn pipeline, we don't need to explicitly mark the
> +		 * object dirty. It should be safe to evict the object at any
> +		 * point after MI_SET_CONTEXT has finished executing... true as
> +		 * of GEN7. If not from_obj->dirty=1 would make this safer.
> +		 */

Actually you _need_ to set to->obj->dirty = 1, otherwise we don't tell
shmem that we've dirtied the pages backing the context gem_bo at unbind
time.

> +		BUG_ON(from_obj->ring != to->ring);
> +	}
> +
> +	if (from_obj)
> +		i915_gem_object_unpin(from_obj);
> +
> +	ring->last_context_obj = to->obj;
> +
> +	return 0;
> +}
> +
> +/**
> + * i915_switch_context() - perform a GPU context switch.
> + * @ring: ring for which we'll execute the context switch
> + * @file_priv: file_priv associated with the context, may be NULL
> + * @id: context id number
> + * @seqno: sequence number by which the new context will be switched to
> + * @flags:
> + *
> + * This function will perform the context switch to the given context id on the
> + * specified ring. Unfortunately the context switch code doesn't have an
> + * independent way of knowing when the context switch has occurred, so the
> + * caller must notify us of a time by which the context switch has occurred.
> + */

It took me a while to figure out that you mean the caller has to pass in
the retiring seqno. This can be fixed with i915_gem_next_request_seqno()
(i.e. the outstanding lazy request stuff, as above).

> +int i915_switch_context(struct intel_ring_buffer *ring,
> +			struct drm_file *file,
> +			int to_id, u32 seqno, u32 flags)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct drm_i915_file_private *file_priv = NULL;
> +	struct i915_hw_context *to;
> +	struct drm_i915_gem_object *from_obj = ring->last_context_obj;
> +	int ret;
> +
> +	if (dev_priv->hw_contexts_disabled)
> +		return 0;
> +
> +	if (ring != &dev_priv->ring[RCS])
> +		return 0;

Because your execbuffer integration seems to pass in the ctx_id unchecked,
this is actually a userspace bug and should be shot down (a lot earlier
than this even, i.e. before we try to run the batch) with -EINVAL. Same
for the one above.
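
Something like this in the execbuffer path, before we start setting up the
batch (ctx_id being whatever name the execbuf parameter ends up with):

	if (ctx_id != DEFAULT_CONTEXT_ID &&
	    (dev_priv->hw_contexts_disabled ||
	     ring != &dev_priv->ring[RCS]))
		return -EINVAL;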

> +
> +	if (file)
> +		file_priv = file->driver_priv;

Some of these checks (and also some of the BUG_ONs you're sprinkling) look
rather strange. This one here just leaves file_priv NULL when !file, which
won't end well once a non-default context id gets looked up.
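
At the very least something along these lines, so we never hand a NULL
file_priv to i915_gem_context_get:

	/* non-default contexts need a file to look the id up in */
	if (file == NULL && to_id != DEFAULT_CONTEXT_ID)
		return -EINVAL;

	if (file)
		file_priv = file->driver_priv;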

> +
> +	if (to_id == DEFAULT_CONTEXT_ID) {
> +		to = ring->default_context;
> +		kref_get(&to->nref);
> +	} else {
> +		to = i915_gem_context_get(file_priv, to_id);
> +		if (to == NULL)
> +			return -EINVAL;
> +	}
> +	drm_gem_object_reference(&to->obj->base);
> +
> +	/* If the object is still on the active list, we can shortcut */
> +	if (from_obj == to->obj) {
> +		ret = 0;
> +		goto out;
> +	}
> +
> +	ret = do_switch(from_obj, to, seqno, flags);
> +	if (ret) {
> +		drm_gem_object_unreference(&to->obj->base);
> +		goto out;
> +	}
> +
> +out:
> +	if (from_obj != NULL)
> +		drm_gem_object_unreference(&from_obj->base);
> +	kref_put(&to->nref, destroy_hw_context);
> +	return ret;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ca3972f..cd74f86 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -920,6 +920,32 @@ render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
>  	return 0;
>  }
>  
> +int intel_ring_mi_set_context(struct intel_ring_buffer *ring,
> +			      struct i915_hw_context *new_context,
> +			      u32 hw_flags)
> +{
> +	int ret;
> +
> +	ret = intel_ring_begin(ring, 4);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_emit(ring, MI_SET_CONTEXT);
> +	intel_ring_emit(ring, new_context->obj->gtt_offset |
> +			MI_MM_SPACE_GTT |
> +			MI_SAVE_EXT_STATE_EN |
> +			MI_RESTORE_EXT_STATE_EN |
> +			hw_flags);
> +	/* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */
> +	intel_ring_emit(ring, MI_NOOP);
> +
> +	intel_ring_advance(ring);
> +
> +	return ret;
> +
> +}

I know that this started out as a ringbuffer-abstracted thing. But I'd
kinda prefer if it lived together with the other context stuff in
i915_gem_context.c ... Like the pageflip MI_ functions live right next to
the generic pageflip code. I won't be too angry though if you ignore me
here ;-P

> +
>  static void cleanup_status_page(struct intel_ring_buffer *ring)
>  {
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8c9f898..0ed98bb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -121,6 +121,7 @@ struct  intel_ring_buffer {
>  	drm_local_map_t map;
>  
>  	struct i915_hw_context *default_context;
> +	struct drm_i915_gem_object *last_context_obj;
>  
>  	void *private;
>  };
> @@ -216,6 +217,10 @@ static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno)
>  		ring->trace_irq_seqno = seqno;
>  }
>  
> +int intel_ring_mi_set_context(struct intel_ring_buffer *ring,
> +			      struct i915_hw_context *new_context,
> +			      u32 hw_flags);
> +
>  /* DRI warts */
>  int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size);
>  
> -- 
> 1.7.9.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48


