[Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

Daniel Vetter daniel at ffwll.ch
Mon Jul 28 18:24:07 CEST 2014


On Fri, Jul 25, 2014 at 01:27:00PM +0100, Chris Wilson wrote:
> At the heart of this change is that the seqno is too low-level an
> abstraction to handle the growing complexities of command tracking, both
> with the introduction of multiple command queues with execbuffer and the
> potential for reordering with a scheduler. On top of the seqno we have
> the request. Conceptually this is just a fence, but it also has
> substantial bookkeeping of its own in order to track the context and
> batch in flight, for example. It is the central structure upon which we
> can extend with dependency tracking et al.
> 
> As regards the objects, they were using the seqno as a simple fence,
> upon which we check or even wait for command completion. This patch
> exchanges that seqno/ring pair with the request itself. For the
> majority, the lifetime of the request is ordered by how we retire objects
> then requests. However, both the unlocked waits and probing elsewhere do
> not tie into the normal request lifetimes and so we need to introduce a
> kref. Extending the objects to use the request as the fence naturally
> extends to segregating read/write fence tracking. This is significant
> as it reduces the number of semaphores we need to emit, reducing the
> likelihood of #54226, and improving performance overall.
> 
> NOTE: this is not against bare drm-intel-nightly and is likely to
> conflict with execlists...
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Brad Volkin <bradley.d.volkin at intel.com>

Ok, read through it and I like it overall. Also, right now is the perfect
time to merge it since we're right before the merge window. But this here
needs to be split up a bit to cut out prep patches. I've noticed a few
things in-line, but there's also the mechanical stuff (like dropping the
drm_ prefix from requests).
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c          |  37 +-
>  drivers/gpu/drm/i915/i915_drv.h              | 108 ++--
>  drivers/gpu/drm/i915/i915_gem.c              | 769 ++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
>  drivers/gpu/drm/i915/i915_gem_exec.c         |  10 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c        |  35 +-
>  drivers/gpu/drm/i915/i915_irq.c              |   6 +-
>  drivers/gpu/drm/i915/i915_perf.c             |   6 +-
>  drivers/gpu/drm/i915/i915_trace.h            |   2 +-
>  drivers/gpu/drm/i915/intel_display.c         |  50 +-
>  drivers/gpu/drm/i915/intel_drv.h             |   3 +-
>  drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  83 +--
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  11 +-
>  17 files changed, 745 insertions(+), 556 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 406e630..676d5f1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
>  static void
>  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  {
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  	struct i915_vma *vma;
>  	int pin_count = 0;
>  
> -	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
> +	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>  		   &obj->base,
>  		   get_pin_flag(obj),
>  		   get_tiling_flag(obj),
> @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		   obj->base.size / 1024,
>  		   obj->base.read_domains,
>  		   obj->base.write_domain,
> -		   obj->last_read_seqno,
> -		   obj->last_write_seqno,
> -		   obj->last_fenced_seqno,
> +		   i915_request_seqno(rq),
> +		   i915_request_seqno(obj->last_write.request),
> +		   i915_request_seqno(obj->last_fence.request),
>  		   i915_cache_level_str(obj->cache_level),
>  		   obj->dirty ? " dirty" : "",
>  		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		*t = '\0';
>  		seq_printf(m, " (%s mappable)", s);
>  	}
> -	if (obj->ring != NULL)
> -		seq_printf(m, " (%s)", obj->ring->name);
> +	if (rq)
> +		seq_printf(m, " (%s)", rq->ring->name);
>  	if (obj->frontbuffer_bits)
>  		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
>  }
> @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  			if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
>  				continue;
>  
> -			if (obj->ring) /* XXX per-vma statistic */
> +			if (obj->active) /* XXX per-vma statistic */
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  	} else {
>  		if (i915_gem_obj_ggtt_bound(obj)) {
>  			stats->global += obj->base.size;
> -			if (obj->ring)
> +			if (obj->active)
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
>  					   pipe, plane);
>  			}
> -			if (work->ring)
> +			if (work->flip_queued_request) {
> +				struct i915_gem_request *rq = work->flip_queued_request;
>  				seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
> -						work->ring->name,
> -						work->flip_queued_seqno,
> -						work->ring->get_seqno(work->ring, true));
> -			else
> +						rq->ring->name, rq->seqno,
> +						rq->ring->get_seqno(rq->ring, true));
> +			} else
>  				seq_printf(m, "Flip not associated with any ring\n");
>  			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
>  				   work->flip_queued_vblank,
> @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> -	struct drm_i915_gem_request *gem_request;
> +	struct i915_gem_request *rq;
>  	int ret, count, i;
>  
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  			continue;
>  
>  		seq_printf(m, "%s requests:\n", ring->name);
> -		list_for_each_entry(gem_request,
> -				    &ring->request_list,
> -				    list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			seq_printf(m, "    %d @ %d\n",
> -				   gem_request->seqno,
> -				   (int) (jiffies - gem_request->emitted_jiffies));
> +				   rq->seqno,
> +				   (int)(jiffies - rq->emitted_jiffies));
>  		}
>  		count++;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9837b0f..5794d096 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -187,6 +187,7 @@ enum hpd_pin {
>  struct drm_i915_private;
>  struct i915_mm_struct;
>  struct i915_mmu_object;
> +struct i915_gem_request;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> -	struct list_head ring_list;
>  	/** Used in execbuf to temporarily hold a ref */
>  	struct list_head obj_exec_link;
>  
>  	/**
>  	 * This is set if the object is on the active lists (has pending
> -	 * rendering and so a non-zero seqno), and is not set if it i s on
> -	 * inactive (ready to be unbound) list.
> +	 * rendering and so a submitted request), and is not set if it is on
> +	 * inactive (ready to be unbound) list. We track activity per engine.
>  	 */
> -	unsigned int active:1;
> +	unsigned int active:3;

Could we #define this and then add a BUILD_BUG_ON that 1 << shift >=
NUM_RINGS?
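
Something like this maybe (untested sketch, name is just a suggestion):

#define I915_GEM_OBJECT_ACTIVE_BITS 3

	unsigned int active:I915_GEM_OBJECT_ACTIVE_BITS;

plus e.g. in i915_gem_object_init():

	BUILD_BUG_ON(I915_NUM_RINGS >= (1 << I915_GEM_OBJECT_ACTIVE_BITS));

so the counter can't silently overflow once we grow I915_NUM_RINGS.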

>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
>  	void *dma_buf_vmapping;
>  	int vmapping_count;
>  
> -	struct intel_engine_cs *ring;
> -
> -	/** Breadcrumb of last rendering to the buffer. */
> -	uint32_t last_read_seqno;
> -	uint32_t last_write_seqno;
> -	/** Breadcrumb of last fenced GPU access to the buffer. */
> -	uint32_t last_fenced_seqno;
> +	/** Breadcrumbs of last rendering to the buffer. */
> +	struct {
> +		struct i915_gem_request *request;
> +		struct list_head ring_list;
> +	} last_write, last_read[I915_NUM_RINGS], last_fence;
>  
>  	/** Current tiling stride for the object, if it's tiled. */
>  	uint32_t stride;
> @@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
> +
>  void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits);
> @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   * sequence-number comparisons on buffer last_rendering_seqnos, and associate
>   * an emission time with seqnos for tracking how far ahead of the GPU we are.
>   */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> +	struct kref kref;
> +
>  	/** On Which ring this request was generated */
>  	struct intel_engine_cs *ring;
>  
> @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
>  	struct drm_i915_file_private *file_priv;
>  	/** file_priv list entry for this request */
>  	struct list_head client_list;
> +
> +	bool completed:1;
>  };
>  
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> +	return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq, bool lazy)
> +{
> +	if (!rq->completed)
> +		rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
> +						    rq->seqno);
> +	return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_get(&rq->kref);
> +	return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_put(&rq->kref, __i915_request_free);
> +}
> +
>  struct drm_i915_file_private {
>  	struct drm_i915_private *dev_priv;
>  	struct drm_file *file;
> @@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -			 struct intel_engine_cs *to);
> +			 struct intel_engine_cs *to,
> +			 bool readonly);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring);
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced);
> +#define VMA_IS_FENCED 0x1
> +#define VMA_HAS_FENCE 0x2
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
>  int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
>  		      uint32_t handle, uint64_t *offset);
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> -	return (int32_t)(seq1 - seq2) >= 0;
> -}
>  
>  int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
>  int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
> @@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
>  bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
>  void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring);
>  
>  bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
>  				      bool interruptible);
> -int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
> +int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
>  
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> @@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *batch_obj,
> -		       u32 *seqno);
> -#define i915_add_request(ring, seqno) \
> -	__i915_add_request(ring, NULL, NULL, seqno)
> -int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
> -				 uint32_t seqno);
> +		       struct drm_i915_gem_object *batch_obj);
> +#define i915_add_request(ring) \
> +	__i915_add_request(ring, NULL, NULL)
> +int __must_check i915_wait_request(struct i915_gem_request *rq);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f3ad6fb..d208658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -48,8 +48,6 @@ static __must_check int
>  i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  					    struct drm_i915_file_private *file_priv,
>  					    bool readonly);
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj);
>  
>  static void i915_gem_write_fence(struct drm_device *dev, int reg,
>  				 struct drm_i915_gem_object *obj);
> @@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>  	spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> +	intel_fb_obj_flush(obj, true);
> +	obj->last_write.request = NULL;
> +	list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> +	obj->last_fence.request = NULL;
> +	list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> +			     struct intel_engine_cs *ring)
> +{
> +	struct i915_vma *vma;
> +
> +	BUG_ON(obj->active == 0);
> +	BUG_ON(obj->base.write_domain);
> +
> +	obj->last_read[ring->id].request = NULL;
> +	list_del_init(&obj->last_read[ring->id].ring_list);
> +
> +	if (--obj->active)
> +		return;
> +
> +	BUG_ON(obj->last_write.request);
> +	BUG_ON(obj->last_fence.request);
> +
> +	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +		if (!list_empty(&vma->mm_list))
> +			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> +	}
> +
> +	drm_gem_object_unreference(&obj->base);
> +
> +	WARN_ON(i915_verify_lists(dev));
> +}
> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_request *rq;
> +	int i;
> +
> +	if (!obj->active)
> +		return;
> +
> +	rq = obj->last_write.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__write(obj);
> +
> +	rq = obj->last_fence.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__fence(obj);
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		rq = obj->last_read[i].request;
> +		if (rq && i915_request_complete(rq, true))
> +			i915_gem_object_retire__read(obj, rq->ring);
> +	}
> +}
> +
>  static int
>  i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
> @@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * equal.
>   */
>  int
> -i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
> +i915_gem_check_olr(struct i915_gem_request *rq)
>  {
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
>  
>  	ret = 0;
> -	if (seqno == ring->outstanding_lazy_seqno)
> -		ret = i915_add_request(ring, NULL);
> +	if (rq == rq->ring->preallocated_request)
> +		ret = i915_add_request(rq->ring);
>  
>  	return ret;
>  }
> @@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>  }
>  
>  /**
> - * __wait_seqno - wait until execution of seqno has finished
> - * @ring: the ring expected to report seqno
> - * @seqno: duh!
> + * __wait_request - wait until execution of request has finished
> + * @request: the request to wait upon
>   * @reset_counter: reset sequence associated with the given seqno
>   * @interruptible: do an interruptible wait (normally yes)
>   * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> @@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>   * Returns 0 if the seqno was found within the alloted time. Else returns the
>   * errno with remaining time filled in timeout argument.
>   */
> -static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> -			unsigned reset_counter,
> -			bool interruptible,
> -			struct timespec *timeout,
> -			struct drm_i915_file_private *file_priv)
> +static int __wait_request(struct i915_gem_request *rq,
> +			  unsigned reset_counter,
> +			  bool interruptible,
> +			  struct timespec *timeout,
> +			  struct drm_i915_file_private *file_priv)
>  {
> +	struct intel_engine_cs *ring = rq->ring;
>  	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	const bool irq_test_in_progress =
>  		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
>  	struct timespec before, now;
>  	DEFINE_WAIT(wait);
>  	unsigned long timeout_expire;
> +	u32 seqno = rq->seqno;
>  	int ret;
>  
>  	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> +	if (i915_request_complete(rq, true))
>  		return 0;
>  
>  	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> @@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  			break;
>  		}
>  
> -		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> +		if (i915_request_complete(rq, false)) {
>  			ret = 0;
>  			break;
>  		}
> @@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>   * request and object lists appropriately for that event.
>   */
>  int
> -i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
> +i915_wait_request(struct i915_gem_request *rq)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	bool interruptible = dev_priv->mm.interruptible;
> +	struct drm_device *dev = rq->ring->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> -	BUG_ON(seqno == 0);
> +	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
> +		return -EINVAL;
> +
> +	if (i915_request_complete(rq, true))
> +		return 0;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible);
>  	if (ret)
>  		return ret;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> +	ret = i915_gem_check_olr(rq);
>  	if (ret)
>  		return ret;
>  
> -	return __wait_seqno(ring, seqno,
> -			    atomic_read(&dev_priv->gpu_error.reset_counter),
> -			    interruptible, NULL, NULL);
> -}
> -
> -static int
> -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> -				     struct intel_engine_cs *ring)
> -{
> -	if (!obj->active)
> -		return 0;
> -
> -	/* Manually manage the write flush as we may have not yet
> -	 * retired the buffer.
> -	 *
> -	 * Note that the last_write_seqno is always the earlier of
> -	 * the two (read/write) seqno, so if we haved successfully waited,
> -	 * we know we have passed the last write.
> -	 */
> -	obj->last_write_seqno = 0;
> -
> -	return 0;
> +	return __wait_request(rq,
> +			      atomic_read(&dev_priv->gpu_error.reset_counter),
> +			      dev_priv->mm.interruptible, NULL, NULL);
>  }
>  
>  /**
> @@ -1544,19 +1594,37 @@ static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly)
>  {
> -	struct intel_engine_cs *ring = obj->ring;
> -	u32 seqno;
> -	int ret;
> +	int i, ret;
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> -		return 0;
> +	if (readonly) {
> +		if (obj->last_write.request == NULL)
> +			return 0;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> -	if (ret)
> -		return ret;
> +		ret = i915_wait_request(obj->last_write.request);
> +		if (ret)
> +			return ret;
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			ret = i915_wait_request(obj->last_read[i].request);
> +			if (ret)
> +				return ret;
> +		}
> +	}
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +	/* Manually manage the write flush as we may have not yet
> +	 * retired the buffer.
> +	 *
> +	 * Note that the last_write_seqno is always the earlier of
> +	 * the two (read/write) seqno, so if we haved successfully waited,
> +	 * we know we have passed the last write.
> +	 */
> +	if (obj->last_write.request)
> +		i915_gem_object_retire__write(obj);
> +
> +	return 0;
>  }
>  
>  /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = obj->ring;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno;
> -	int ret;
> +	int i, n, ret;
>  
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> +	n = 0;
> +	if (readonly) {
> +		if (obj->last_write.request)
> +			rq[n++] = i915_request_get(obj->last_write.request);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++)
> +			if (obj->last_read[i].request)
> +				rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +	if (n == 0)
>  		return 0;
>  
>  	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
>  	if (ret)
> -		return ret;
> +		goto out;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> -	if (ret)
> -		return ret;
> +	for (i = 0; i < n; i++) {
> +		ret = i915_gem_check_olr(rq[i]);
> +		if (ret)
> +			goto out;
> +	}
>  
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> +	for (i = 0; ret == 0 && i < n; i++)
> +		ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
> +
>  	mutex_lock(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> +	for (i = 0; i < n; i++)
> +		i915_request_put(rq[i]);
> +
> +	return ret;
>  }
>  
>  /**
> @@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> -static void
> -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -			       struct intel_engine_cs *ring)
> -{
> -	u32 seqno = intel_ring_get_seqno(ring);
> -
> -	BUG_ON(ring == NULL);
> -	if (obj->ring != ring && obj->last_write_seqno) {
> -		/* Keep the seqno relative to the current ring */
> -		obj->last_write_seqno = seqno;
> -	}
> -	obj->ring = ring;
> -
> -	/* Add a reference if we're newly entering the active list. */
> -	if (!obj->active) {
> -		drm_gem_object_reference(&obj->base);
> -		obj->active = 1;
> -	}
> -
> -	list_move_tail(&obj->ring_list, &ring->active_list);
> -
> -	obj->last_read_seqno = seqno;
> -}
> -
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced)
>  {
> -	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = intel_ring_get_request(ring);
> +	u32 old_read = obj->base.read_domains;
> +	u32 old_write = obj->base.write_domain;
>  
> -static void
> -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> -{
> -	struct i915_vma *vma;
> +	BUG_ON(rq == NULL);
>  
> -	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
> -	BUG_ON(!obj->active);
> +	obj->base.write_domain = obj->base.pending_write_domain;
> +	if (obj->base.write_domain == 0)
> +		obj->base.pending_read_domains |= obj->base.read_domains;
> +	obj->base.read_domains = obj->base.pending_read_domains;
>  
> -	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> -		if (!list_empty(&vma->mm_list))
> -			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> -	}
> -
> -	intel_fb_obj_flush(obj, true);
> -
> -	list_del_init(&obj->ring_list);
> -	obj->ring = NULL;
> +	obj->base.pending_read_domains = 0;
> +	obj->base.pending_write_domain = 0;
>  
> -	obj->last_read_seqno = 0;
> -	obj->last_write_seqno = 0;
> -	obj->base.write_domain = 0;
> +	trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +	if (obj->base.read_domains == 0)
> +		return;
>  
> -	obj->last_fenced_seqno = 0;
> +	/* Add a reference if we're newly entering the active list. */
> +	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
> +		drm_gem_object_reference(&obj->base);
>  
> -	obj->active = 0;
> -	drm_gem_object_unreference(&obj->base);
> +	obj->last_read[ring->id].request = rq;
> +	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
>  
> -	WARN_ON(i915_verify_lists(dev));
> -}
> +	if (obj->base.write_domain) {
> +		obj->dirty = 1;
> +		obj->last_write.request = rq;
> +		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
> +		intel_fb_obj_invalidate(obj, ring);
>  
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj)
> -{
> -	struct intel_engine_cs *ring = obj->ring;
> +		/* update for the implicit flush after a batch */
> +		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> +	}
>  
> -	if (ring == NULL)
> -		return;
> +	if (fenced) {
> +		obj->last_fence.request = rq;
> +		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
> +		if (fenced & 2) {

Please use the #define here ...
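
i.e. something like

	if (fenced & VMA_HAS_FENCE) {

so the magic 2 can't go stale against the VMA_IS_FENCED/VMA_HAS_FENCE
definitions in i915_drv.h.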

> +			struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> +					&dev_priv->mm.fence_list);
> +		}
> +	}
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_read_seqno))
> -		i915_gem_object_move_to_inactive(obj);
> +	list_move_tail(&vma->mm_list, &vma->vm->active_list);
>  }
>  
>  static int
> @@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *obj,
> -		       u32 *out_seqno)
> +		       struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	u32 request_ring_position, request_start;
>  	int ret;
>  
> @@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request = ring->preallocated_lazy_request;
> -	if (WARN_ON(request == NULL))
> +	rq = ring->preallocated_request;
> +	if (WARN_ON(rq == NULL))
>  		return -ENOMEM;
>  
>  	/* Record the position of the start of the request so that
> @@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request->seqno = intel_ring_get_seqno(ring);
> -	request->ring = ring;
> -	request->head = request_start;
> -	request->tail = request_ring_position;
> +	rq->head = request_start;
> +	rq->tail = request_ring_position;
>  
>  	/* Whilst this request exists, batch_obj will be on the
>  	 * active_list, and so will hold the active reference. Only when this
> @@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	request->batch_obj = obj;
> +	rq->batch_obj = obj;
>  
>  	/* Hold a reference to the current context so that we can inspect
>  	 * it later in case a hangcheck error event fires.
>  	 */
> -	request->ctx = ring->last_context;
> -	if (request->ctx)
> -		i915_gem_context_reference(request->ctx);
> +	rq->ctx = ring->last_context;
> +	if (rq->ctx)
> +		i915_gem_context_reference(rq->ctx);
>  
> -	request->emitted_jiffies = jiffies;
> -	list_add_tail(&request->list, &ring->request_list);
> -	request->file_priv = NULL;
> +	rq->emitted_jiffies = jiffies;
> +	list_add_tail(&rq->list, &ring->request_list);
> +	rq->file_priv = NULL;
>  
>  	if (file) {
>  		struct drm_i915_file_private *file_priv = file->driver_priv;
>  
>  		spin_lock(&file_priv->mm.lock);
> -		request->file_priv = file_priv;
> -		list_add_tail(&request->client_list,
> +		rq->file_priv = file_priv;
> +		list_add_tail(&rq->client_list,
>  			      &file_priv->mm.request_list);
>  		spin_unlock(&file_priv->mm.lock);
>  	}
>  
> -	trace_i915_gem_request_add(ring, request->seqno);
> -	ring->outstanding_lazy_seqno = 0;
> -	ring->preallocated_lazy_request = NULL;
> +	trace_i915_gem_request_add(ring, rq->seqno);
> +	ring->preallocated_request = NULL;
>  
>  	if (!dev_priv->ums.mm_suspended) {
>  		i915_queue_hangcheck(ring->dev);
> @@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  		intel_mark_busy(dev_priv->dev);
>  	}
>  
> -	if (out_seqno)
> -		*out_seqno = request->seqno;
>  	return 0;
>  }
>  
>  static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
>  {
> -	struct drm_i915_file_private *file_priv = request->file_priv;
> +	struct drm_i915_file_private *file_priv = rq->file_priv;
>  
>  	if (!file_priv)
>  		return;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_del(&request->client_list);
> -	request->file_priv = NULL;
> +	list_del(&rq->client_list);
> +	rq->file_priv = NULL;
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> @@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
> +{
> +	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> +	kfree(rq);
> +}
> +
> +static void i915_request_retire(struct i915_gem_request *rq)
>  {
> -	list_del(&request->list);
> -	i915_gem_request_remove_from_client(request);
> +	rq->completed = true;
> +
> +	list_del(&rq->list);
> +	i915_gem_request_remove_from_client(rq);
>  
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +	if (rq->ctx) {
> +		i915_gem_context_unreference(rq->ctx);
> +		rq->ctx = NULL;
> +	}
>  
> -	kfree(request);
> +	i915_request_put(rq);
>  }
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> -	u32 completed_seqno;
> +	struct i915_gem_request *rq;
>  
> -	completed_seqno = ring->get_seqno(ring, false);
> -
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (i915_seqno_passed(completed_seqno, request->seqno))
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (i915_request_complete(rq, false))
>  			continue;
>  
> -		return request;
> +		return rq;
>  	}
>  
>  	return NULL;
> @@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
>  static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  				       struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	bool ring_hung;
>  
> -	request = i915_gem_find_active_request(ring);
> +	rq = i915_gem_find_active_request(ring);
>  
> -	if (request == NULL)
> +	if (rq == NULL)
>  		return;
>  
>  	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> +	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
>  
> -	list_for_each_entry_continue(request, &ring->request_list, list)
> -		i915_set_reset_status(dev_priv, request->ctx, false);
> +	list_for_each_entry_continue(rq, &ring->request_list, list)
> +		i915_set_reset_status(dev_priv, rq->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  					struct intel_engine_cs *ring)
>  {
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> +		obj = list_first_entry(&ring->write_list,
>  				       struct drm_i915_gem_object,
> -				       ring_list);
> +				       last_write.ring_list);
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
> +
> +		i915_gem_object_retire__fence(obj);
> +	}
> +
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		i915_gem_object_retire__read(obj, ring);
>  	}
>  
>  	/*
> @@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  	 * the request.
>  	 */
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	/* These may not have been flush before the reset, do so now */
> -	kfree(ring->preallocated_lazy_request);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	kfree(ring->preallocated_request);
> +	ring->preallocated_request = NULL;
>  }
>  
>  void i915_gem_restore_fences(struct drm_device *dev)
> @@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>  	 * by the ringbuffer to the flushing/inactive lists as appropriate,
>  	 * before we free the context associated with the requests.
>  	 */
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->write_list,
> +				       struct drm_i915_gem_object,
> +				       last_write.ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_write.request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> -				      struct drm_i915_gem_object,
> -				      ring_list);
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
>  
> -		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_fence.request->seqno))
>  			break;
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__fence(obj);
>  	}
>  
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_read[ring->id].request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__read(obj, ring);
> +	}
>  
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		if (!i915_seqno_passed(seqno, request->seqno))
> +		if (!__i915_seqno_passed(seqno, rq->seqno))
>  			break;
>  
> -		trace_i915_gem_request_retire(ring, request->seqno);
> +		trace_i915_gem_request_retire(ring, rq->seqno);
>  		/* We know the GPU must have read the request to have
>  		 * sent us the seqno + interrupt, so use the position
>  		 * of tail of the request to update the last known position
>  		 * of the GPU head.
>  		 */
> -		ring->buffer->last_retired_head = request->tail;
> +		ring->buffer->last_retired_head = rq->tail;
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	if (unlikely(ring->trace_irq_seqno &&
> -		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> +		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
>  		ring->irq_put(ring);
>  		ring->trace_irq_seqno = 0;
>  	}
> @@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  static int
>  i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  {
> -	int ret;
> +	int i;
>  
> -	if (obj->active) {
> -		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
> +	if (!obj->active)
> +		return 0;
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		struct i915_gem_request *rq = obj->last_read[i].request;
> +		int ret;
> +
> +		if (rq == NULL)
> +			continue;
> +
> +		ret = i915_gem_check_olr(rq);
>  		if (ret)
>  			return ret;
>  
> -		i915_gem_retire_requests_ring(obj->ring);
> +		i915_gem_retire_requests_ring(rq->ring);
>  	}
>  
>  	return 0;
> @@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_wait *args = data;
>  	struct drm_i915_gem_object *obj;
> -	struct intel_engine_cs *ring = NULL;
>  	struct timespec timeout_stack, *timeout = NULL;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno = 0;
> -	int ret = 0;
> +	int i, n, ret = 0;
>  
>  	if (args->timeout_ns >= 0) {
>  		timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (ret)
>  		goto out;
>  
> -	if (obj->active) {
> -		seqno = obj->last_read_seqno;
> -		ring = obj->ring;
> -	}
> -
> -	if (seqno == 0)
> -		 goto out;
> +	if (!obj->active)
> +		goto out;
>  
>  	/* Do this after OLR check to make sure we make forward progress polling
>  	 * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto out;
>  	}
>  
> +	for (i = n = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +
>  	drm_gem_object_unreference(&obj->base);
> +
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> +	for (i = 0; i < n; i++) {
> +		if (ret == 0)
> +			ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
> +
> +		i915_request_put(rq[i]);
> +	}
> +
>  	if (timeout)
>  		args->timeout_ns = timespec_to_ns(timeout);
>  	return ret;
> @@ -3024,6 +3150,45 @@ out:
>  	return ret;
>  }
>  
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> +		  struct intel_engine_cs *to,
> +		  struct drm_i915_gem_object *obj)
> +{
> +	int ret, idx;
> +
> +	if (to == NULL)
> +		return i915_wait_request(rq);
> +
> +	/* XXX this is broken by VEBOX+ */
> +	idx = intel_ring_sync_index(rq->ring, to);
> +
> +	/* Optimization: Avoid semaphore sync when we are sure we already
> +	 * waited for an object with higher seqno */
> +	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
> +		return 0;
> +
> +	ret = i915_gem_check_olr(rq);
> +	if (ret)
> +		return ret;
> +
> +	if (!i915_request_complete(rq, true)) {
> +		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
> +		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* We must recheck the last_read request because sync_to()
> +	 * might have just caused seqno wrap under
> +	 * the radar.
> +	 */
> +	if (obj->last_read[rq->ring->id].request == rq)
> +		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_gem_object_sync - sync an object to a ring.
>   *
> @@ -3038,44 +3203,35 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to)
> +		     struct intel_engine_cs *to,
> +		     bool readonly)
>  {
> -	struct intel_engine_cs *from = obj->ring;
> -	u32 seqno;
> -	int ret, idx;
> +	struct i915_gem_request *rq;
> +	struct intel_engine_cs *semaphore;
> +	int ret = 0, i;
>  
> -	if (from == NULL || to == from)
> -		return 0;
> +	semaphore = NULL;
> +	if (i915_semaphore_is_enabled(obj->base.dev))
> +		semaphore = to;
>  
> -	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> -		return i915_gem_object_wait_rendering(obj, false);
> -
> -	/* XXX this is broken by VEBOX+ */
> -	idx = intel_ring_sync_index(from, to);
> -
> -	seqno = obj->last_read_seqno;
> -	/* Optimization: Avoid semaphore sync when we are sure we already
> -	 * waited for an object with higher seqno */
> -	if (seqno <= from->semaphore.sync_seqno[idx])
> -		return 0;
> -
> -	ret = 0;
> -	if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
> -		ret = i915_gem_check_olr(from, seqno);
> -		if (ret)
> -			return ret;
> +	if (readonly) {
> +		rq = obj->last_write.request;
> +		if (rq != NULL && to != rq->ring)
> +			ret = i915_request_sync(rq, semaphore, obj);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			rq = obj->last_read[i].request;
> +			if (rq == NULL || to == rq->ring)
> +				continue;
>  
> -		trace_i915_gem_ring_sync_to(from, to, seqno);
> -		ret = to->semaphore.sync_to(to, from, seqno);
> +			ret = i915_request_sync(rq, semaphore, obj);
> +			if (ret)
> +				break;
> +		}
>  	}
> -	if (!ret)
> -		/* We use last_read_seqno because sync_to()
> -		 * might have just caused seqno wrap under
> -		 * the radar.
> -		 */
> -		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
>  
>  	return ret;
> +
>  }
>  
>  static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> @@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
>  static int
>  i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
>  {
> -	if (obj->last_fenced_seqno) {
> -		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
> -		if (ret)
> -			return ret;
> +	int ret;
>  
> -		obj->last_fenced_seqno = 0;
> -	}
> +	if (obj->last_fence.request == NULL)
> +		return 0;
>  
> +	ret = i915_wait_request(obj->last_fence.request);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_retire__fence(obj);
>  	return 0;
>  }
>  
> @@ -3836,11 +3994,12 @@ int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>  	uint32_t old_write_domain, old_read_domains;
>  	int ret;
>  
>  	/* Not valid to be called on unbound objects. */
> -	if (!i915_gem_obj_bound_any(obj))
> +	if (vma == NULL)
>  		return -EINVAL;
>  
>  	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	if (!obj->active) {
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> -		if (vma)
> -			list_move_tail(&vma->mm_list,
> -				       &dev_priv->gtt.base.inactive_list);
> -
> -	}
> -
> +	list_move_tail(&vma->mm_list,
> +		       &dev_priv->gtt.base.inactive_list);

We've lost the obj->active check here and I didn't spot anything that
would justify that.
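
Just to illustrate what I'd expect to survive (keeping the old semantics,
untested):

	if (!obj->active)
		list_move_tail(&vma->mm_list,
			       &dev_priv->gtt.base.inactive_list);

If bumping the LRU unconditionally is intended, that wants a comment or a
separate patch.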

>  	return 0;
>  }
>  
> @@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	bool was_pin_display;
>  	int ret;
>  
> -	if (pipelined != obj->ring) {
> -		ret = i915_gem_object_sync(obj, pipelined);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_object_sync(obj, pipelined, true);
> +	if (ret)
> +		return ret;
>  
>  	/* Mark the pin_display early so that we account for the
>  	 * display coherency whilst setting up the cache domains.
> @@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
> -	struct drm_i915_gem_request *request;
> -	struct intel_engine_cs *ring = NULL;
> +	struct i915_gem_request *rq;
>  	unsigned reset_counter;
> -	u32 seqno = 0;
>  	int ret;
>  
>  	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> @@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  		return ret;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> -		if (time_after_eq(request->emitted_jiffies, recent_enough))
> +	list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
> +		if (time_after_eq(rq->emitted_jiffies, recent_enough))
>  			break;
> -
> -		ring = request->ring;
> -		seqno = request->seqno;
>  	}
> +	rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	spin_unlock(&file_priv->mm.lock);
>  
> -	if (seqno == 0)
> +	if (rq == NULL)
>  		return 0;
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
> +	ret = __wait_request(rq, reset_counter, true, NULL, NULL);
>  	if (ret == 0)
>  		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>  
> +	i915_request_put(rq);
>  	return ret;
>  }
>  
> @@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_busy *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> +	int ret, i;
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 */
>  	ret = i915_gem_object_flush_active(obj);
>  
> -	args->busy = obj->active;
> -	if (obj->ring) {
> +	args->busy = 0;
> +	if (obj->active) {
>  		BUILD_BUG_ON(I915_NUM_RINGS > 16);

Hm, this suggests we should size active to be 4 bits. Just to stay
consistent.
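
i.e. something like

	unsigned int active:4;

so the per-object counter and the busy ioctl bitmask top out at the same
16 rings.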

> -		args->busy |= intel_ring_flag(obj->ring) << 16;
> +		args->busy |= 1;
> +		for (i = 0; i < I915_NUM_RINGS; i++)  {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			args->busy |= 1 << (16 + i);
> +		}
>  	}
>  
>  	drm_gem_object_unreference(&obj->base);
> @@ -4584,8 +4738,13 @@ unlock:
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			  const struct drm_i915_gem_object_ops *ops)
>  {
> +	int i;
> +
>  	INIT_LIST_HEAD(&obj->global_list);
> -	INIT_LIST_HEAD(&obj->ring_list);
> +	INIT_LIST_HEAD(&obj->last_fence.ring_list);
> +	INIT_LIST_HEAD(&obj->last_write.ring_list);
> +	for (i = 0; i < I915_NUM_RINGS; i++)
> +		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
>  	INIT_LIST_HEAD(&obj->obj_exec_link);
>  	INIT_LIST_HEAD(&obj->vma_list);
>  
> @@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
>  static void
>  init_ring_lists(struct intel_engine_cs *ring)
>  {
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
> +	INIT_LIST_HEAD(&ring->fence_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  }
>  
> @@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  	 */
>  	spin_lock(&file_priv->mm.lock);
>  	while (!list_empty(&file_priv->mm.request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&file_priv->mm.request_list,
> -					   struct drm_i915_gem_request,
> -					   client_list);
> -		list_del(&request->client_list);
> -		request->file_priv = NULL;
> +		rq = list_first_entry(&file_priv->mm.request_list,
> +				      struct i915_gem_request,
> +				      client_list);
> +		list_del(&rq->client_list);
> +		rq->file_priv = NULL;
>  	}
>  	spin_unlock(&file_priv->mm.lock);
>  }
> @@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_vma *vma;
>  
> -	/* This WARN has probably outlived its usefulness (callers already
> -	 * WARN if they don't find the GGTT vma they expect). When removing,
> -	 * remember to remove the pre-check in is_pin_display() as well */
> -	if (WARN_ON(list_empty(&obj->vma_list)))
> -		return NULL;
> -

Smells like a separate patch. Maybe do it up-front if taking it out is too
invasive.

>  	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
>  	if (vma->vm != obj_to_ggtt(obj))
>  		return NULL;
>  
>  	return vma;
>  }
> +
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)

This one needs a big warning that it's only suitable as a hint for error
state and debugfs. If execbuf gets stuck in the slowpath we might end up
with slightly out-of-order reads (since now they don't sync cross-engine
any more).
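
Perhaps something like this right above the function (wording just a
suggestion):

	/*
	 * NOTE: Only a heuristic! Seqnos are not comparable across rings,
	 * so the result is only good enough for debugfs and error state
	 * capture, not for anything that needs to be correct.
	 */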

> +{
> +	u32 seqno = 0;
> +	struct i915_gem_request *rq = NULL;
> +	int i;
> +
> +	/* This is approximate as seqno cannot be used across rings */
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
> +			rq = obj->last_read[i].request, seqno = rq->seqno;
> +	}
> +
> +	return rq;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 79dc77b..690e2dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
>  		if (!lctx)
>  			continue;
>  
> -		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
> +		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
>  						      get_context_alignment(dev), 0));
> -			/* Fake a finish/inactive */
> -			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
> -			dctx->legacy_hw_ctx.rcs_state->active = 0;
> -		}

Again tastes like a separate patch for up-front merging.

>  
>  		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
> @@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
>  		WARN_ON(!dev_priv->ring[RCS].last_context);
>  		if (dev_priv->ring[RCS].last_context == dctx) {
>  			/* Fake switch to NULL context */
> -			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
>  			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>  			i915_gem_context_unreference(dctx);
>  			dev_priv->ring[RCS].last_context = NULL;
> @@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from != NULL) {
> -		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
> +
> +		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
> +
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> -		from->legacy_hw_ctx.rcs_state->dirty = 1;
> -		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
> +		from_obj->dirty = 1;
>  
>  		/* obj is kept alive until the next request by its active ref */
> -		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> +		i915_gem_object_ggtt_unpin(from_obj);
>  		i915_gem_context_unreference(from);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> index 57d4dde..787ea6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_exec.c
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
>  {
>  	int ret;
>  
> -	ret = i915_gem_object_sync(obj, ring);
> +	ret = i915_gem_object_sync(obj, ring, false);
>  	if (ret)
>  		return ret;
>  
> @@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
>  static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
>  				       struct intel_engine_cs *ring)
>  {
> -	obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> -	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
> -	obj->last_write_seqno = intel_ring_get_seqno(ring);
> -	obj->dirty = 1;

Would be nice to split out the semantic change of moving dirty = 1 into
move_to_active.

> +	obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
> +	obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
>  
>  	ring->gpu_caches_dirty = true;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0faab01..8f1c2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		ret = i915_gem_object_sync(obj, ring);
> +
> +		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
>  		if (ret)
>  			return ret;
>  
> @@ -956,40 +957,20 @@ static void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  				   struct intel_engine_cs *ring)
>  {
> -	u32 seqno = intel_ring_get_seqno(ring);
>  	struct i915_vma *vma;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -		struct drm_i915_gem_object *obj = vma->obj;
> -		u32 old_read = obj->base.read_domains;
> -		u32 old_write = obj->base.write_domain;
> -
> -		obj->base.write_domain = obj->base.pending_write_domain;
> -		if (obj->base.write_domain == 0)
> -			obj->base.pending_read_domains |= obj->base.read_domains;
> -		obj->base.read_domains = obj->base.pending_read_domains;
> -
> -		i915_vma_move_to_active(vma, ring);
> -		if (obj->base.write_domain) {
> -			obj->dirty = 1;
> -			obj->last_write_seqno = seqno;
> +		unsigned fenced;
>  
> -			intel_fb_obj_invalidate(obj, ring);
> -
> -			/* update for the implicit flush after a batch */
> -			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> -		}
> +		fenced = 0;
>  		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> -			obj->last_fenced_seqno = seqno;
> -			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -				struct drm_i915_private *dev_priv = to_i915(ring->dev);
> -				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> -					       &dev_priv->mm.fence_list);
> -			}
> +			fenced |= VMA_IS_FENCED;
> +			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> +				fenced |= VMA_HAS_FENCE;
>  		}
>  
> -		trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +		i915_vma_move_to_active(vma, ring, fenced);
>  	}
>  }
>  
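
Just to check that I'm reading the new interface correctly: the fence lru
bookkeeping that used to live here presumably moves into
i915_vma_move_to_active() and keys off the new flags, roughly like below.
Sketch only, not lifted from the patch; the field and flag names are taken
from the hunks above:

	/* inside i915_vma_move_to_active(vma, ring, fenced) */
	struct drm_i915_gem_object *obj = vma->obj;
	struct i915_gem_request *rq = intel_ring_get_request(ring);

	if (fenced & VMA_IS_FENCED) {
		obj->last_fence.request = rq;
		if (fenced & VMA_HAS_FENCE) {
			struct drm_i915_private *dev_priv = to_i915(ring->dev);

			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
				       &dev_priv->mm.fence_list);
		}
	}
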
> @@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  	ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)__i915_add_request(ring, file, obj, NULL);
> +	(void)__i915_add_request(ring, file, obj);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index e60be3f..fc1223c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>  	if (ret)
>  		goto out;
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
>  
> -	ret = __i915_add_request(ring, NULL, so.obj, NULL);
> +	ret = __i915_add_request(ring, NULL, so.obj);
>  	/* __i915_add_request moves object to inactive if it fails */
>  out:
>  	render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index af5d31a..e46fb34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>  
>  	if (ret == 0) {
>  		obj->fence_dirty =
> -			obj->last_fenced_seqno ||
> +			obj->last_fence.request ||
>  			obj->fence_reg != I915_FENCE_REG_NONE;
>  		obj->tiling_mode = tiling_mode;
>  		obj->stride = stride;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ebc8529..584b863 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  	if (i915_gem_obj_bound(src, vm))
>  		dst->gtt_offset = i915_gem_obj_offset(src, vm);
>  	else
> -		dst->gtt_offset = -1UL;
> +		dst->gtt_offset = -1;

Spurious change?

>  
>  	reloc_offset = dst->gtt_offset;
>  	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> @@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  		       struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
> -	err->rseqno = obj->last_read_seqno;
> -	err->wseqno = obj->last_write_seqno;
> +	err->rseqno = i915_request_seqno(rq);
> +	err->wseqno = i915_request_seqno(obj->last_write.request);
>  	err->gtt_offset = vma->node.start;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
> @@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
>  	err->userptr = obj->userptr.mm != NULL;
> -	err->ring = obj->ring ? obj->ring->id : -1;
> +	err->ring = i915_request_ring_id(rq);
>  	err->cache_level = obj->cache_level;
>  }
>  
> @@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  				  struct drm_i915_error_state *error)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	int i, count;
>  
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  
>  		i915_record_ring_state(dev, error, ring, &error->ring[i]);
>  
> -		request = i915_gem_find_active_request(ring);
> -		if (request) {
> +		rq = i915_gem_find_active_request(ring);

This reminds me that our locking for the error state capture, and also for the
guilty batch determination, is fairly ... nonexistent. This will be a fun
problem to fix once we make reset more common with per-engine resets and
short-lived timers for media workloads. Anyway, unrelated comment.
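
Continuing that tangent for a moment: with requests refcounted by this patch
the capture side could at least pin the request it dereferences (the lookup
itself stays racy, but the struct won't vanish underneath us). Illustrative
sketch only, using the i915_request_get/put helpers this patch introduces:

	rq = i915_gem_find_active_request(ring);
	if (rq)
		rq = i915_request_get(rq);	/* pin across the capture */

	/* ... capture rq->batch_obj, rq->ctx, ... */

	if (rq)
		i915_request_put(rq);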

> +		if (rq) {
>  			/* We need to copy these to an anonymous buffer
>  			 * as the simplest method to avoid being overwritten
>  			 * by userspace.
>  			 */
>  			error->ring[i].batchbuffer =
>  				i915_error_object_create(dev_priv,
> -							 request->batch_obj,
> -							 request->ctx ?
> -							 request->ctx->vm :
> +							 rq->batch_obj,
> +							 rq->ctx ?
> +							 rq->ctx->vm :
>  							 &dev_priv->gtt.base);
>  
>  			if (HAS_BROKEN_CS_TLB(dev_priv))
> @@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  					i915_error_ggtt_object_create(dev_priv,
>  							     ring->scratch.obj);
>  
> -			if (request->file_priv) {
> +			if (rq->file_priv) {
>  				struct task_struct *task;
>  
>  				rcu_read_lock();
> -				task = pid_task(request->file_priv->file->pid,
> +				task = pid_task(rq->file_priv->file->pid,
>  						PIDTYPE_PID);
>  				if (task) {
>  					strcpy(error->ring[i].comm, task->comm);
> @@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list)
> +		list_for_each_entry(rq, &ring->request_list, list)
>  			count++;
>  
>  		error->ring[i].num_requests = count;
> @@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		}
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			struct drm_i915_error_request *erq;
>  
>  			erq = &error->ring[i].requests[count++];
> -			erq->seqno = request->seqno;
> -			erq->jiffies = request->emitted_jiffies;
> -			erq->tail = request->tail;
> +			erq->seqno = rq->seqno;
> +			erq->jiffies = rq->emitted_jiffies;
> +			erq->tail = rq->tail;
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 717c111..6d4f5a7 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2935,14 +2935,14 @@ static u32
>  ring_last_seqno(struct intel_engine_cs *ring)
>  {
>  	return list_entry(ring->request_list.prev,
> -			  struct drm_i915_gem_request, list)->seqno;
> +			  struct i915_gem_request, list)->seqno;
>  }
>  
>  static bool
>  ring_idle(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	return (list_empty(&ring->request_list) ||
> -		i915_seqno_passed(seqno, ring_last_seqno(ring)));
> +		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
>  }
>  
>  static bool
> @@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
>  	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
>  		return -1;
>  
> -	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> +	if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
>  		return 1;
>  
>  	/* cursory check for an unkickable deadlock */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 75f423d..f1c2a28 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
>  	int i;
>  
>  	for_each_ring(ring, i915, i) {
> -		struct drm_i915_gem_request *rq;
> +		struct i915_gem_request *rq;
>  
>  		if (list_empty(&ring->request_list))
>  			continue;
>  
>  		rq = list_last_entry(&ring->request_list,
> -				     struct drm_i915_gem_request,
> +				     struct i915_gem_request,
>  				     list);
>  
> -		if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
> +		if (i915_request_complete(rq, true))
>  			continue;
>  
>  		return true;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 63f6875..0ebd85d 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
>  	    TP_fast_assign(
>  			   __entry->dev = ring->dev->primary->index;
>  			   __entry->ring = ring->id;
> -			   __entry->seqno = intel_ring_get_seqno(ring),
> +			   __entry->seqno = intel_ring_get_request(ring)->seqno,
>  			   __entry->flags = flags;
>  			   i915_trace_irq_get(ring, __entry->seqno);
>  			   ),
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d828f47..9b7931c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
>  	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
>  	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>  
> +	i915_request_put(work->flip_queued_request);
>  	kfree(work);
>  }
>  
> @@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
>  	else if (i915.use_mmio_flip > 0)
>  		return true;
>  	else
> -		return ring != obj->ring;
> +		return ring != i915_request_ring(obj->last_write.request);
>  }
>  
>  static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> @@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
>  
>  static int intel_postpone_flip(struct drm_i915_gem_object *obj)
>  {
> -	struct intel_engine_cs *ring;
> +	struct i915_gem_request *rq = obj->last_write.request;
>  	int ret;
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	if (!obj->last_write_seqno)
> -		return 0;
> -
> -	ring = obj->ring;
> -
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_write_seqno))
> +	if (rq == NULL)
>  		return 0;
>  
> -	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
> +	ret = i915_gem_check_olr(rq);
>  	if (ret)
>  		return ret;
>  
> -	if (WARN_ON(!ring->irq_get(ring)))
> +	if (i915_request_complete(rq, true))
> +		return 0;
> +
> +	if (WARN_ON(!rq->ring->irq_get(rq->ring)))
>  		return 0;
>  
>  	return 1;
> @@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs *ring)
>  		if (ring->id != mmio_flip->ring_id)
>  			continue;
>  
> -		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> +		if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
>  			intel_do_mmio_flip(intel_crtc);
>  			mmio_flip->seqno = 0;
>  			ring->irq_put(ring);
> @@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct i915_gem_request *rq;
>  	unsigned long irq_flags;
>  	int ret;
>  
> @@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
>  		return 0;
>  	}
>  
> +	rq = obj->last_write.request;
> +	if (WARN_ON(rq == NULL))
> +		return 0;
> +
>  	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> -	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> -	intel_crtc->mmio_flip.ring_id = obj->ring->id;
> +	intel_crtc->mmio_flip.seqno = rq->seqno;
> +	intel_crtc->mmio_flip.ring_id = rq->ring->id;
>  	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
>  
>  	/*
>  	 * Double check to catch cases where irq fired before
>  	 * mmio flip data was ready
>  	 */
> -	intel_notify_mmio_flip(obj->ring);
> +	intel_notify_mmio_flip(rq->ring);
>  	return 0;
>  }
>  
> @@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
>  		return false;
>  
>  	if (work->flip_ready_vblank == 0) {
> -		if (work->ring &&
> -		    !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
> -				      work->flip_queued_seqno))
> +		struct i915_gem_request *rq = work->flip_queued_request;
> +		if (rq && !i915_request_complete(rq, true))
>  			return false;
>  
>  		work->flip_ready_vblank = drm_vblank_count(dev, intel_crtc->pipe);
> @@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	enum pipe pipe = intel_crtc->pipe;
>  	struct intel_unpin_work *work;
>  	struct intel_engine_cs *ring;
> +	struct i915_gem_request *rq;
>  	unsigned long flags;
>  	int ret;
>  
> @@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	} else if (IS_IVYBRIDGE(dev)) {
>  		ring = &dev_priv->ring[BCS];
>  	} else if (INTEL_INFO(dev)->gen >= 7) {
> -		ring = obj->ring;
> +		ring = i915_request_ring(obj->last_write.request);
>  		if (ring == NULL || ring->id != RCS)
>  			ring = &dev_priv->ring[BCS];
>  	} else {
> @@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	}
>  
>  	if (use_mmio_flip(ring, obj, page_flip_flags)) {
> -		ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(obj->last_write.request));
>  		if (ret)
>  			goto cleanup_pending;
>  
> @@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  		if (ret)
>  			goto cleanup_unpin;
>  
> -		work->flip_queued_seqno = obj->last_write_seqno;
> -		work->ring = obj->ring;
> +		rq = obj->last_write.request;
>  	} else {
>  		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>  		if (ret)
> @@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  		if (ret)
>  			goto cleanup_unpin;
>  
> -		work->flip_queued_seqno = intel_ring_get_seqno(ring);
> -		work->ring = ring;
> +		rq = intel_ring_get_request(ring);
>  	}
>  
> +	work->flip_queued_request = i915_request_get(rq);
>  	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
>  	work->enable_stall_check = true;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 274f77c..5f336a3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -657,14 +657,13 @@ struct intel_unpin_work {
>  	struct drm_i915_gem_object *old_fb_obj;
>  	struct drm_i915_gem_object *pending_flip_obj;
>  	struct drm_pending_vblank_event *event;
> -	struct intel_engine_cs *ring;
>  	atomic_t pending;
>  #define INTEL_FLIP_INACTIVE	0
>  #define INTEL_FLIP_PENDING	1
>  #define INTEL_FLIP_COMPLETE	2
>  	u32 flip_count;
>  	u32 gtt_offset;
> -	u32 flip_queued_seqno;
> +	struct i915_gem_request *flip_queued_request;
>  	int flip_queued_vblank;
>  	int flip_ready_vblank;
>  	bool enable_stall_check;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index d94af27..c709ca5 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -183,7 +183,7 @@ struct intel_overlay {
>  	u32 flip_addr;
>  	struct drm_i915_gem_object *reg_bo;
>  	/* flip handling */
> -	uint32_t last_flip_req;
> +	struct i915_gem_request *flip_request;
>  	void (*flip_tail)(struct intel_overlay *);
>  };
>  
> @@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
>  		io_mapping_unmap(regs);
>  }
>  
> -static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> -					 void (*tail)(struct intel_overlay *))
> +/* recover from an interruption due to a signal
> + * We have to be careful not to repeat work forever and make forward progress. */
> +static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>  	int ret;
>  
> -	BUG_ON(overlay->last_flip_req);
> -	ret = i915_add_request(ring, &overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> +	if (overlay->flip_request == NULL)
> +		return 0;
>  
> -	overlay->flip_tail = tail;
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> +	ret = i915_wait_request(overlay->flip_request);
>  	if (ret)
>  		return ret;
> -	i915_gem_retire_requests(dev);
>  
> -	overlay->last_flip_req = 0;
> +	i915_request_put(overlay->flip_request);
> +	overlay->flip_request = NULL;
> +
> +	i915_gem_retire_requests(overlay->dev);
> +
> +	if (overlay->flip_tail)
> +		overlay->flip_tail(overlay);
> +
>  	return 0;
>  }
>  
> +static int intel_overlay_add_request(struct intel_overlay *overlay,
> +				     struct intel_engine_cs *ring,
> +				     void (*tail)(struct intel_overlay *))
> +{
> +	BUG_ON(overlay->flip_request);
> +	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
> +	overlay->flip_tail = tail;
> +
> +	return i915_add_request(ring);
> +}
> +
> +static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +					 struct intel_engine_cs *ring,
> +					 void (*tail)(struct intel_overlay *))
> +{
> +	intel_overlay_add_request(overlay, ring, tail);
> +	return intel_overlay_recover_from_interrupt(overlay);
> +}
> +
>  /* overlay needs to be disable in OCMD reg */
>  static int intel_overlay_on(struct intel_overlay *overlay)
>  {
> @@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>  	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
>  	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return intel_overlay_do_wait_request(overlay, NULL);
> +	return intel_overlay_do_wait_request(overlay, ring, NULL);
>  }
>  
>  /* overlay needs to be enabled in OCMD reg */
> @@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  
>  	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
>  	intel_ring_emit(ring, flip_addr);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return i915_add_request(ring, &overlay->last_flip_req);
> +	return intel_overlay_add_request(overlay, ring, NULL);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  {
>  	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>  
> +	i915_gem_track_fb(obj, NULL,
> +			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> +
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  
> @@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  		intel_ring_emit(ring, flip_addr);
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	}
> -	intel_ring_advance(ring);
> -
> -	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> -}
> -
> -/* recover from an interruption due to a signal
> - * We have to be careful not to repeat work forever an make forward progess. */
> -static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> -{
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> -
> -	if (overlay->last_flip_req == 0)
> -		return 0;
> +	__intel_ring_advance(ring);
>  
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> -	i915_gem_retire_requests(dev);
> -
> -	if (overlay->flip_tail)
> -		overlay->flip_tail(overlay);
> -
> -	overlay->last_flip_req = 0;
> -	return 0;
> +	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
>  }
>  
>  /* Wait for pending overlay flip and release old frame.
> @@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>   */
>  static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> +	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
> +	int ret = 0;
>  
>  	/* Only wait if there is actually an old frame to release to
>  	 * guarantee forward progress.
> @@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  		return 0;
>  
>  	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
> +		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +
>  		/* synchronous slowpath */
>  		ret = intel_ring_begin(ring, 2);
>  		if (ret)
> @@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  		intel_ring_emit(ring, MI_NOOP);
> -		intel_ring_advance(ring);
> +		__intel_ring_advance(ring);
>  
> -		ret = intel_overlay_do_wait_request(overlay,
> +		ret = intel_overlay_do_wait_request(overlay, ring,
>  						    intel_overlay_release_old_vid_tail);
> -		if (ret)
> -			return ret;
> -	}
> -
> -	intel_overlay_release_old_vid_tail(overlay);
> +	} else
> +		intel_overlay_release_old_vid_tail(overlay);
>  
> -
> -	i915_gem_track_fb(overlay->old_vid_bo, NULL,
> -			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> -	return 0;
> +	return ret;
>  }
>  
>  struct put_image_params {
> @@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
>  	iowrite32(0, &regs->OCMD);
>  	intel_overlay_unmap_regs(overlay, regs);
>  
> -	ret = intel_overlay_off(overlay);
> -	if (ret != 0)
> -		return ret;
> -
> -	intel_overlay_off_tail(overlay);
> -	return 0;
> +	return intel_overlay_off(overlay);
>  }
>  
>  static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7c5a6c5..ae96de5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  					   PIPE_CONTROL_FLUSH_ENABLE);
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		intel_ring_emit(signaller, 0);
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
> @@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
>  					   MI_FLUSH_DW_USE_GTT);
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
>  		intel_ring_emit(signaller, 0);
> @@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>  		if (mbox_reg != GEN6_NOSYNC) {
>  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>  			intel_ring_emit(signaller, mbox_reg);
> -			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +			intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		}
>  	}
>  
> @@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_WRITE_FLUSH |
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, 0);
>  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>  			PIPE_CONTROL_NOTIFY);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, 0);
>  	__intel_ring_advance(ring);
>  
> @@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  	}
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  	ringbuf->size = 32 * PAGE_SIZE;
>  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
> @@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>  
>  	intel_destroy_ringbuffer_obj(ringbuf);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	ring->preallocated_request = NULL;
>  
>  	if (ring->cleanup)
>  		ring->cleanup(ring);
> @@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  {
>  	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	struct drm_i915_gem_request *request;
> -	u32 seqno = 0;
> +	struct i915_gem_request *rq;
>  	int ret;
>  
>  	if (ringbuf->last_retired_head != -1) {
> @@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  			return 0;
>  	}
>  
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> -			seqno = request->seqno;
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
>  			break;
> -		}
>  	}
>  
> -	if (seqno == 0)
> +	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
>  		return -ENOSPC;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> +	ret = i915_wait_request(rq);
>  	if (ret)
>  		return ret;
>  
> @@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>  
>  int intel_ring_idle(struct intel_engine_cs *ring)
>  {
> -	u32 seqno;
>  	int ret;
>  
>  	/* We need to add any requests required to flush the objects and ring */
> -	if (ring->outstanding_lazy_seqno) {
> -		ret = i915_add_request(ring, NULL);
> +	if (ring->preallocated_request) {
> +		ret = i915_add_request(ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  	if (list_empty(&ring->request_list))
>  		return 0;
>  
> -	seqno = list_entry(ring->request_list.prev,
> -			   struct drm_i915_gem_request,
> -			   list)->seqno;
> -
> -	return i915_wait_seqno(ring, seqno);
> +	return i915_wait_request(container_of(ring->request_list.prev,
> +					      struct i915_gem_request,
> +					      list));
>  }
>  
>  static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
>  {
> -	if (ring->outstanding_lazy_seqno)
> -		return 0;
> +	struct i915_gem_request *rq;
> +	int ret;
>  
> -	if (ring->preallocated_lazy_request == NULL) {
> -		struct drm_i915_gem_request *request;
> +	if (ring->preallocated_request)
> +		return 0;
>  
> -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> +	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> +	if (rq == NULL)
> +		return -ENOMEM;
>  
> -		ring->preallocated_lazy_request = request;
> +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> +	if (ret) {
> +		kfree(rq);
> +		return ret;
>  	}
>  
> -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +	kref_init(&rq->kref);
> +	rq->ring = ring;
> +	rq->completed = false;
> +
> +	ring->preallocated_request = rq;
> +	return 0;
>  }
>  
>  static int __intel_ring_prepare(struct intel_engine_cs *ring,
> @@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>  		return ret;
>  
>  	/* Preallocate the olr before touching the ring, */
> -	ret = intel_ring_alloc_seqno(ring);
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>  		return ret;
>  
>  	/* but we may flush the seqno during prepare. */
> -	ret = intel_ring_alloc_seqno(ring);
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	BUG_ON(ring->outstanding_lazy_seqno);
> +	BUG_ON(ring->preallocated_request);
>  
>  	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
>  		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
> @@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
>  	ring->cleanup = render_ring_cleanup;
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  
>  	ringbuf->size = size;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index dcd2e44..2a78051 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -222,7 +222,7 @@ struct  intel_engine_cs {
>  	 *
>  	 * A reference is held on the buffer while on this list.
>  	 */
> -	struct list_head active_list;
> +	struct list_head read_list, write_list, fence_list;
>  
>  	/**
>  	 * List of breadcrumbs associated with GPU requests currently
> @@ -233,8 +233,7 @@ struct  intel_engine_cs {
>  	/**
>  	 * Do we have some not yet emitted requests outstanding?
>  	 */
> -	struct drm_i915_gem_request *preallocated_lazy_request;
> -	u32 outstanding_lazy_seqno;
> +	struct i915_gem_request *preallocated_request;
>  	bool gpu_caches_dirty;
>  	bool fbc_dirty;
>  
> @@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>  	return ringbuf->tail;
>  }
>  
> -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
> +static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
>  {
> -	BUG_ON(ring->outstanding_lazy_seqno == 0);
> -	return ring->outstanding_lazy_seqno;
> +	BUG_ON(ring->preallocated_request == 0);
> +	return ring->preallocated_request;
>  }
>  
>  static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


