[Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects
Daniel Vetter
daniel at ffwll.ch
Mon Jul 28 18:24:07 CEST 2014
On Fri, Jul 25, 2014 at 01:27:00PM +0100, Chris Wilson wrote:
> At the heart of this change is that the seqno is too low-level an
> abstraction to handle the growing complexities of command tracking, both
> with the introduction of multiple command queues with execbuffer and the
> potential for reordering with a scheduler. On top of the seqno we have
> the request. Conceptually this is just a fence, but it also has
> substantial bookkeeping of its own in order to track the context and
> batch in flight, for example. It is the central structure upon which we
> can extend with dependency tracking et al.
>
> As regards the objects, they were using the seqno as a simple fence,
> which they would check, or even wait upon, for command completion. This patch
> exchanges that seqno/ring pair with the request itself. For the
> majority, the lifetime of the request is ordered by how we retire objects
> then requests. However, both the unlocked waits and probing elsewhere do
> not tie into the normal request lifetimes and so we need to introduce a
> kref. Extending the objects to use the request as the fence naturally
> extends to segregating read/write fence tracking. This is significant
> as it reduces the number of semaphores we need to emit, reducing the
> likelihood of #54226, and improving performance overall.
>
> NOTE: this is not against bare drm-intel-nightly and is likely to
> conflict with execlists...
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Brad Volkin <bradley.d.volkin at intel.com>
Ok, read through it and I like it overall. Also, right now is the perfect
time to merge it since we're right before the merge window. But this here
needs to be split up a bit to cut out prep patches. I've noticed a few
things in-line, but there's also the mechanical stuff (like dropping the
drm_ prefix from requests).
-Daniel
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 37 +-
> drivers/gpu/drm/i915/i915_drv.h | 108 ++--
> drivers/gpu/drm/i915/i915_gem.c | 769 ++++++++++++++++-----------
> drivers/gpu/drm/i915/i915_gem_context.c | 19 +-
> drivers/gpu/drm/i915/i915_gem_exec.c | 10 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 37 +-
> drivers/gpu/drm/i915/i915_gem_render_state.c | 5 +-
> drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 35 +-
> drivers/gpu/drm/i915/i915_irq.c | 6 +-
> drivers/gpu/drm/i915/i915_perf.c | 6 +-
> drivers/gpu/drm/i915/i915_trace.h | 2 +-
> drivers/gpu/drm/i915/intel_display.c | 50 +-
> drivers/gpu/drm/i915/intel_drv.h | 3 +-
> drivers/gpu/drm/i915/intel_overlay.c | 118 ++--
> drivers/gpu/drm/i915/intel_ringbuffer.c | 83 +--
> drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +-
> 17 files changed, 745 insertions(+), 556 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 406e630..676d5f1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
> static void
> describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> {
> + struct i915_gem_request *rq = i915_gem_object_last_read(obj);
> struct i915_vma *vma;
> int pin_count = 0;
>
> - seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
> + seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
> &obj->base,
> get_pin_flag(obj),
> get_tiling_flag(obj),
> @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> obj->base.size / 1024,
> obj->base.read_domains,
> obj->base.write_domain,
> - obj->last_read_seqno,
> - obj->last_write_seqno,
> - obj->last_fenced_seqno,
> + i915_request_seqno(rq),
> + i915_request_seqno(obj->last_write.request),
> + i915_request_seqno(obj->last_fence.request),
> i915_cache_level_str(obj->cache_level),
> obj->dirty ? " dirty" : "",
> obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> *t = '\0';
> seq_printf(m, " (%s mappable)", s);
> }
> - if (obj->ring != NULL)
> - seq_printf(m, " (%s)", obj->ring->name);
> + if (rq)
> + seq_printf(m, " (%s)", rq->ring->name);
> if (obj->frontbuffer_bits)
> seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
> }
> @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
> continue;
>
> - if (obj->ring) /* XXX per-vma statistic */
> + if (obj->active) /* XXX per-vma statistic */
> stats->active += obj->base.size;
> else
> stats->inactive += obj->base.size;
> @@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> } else {
> if (i915_gem_obj_ggtt_bound(obj)) {
> stats->global += obj->base.size;
> - if (obj->ring)
> + if (obj->active)
> stats->active += obj->base.size;
> else
> stats->inactive += obj->base.size;
> @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
> seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
> pipe, plane);
> }
> - if (work->ring)
> + if (work->flip_queued_request) {
> + struct i915_gem_request *rq = work->flip_queued_request;
> seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
> - work->ring->name,
> - work->flip_queued_seqno,
> - work->ring->get_seqno(work->ring, true));
> - else
> + rq->ring->name, rq->seqno,
> + rq->ring->get_seqno(rq->ring, true));
> + } else
> seq_printf(m, "Flip not associated with any ring\n");
> seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
> work->flip_queued_vblank,
> @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
> struct drm_device *dev = node->minor->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct intel_engine_cs *ring;
> - struct drm_i915_gem_request *gem_request;
> + struct i915_gem_request *rq;
> int ret, count, i;
>
> ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
> continue;
>
> seq_printf(m, "%s requests:\n", ring->name);
> - list_for_each_entry(gem_request,
> - &ring->request_list,
> - list) {
> + list_for_each_entry(rq, &ring->request_list, list) {
> seq_printf(m, " %d @ %d\n",
> - gem_request->seqno,
> - (int) (jiffies - gem_request->emitted_jiffies));
> + rq->seqno,
> + (int)(jiffies - rq->emitted_jiffies));
> }
> count++;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9837b0f..5794d096 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -187,6 +187,7 @@ enum hpd_pin {
> struct drm_i915_private;
> struct i915_mm_struct;
> struct i915_mmu_object;
> +struct i915_gem_request;
>
> enum intel_dpll_id {
> DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
> struct drm_mm_node *stolen;
> struct list_head global_list;
>
> - struct list_head ring_list;
> /** Used in execbuf to temporarily hold a ref */
> struct list_head obj_exec_link;
>
> /**
> * This is set if the object is on the active lists (has pending
> - * rendering and so a non-zero seqno), and is not set if it i s on
> - * inactive (ready to be unbound) list.
> + * rendering and so a submitted request), and is not set if it is on
> + * inactive (ready to be unbound) list. We track activity per engine.
> */
> - unsigned int active:1;
> + unsigned int active:3;
Could we #define this and then add a BUILD_BUG_ON that 1 << width >=
I915_NUM_RINGS?
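Roughly like this, perhaps (untested sketch; I915_GEM_OBJECT_ACTIVE_BITS is
just a placeholder name, everything else is already in the patch):

        /* i915_drv.h */
        #define I915_GEM_OBJECT_ACTIVE_BITS 3

        unsigned int active:I915_GEM_OBJECT_ACTIVE_BITS;

        /* e.g. in i915_gem_object_init(); the counter must be able to
         * hold one reader per ring */
        BUILD_BUG_ON(I915_NUM_RINGS >= (1 << I915_GEM_OBJECT_ACTIVE_BITS));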
>
> /**
> * This is set if the object has been written to since last bound
> @@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
> void *dma_buf_vmapping;
> int vmapping_count;
>
> - struct intel_engine_cs *ring;
> -
> - /** Breadcrumb of last rendering to the buffer. */
> - uint32_t last_read_seqno;
> - uint32_t last_write_seqno;
> - /** Breadcrumb of last fenced GPU access to the buffer. */
> - uint32_t last_fenced_seqno;
> + /** Breadcrumbs of last rendering to the buffer. */
> + struct {
> + struct i915_gem_request *request;
> + struct list_head ring_list;
> + } last_write, last_read[I915_NUM_RINGS], last_fence;
>
> /** Current tiling stride for the object, if it's tiled. */
> uint32_t stride;
> @@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
> };
> #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
> +
> void i915_gem_track_fb(struct drm_i915_gem_object *old,
> struct drm_i915_gem_object *new,
> unsigned frontbuffer_bits);
> @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
> * sequence-number comparisons on buffer last_rendering_seqnos, and associate
> * an emission time with seqnos for tracking how far ahead of the GPU we are.
> */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> + struct kref kref;
> +
> /** On Which ring this request was generated */
> struct intel_engine_cs *ring;
>
> @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
> struct drm_i915_file_private *file_priv;
> /** file_priv list entry for this request */
> struct list_head client_list;
> +
> + bool completed:1;
> };
>
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> + return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> + return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> + return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> + return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq, bool lazy)
> +{
> + if (!rq->completed)
> + rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
> + rq->seqno);
> + return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> + if (rq)
> + kref_get(&rq->kref);
> + return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> + if (rq)
> + kref_put(&rq->kref, __i915_request_free);
> +}
> +
> struct drm_i915_file_private {
> struct drm_i915_private *dev_priv;
> struct drm_file *file;
> @@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>
> int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to);
> + struct intel_engine_cs *to,
> + bool readonly);
> void i915_vma_move_to_active(struct i915_vma *vma,
> - struct intel_engine_cs *ring);
> + struct intel_engine_cs *ring,
> + unsigned fenced);
> +#define VMA_IS_FENCED 0x1
> +#define VMA_HAS_FENCE 0x2
> int i915_gem_dumb_create(struct drm_file *file_priv,
> struct drm_device *dev,
> struct drm_mode_create_dumb *args);
> int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
> uint32_t handle, uint64_t *offset);
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> - return (int32_t)(seq1 - seq2) >= 0;
> -}
>
> int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
> int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
> @@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
> bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
> void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
>
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
> i915_gem_find_active_request(struct intel_engine_cs *ring);
>
> bool i915_gem_retire_requests(struct drm_device *dev);
> void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
> int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> bool interruptible);
> -int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
> +int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
>
> static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
> {
> @@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
> int __must_check i915_gem_suspend(struct drm_device *dev);
> int __i915_add_request(struct intel_engine_cs *ring,
> struct drm_file *file,
> - struct drm_i915_gem_object *batch_obj,
> - u32 *seqno);
> -#define i915_add_request(ring, seqno) \
> - __i915_add_request(ring, NULL, NULL, seqno)
> -int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
> - uint32_t seqno);
> + struct drm_i915_gem_object *batch_obj);
> +#define i915_add_request(ring) \
> + __i915_add_request(ring, NULL, NULL)
> +int __must_check i915_wait_request(struct i915_gem_request *rq);
> int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> int __must_check
> i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f3ad6fb..d208658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -48,8 +48,6 @@ static __must_check int
> i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> struct drm_i915_file_private *file_priv,
> bool readonly);
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj);
>
> static void i915_gem_write_fence(struct drm_device *dev, int reg,
> struct drm_i915_gem_object *obj);
> @@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
> spin_unlock(&dev_priv->mm.object_stat_lock);
> }
>
> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> + intel_fb_obj_flush(obj, true);
> + obj->last_write.request = NULL;
> + list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> + obj->last_fence.request = NULL;
> + list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> + struct intel_engine_cs *ring)
> +{
> + struct i915_vma *vma;
> +
> + BUG_ON(obj->active == 0);
> + BUG_ON(obj->base.write_domain);
> +
> + obj->last_read[ring->id].request = NULL;
> + list_del_init(&obj->last_read[ring->id].ring_list);
> +
> + if (--obj->active)
> + return;
> +
> + BUG_ON(obj->last_write.request);
> + BUG_ON(obj->last_fence.request);
> +
> + list_for_each_entry(vma, &obj->vma_list, vma_link) {
> + if (!list_empty(&vma->mm_list))
> + list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> + }
> +
> + drm_gem_object_unreference(&obj->base);
> +
> + WARN_ON(i915_verify_lists(dev));
> +}
> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> + struct i915_gem_request *rq;
> + int i;
> +
> + if (!obj->active)
> + return;
> +
> + rq = obj->last_write.request;
> + if (rq && i915_request_complete(rq, true))
> + i915_gem_object_retire__write(obj);
> +
> + rq = obj->last_fence.request;
> + if (rq && i915_request_complete(rq, true))
> + i915_gem_object_retire__fence(obj);
> +
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + rq = obj->last_read[i].request;
> + if (rq && i915_request_complete(rq, true))
> + i915_gem_object_retire__read(obj, rq->ring);
> + }
> +}
> +
> static int
> i915_gem_wait_for_error(struct i915_gpu_error *error)
> {
> @@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
> * equal.
> */
> int
> -i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
> +i915_gem_check_olr(struct i915_gem_request *rq)
> {
> int ret;
>
> - BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> + BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
>
> ret = 0;
> - if (seqno == ring->outstanding_lazy_seqno)
> - ret = i915_add_request(ring, NULL);
> + if (rq == rq->ring->preallocated_request)
> + ret = i915_add_request(rq->ring);
>
> return ret;
> }
> @@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
> }
>
> /**
> - * __wait_seqno - wait until execution of seqno has finished
> - * @ring: the ring expected to report seqno
> - * @seqno: duh!
> + * __wait_request - wait until execution of request has finished
> + * @request: the request to wait upon
> * @reset_counter: reset sequence associated with the given seqno
> * @interruptible: do an interruptible wait (normally yes)
> * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> @@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
> * Returns 0 if the seqno was found within the alloted time. Else returns the
> * errno with remaining time filled in timeout argument.
> */
> -static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> - unsigned reset_counter,
> - bool interruptible,
> - struct timespec *timeout,
> - struct drm_i915_file_private *file_priv)
> +static int __wait_request(struct i915_gem_request *rq,
> + unsigned reset_counter,
> + bool interruptible,
> + struct timespec *timeout,
> + struct drm_i915_file_private *file_priv)
> {
> + struct intel_engine_cs *ring = rq->ring;
> struct drm_device *dev = ring->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> const bool irq_test_in_progress =
> ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
> struct timespec before, now;
> DEFINE_WAIT(wait);
> unsigned long timeout_expire;
> + u32 seqno = rq->seqno;
> int ret;
>
> WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>
> - if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> + if (i915_request_complete(rq, true))
> return 0;
>
> timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> @@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> break;
> }
>
> - if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> + if (i915_request_complete(rq, false)) {
> ret = 0;
> break;
> }
> @@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> * request and object lists appropriately for that event.
> */
> int
> -i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
> +i915_wait_request(struct i915_gem_request *rq)
> {
> - struct drm_device *dev = ring->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - bool interruptible = dev_priv->mm.interruptible;
> + struct drm_device *dev = rq->ring->dev;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> int ret;
>
> - BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> - BUG_ON(seqno == 0);
> + if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
> + return -EINVAL;
> +
> + if (i915_request_complete(rq, true))
> + return 0;
>
> - ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> + ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> + dev_priv->mm.interruptible);
> if (ret)
> return ret;
>
> - ret = i915_gem_check_olr(ring, seqno);
> + ret = i915_gem_check_olr(rq);
> if (ret)
> return ret;
>
> - return __wait_seqno(ring, seqno,
> - atomic_read(&dev_priv->gpu_error.reset_counter),
> - interruptible, NULL, NULL);
> -}
> -
> -static int
> -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *ring)
> -{
> - if (!obj->active)
> - return 0;
> -
> - /* Manually manage the write flush as we may have not yet
> - * retired the buffer.
> - *
> - * Note that the last_write_seqno is always the earlier of
> - * the two (read/write) seqno, so if we haved successfully waited,
> - * we know we have passed the last write.
> - */
> - obj->last_write_seqno = 0;
> -
> - return 0;
> + return __wait_request(rq,
> + atomic_read(&dev_priv->gpu_error.reset_counter),
> + dev_priv->mm.interruptible, NULL, NULL);
> }
>
> /**
> @@ -1544,19 +1594,37 @@ static __must_check int
> i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> bool readonly)
> {
> - struct intel_engine_cs *ring = obj->ring;
> - u32 seqno;
> - int ret;
> + int i, ret;
>
> - seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> - if (seqno == 0)
> - return 0;
> + if (readonly) {
> + if (obj->last_write.request == NULL)
> + return 0;
>
> - ret = i915_wait_seqno(ring, seqno);
> - if (ret)
> - return ret;
> + ret = i915_wait_request(obj->last_write.request);
> + if (ret)
> + return ret;
> + } else {
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + if (obj->last_read[i].request == NULL)
> + continue;
> +
> + ret = i915_wait_request(obj->last_read[i].request);
> + if (ret)
> + return ret;
> + }
> + }
>
> - return i915_gem_object_wait_rendering__tail(obj, ring);
> + /* Manually manage the write flush as we may have not yet
> + * retired the buffer.
> + *
> + * Note that the last_write_seqno is always the earlier of
> + * the two (read/write) seqno, so if we have successfully waited,
> + * we know we have passed the last write.
> + */
> + if (obj->last_write.request)
> + i915_gem_object_retire__write(obj);
> +
> + return 0;
> }
>
> /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> {
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> - struct intel_engine_cs *ring = obj->ring;
> + struct i915_gem_request *rq[I915_NUM_RINGS] = {};
> unsigned reset_counter;
> - u32 seqno;
> - int ret;
> + int i, n, ret;
>
> BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> BUG_ON(!dev_priv->mm.interruptible);
>
> - seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> - if (seqno == 0)
> + n = 0;
> + if (readonly) {
> + if (obj->last_write.request)
> + rq[n++] = i915_request_get(obj->last_write.request);
> + } else {
> + for (i = 0; i < I915_NUM_RINGS; i++)
> + if (obj->last_read[i].request)
> + rq[n++] = i915_request_get(obj->last_read[i].request);
> + }
> + if (n == 0)
> return 0;
>
> ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
> if (ret)
> - return ret;
> + goto out;
>
> - ret = i915_gem_check_olr(ring, seqno);
> - if (ret)
> - return ret;
> + for (i = 0; i < n; i++) {
> + ret = i915_gem_check_olr(rq[i]);
> + if (ret)
> + goto out;
> + }
>
> reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
> mutex_unlock(&dev->struct_mutex);
> - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> + for (i = 0; ret == 0 && i < n; i++)
> + ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
> +
> mutex_lock(&dev->struct_mutex);
> - if (ret)
> - return ret;
>
> - return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> + for (i = 0; i < n; i++)
> + i915_request_put(rq[i]);
> +
> + return ret;
> }
>
> /**
> @@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> -static void
> -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *ring)
> -{
> - u32 seqno = intel_ring_get_seqno(ring);
> -
> - BUG_ON(ring == NULL);
> - if (obj->ring != ring && obj->last_write_seqno) {
> - /* Keep the seqno relative to the current ring */
> - obj->last_write_seqno = seqno;
> - }
> - obj->ring = ring;
> -
> - /* Add a reference if we're newly entering the active list. */
> - if (!obj->active) {
> - drm_gem_object_reference(&obj->base);
> - obj->active = 1;
> - }
> -
> - list_move_tail(&obj->ring_list, &ring->active_list);
> -
> - obj->last_read_seqno = seqno;
> -}
> -
> void i915_vma_move_to_active(struct i915_vma *vma,
> - struct intel_engine_cs *ring)
> + struct intel_engine_cs *ring,
> + unsigned fenced)
> {
> - list_move_tail(&vma->mm_list, &vma->vm->active_list);
> - return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> + struct drm_i915_gem_object *obj = vma->obj;
> + struct i915_gem_request *rq = intel_ring_get_request(ring);
> + u32 old_read = obj->base.read_domains;
> + u32 old_write = obj->base.write_domain;
>
> -static void
> -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> -{
> - struct i915_vma *vma;
> + BUG_ON(rq == NULL);
>
> - BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
> - BUG_ON(!obj->active);
> + obj->base.write_domain = obj->base.pending_write_domain;
> + if (obj->base.write_domain == 0)
> + obj->base.pending_read_domains |= obj->base.read_domains;
> + obj->base.read_domains = obj->base.pending_read_domains;
>
> - list_for_each_entry(vma, &obj->vma_list, vma_link) {
> - if (!list_empty(&vma->mm_list))
> - list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> - }
> -
> - intel_fb_obj_flush(obj, true);
> -
> - list_del_init(&obj->ring_list);
> - obj->ring = NULL;
> + obj->base.pending_read_domains = 0;
> + obj->base.pending_write_domain = 0;
>
> - obj->last_read_seqno = 0;
> - obj->last_write_seqno = 0;
> - obj->base.write_domain = 0;
> + trace_i915_gem_object_change_domain(obj, old_read, old_write);
> + if (obj->base.read_domains == 0)
> + return;
>
> - obj->last_fenced_seqno = 0;
> + /* Add a reference if we're newly entering the active list. */
> + if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
> + drm_gem_object_reference(&obj->base);
>
> - obj->active = 0;
> - drm_gem_object_unreference(&obj->base);
> + obj->last_read[ring->id].request = rq;
> + list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
>
> - WARN_ON(i915_verify_lists(dev));
> -}
> + if (obj->base.write_domain) {
> + obj->dirty = 1;
> + obj->last_write.request = rq;
> + list_move_tail(&obj->last_write.ring_list, &ring->write_list);
> + intel_fb_obj_invalidate(obj, ring);
>
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj)
> -{
> - struct intel_engine_cs *ring = obj->ring;
> + /* update for the implicit flush after a batch */
> + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> + }
>
> - if (ring == NULL)
> - return;
> + if (fenced) {
> + obj->last_fence.request = rq;
> + list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
> + if (fenced & 2) {
Please use the #define here ...
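I.e. with the VMA_HAS_FENCE #define added in i915_drv.h (sketch only):

        if (fenced & VMA_HAS_FENCE) {
                struct drm_i915_private *dev_priv = to_i915(ring->dev);

                list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
                               &dev_priv->mm.fence_list);
        }

and arguably the outer check reads better as fenced & VMA_IS_FENCED too.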
> + struct drm_i915_private *dev_priv = to_i915(ring->dev);
> + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> + &dev_priv->mm.fence_list);
> + }
> + }
>
> - if (i915_seqno_passed(ring->get_seqno(ring, true),
> - obj->last_read_seqno))
> - i915_gem_object_move_to_inactive(obj);
> + list_move_tail(&vma->mm_list, &vma->vm->active_list);
> }
>
> static int
> @@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>
> int __i915_add_request(struct intel_engine_cs *ring,
> struct drm_file *file,
> - struct drm_i915_gem_object *obj,
> - u32 *out_seqno)
> + struct drm_i915_gem_object *obj)
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
> u32 request_ring_position, request_start;
> int ret;
>
> @@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
> if (ret)
> return ret;
>
> - request = ring->preallocated_lazy_request;
> - if (WARN_ON(request == NULL))
> + rq = ring->preallocated_request;
> + if (WARN_ON(rq == NULL))
> return -ENOMEM;
>
> /* Record the position of the start of the request so that
> @@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
> if (ret)
> return ret;
>
> - request->seqno = intel_ring_get_seqno(ring);
> - request->ring = ring;
> - request->head = request_start;
> - request->tail = request_ring_position;
> + rq->head = request_start;
> + rq->tail = request_ring_position;
>
> /* Whilst this request exists, batch_obj will be on the
> * active_list, and so will hold the active reference. Only when this
> @@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
> * inactive_list and lose its active reference. Hence we do not need
> * to explicitly hold another reference here.
> */
> - request->batch_obj = obj;
> + rq->batch_obj = obj;
>
> /* Hold a reference to the current context so that we can inspect
> * it later in case a hangcheck error event fires.
> */
> - request->ctx = ring->last_context;
> - if (request->ctx)
> - i915_gem_context_reference(request->ctx);
> + rq->ctx = ring->last_context;
> + if (rq->ctx)
> + i915_gem_context_reference(rq->ctx);
>
> - request->emitted_jiffies = jiffies;
> - list_add_tail(&request->list, &ring->request_list);
> - request->file_priv = NULL;
> + rq->emitted_jiffies = jiffies;
> + list_add_tail(&rq->list, &ring->request_list);
> + rq->file_priv = NULL;
>
> if (file) {
> struct drm_i915_file_private *file_priv = file->driver_priv;
>
> spin_lock(&file_priv->mm.lock);
> - request->file_priv = file_priv;
> - list_add_tail(&request->client_list,
> + rq->file_priv = file_priv;
> + list_add_tail(&rq->client_list,
> &file_priv->mm.request_list);
> spin_unlock(&file_priv->mm.lock);
> }
>
> - trace_i915_gem_request_add(ring, request->seqno);
> - ring->outstanding_lazy_seqno = 0;
> - ring->preallocated_lazy_request = NULL;
> + trace_i915_gem_request_add(ring, rq->seqno);
> + ring->preallocated_request = NULL;
>
> if (!dev_priv->ums.mm_suspended) {
> i915_queue_hangcheck(ring->dev);
> @@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
> intel_mark_busy(dev_priv->dev);
> }
>
> - if (out_seqno)
> - *out_seqno = request->seqno;
> return 0;
> }
>
> static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
> {
> - struct drm_i915_file_private *file_priv = request->file_priv;
> + struct drm_i915_file_private *file_priv = rq->file_priv;
>
> if (!file_priv)
> return;
>
> spin_lock(&file_priv->mm.lock);
> - list_del(&request->client_list);
> - request->file_priv = NULL;
> + list_del(&rq->client_list);
> + rq->file_priv = NULL;
> spin_unlock(&file_priv->mm.lock);
> }
>
> @@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
> }
> }
>
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
> +{
> + struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> + kfree(rq);
> +}
> +
> +static void i915_request_retire(struct i915_gem_request *rq)
> {
> - list_del(&request->list);
> - i915_gem_request_remove_from_client(request);
> + rq->completed = true;
> +
> + list_del(&rq->list);
> + i915_gem_request_remove_from_client(rq);
>
> - if (request->ctx)
> - i915_gem_context_unreference(request->ctx);
> + if (rq->ctx) {
> + i915_gem_context_unreference(rq->ctx);
> + rq->ctx = NULL;
> + }
>
> - kfree(request);
> + i915_request_put(rq);
> }
>
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
> i915_gem_find_active_request(struct intel_engine_cs *ring)
> {
> - struct drm_i915_gem_request *request;
> - u32 completed_seqno;
> + struct i915_gem_request *rq;
>
> - completed_seqno = ring->get_seqno(ring, false);
> -
> - list_for_each_entry(request, &ring->request_list, list) {
> - if (i915_seqno_passed(completed_seqno, request->seqno))
> + list_for_each_entry(rq, &ring->request_list, list) {
> + if (i915_request_complete(rq, false))
> continue;
>
> - return request;
> + return rq;
> }
>
> return NULL;
> @@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
> static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
> struct intel_engine_cs *ring)
> {
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
> bool ring_hung;
>
> - request = i915_gem_find_active_request(ring);
> + rq = i915_gem_find_active_request(ring);
>
> - if (request == NULL)
> + if (rq == NULL)
> return;
>
> ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>
> - i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> + i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
>
> - list_for_each_entry_continue(request, &ring->request_list, list)
> - i915_set_reset_status(dev_priv, request->ctx, false);
> + list_for_each_entry_continue(rq, &ring->request_list, list)
> + i915_set_reset_status(dev_priv, rq->ctx, false);
> }
>
> static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
> struct intel_engine_cs *ring)
> {
> - while (!list_empty(&ring->active_list)) {
> + while (!list_empty(&ring->write_list)) {
> struct drm_i915_gem_object *obj;
>
> - obj = list_first_entry(&ring->active_list,
> + obj = list_first_entry(&ring->write_list,
> struct drm_i915_gem_object,
> - ring_list);
> + last_write.ring_list);
>
> - i915_gem_object_move_to_inactive(obj);
> + i915_gem_object_retire__write(obj);
> + }
> +
> + while (!list_empty(&ring->fence_list)) {
> + struct drm_i915_gem_object *obj;
> +
> + obj = list_first_entry(&ring->fence_list,
> + struct drm_i915_gem_object,
> + last_fence.ring_list);
> +
> + i915_gem_object_retire__fence(obj);
> + }
> +
> + while (!list_empty(&ring->read_list)) {
> + struct drm_i915_gem_object *obj;
> +
> + obj = list_first_entry(&ring->read_list,
> + struct drm_i915_gem_object,
> + last_read[ring->id].ring_list);
> +
> + i915_gem_object_retire__read(obj, ring);
> }
>
> /*
> @@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
> * the request.
> */
> while (!list_empty(&ring->request_list)) {
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
>
> - request = list_first_entry(&ring->request_list,
> - struct drm_i915_gem_request,
> - list);
> + rq = list_first_entry(&ring->request_list,
> + struct i915_gem_request,
> + list);
>
> - i915_gem_free_request(request);
> + i915_request_retire(rq);
> }
>
> /* These may not have been flush before the reset, do so now */
> - kfree(ring->preallocated_lazy_request);
> - ring->preallocated_lazy_request = NULL;
> - ring->outstanding_lazy_seqno = 0;
> + kfree(ring->preallocated_request);
> + ring->preallocated_request = NULL;
> }
>
> void i915_gem_restore_fences(struct drm_device *dev)
> @@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
> * by the ringbuffer to the flushing/inactive lists as appropriate,
> * before we free the context associated with the requests.
> */
> - while (!list_empty(&ring->active_list)) {
> + while (!list_empty(&ring->write_list)) {
> + struct drm_i915_gem_object *obj;
> +
> + obj = list_first_entry(&ring->write_list,
> + struct drm_i915_gem_object,
> + last_write.ring_list);
> +
> + if (!__i915_seqno_passed(seqno,
> + obj->last_write.request->seqno))
> + break;
> +
> + i915_gem_object_retire__write(obj);
> + }
> +
> + while (!list_empty(&ring->fence_list)) {
> struct drm_i915_gem_object *obj;
>
> - obj = list_first_entry(&ring->active_list,
> - struct drm_i915_gem_object,
> - ring_list);
> + obj = list_first_entry(&ring->fence_list,
> + struct drm_i915_gem_object,
> + last_fence.ring_list);
>
> - if (!i915_seqno_passed(seqno, obj->last_read_seqno))
> + if (!__i915_seqno_passed(seqno,
> + obj->last_fence.request->seqno))
> break;
>
> - i915_gem_object_move_to_inactive(obj);
> + i915_gem_object_retire__fence(obj);
> }
>
> + while (!list_empty(&ring->read_list)) {
> + struct drm_i915_gem_object *obj;
> +
> + obj = list_first_entry(&ring->read_list,
> + struct drm_i915_gem_object,
> + last_read[ring->id].ring_list);
> +
> + if (!__i915_seqno_passed(seqno,
> + obj->last_read[ring->id].request->seqno))
> + break;
> +
> + i915_gem_object_retire__read(obj, ring);
> + }
>
> while (!list_empty(&ring->request_list)) {
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
>
> - request = list_first_entry(&ring->request_list,
> - struct drm_i915_gem_request,
> - list);
> + rq = list_first_entry(&ring->request_list,
> + struct i915_gem_request,
> + list);
>
> - if (!i915_seqno_passed(seqno, request->seqno))
> + if (!__i915_seqno_passed(seqno, rq->seqno))
> break;
>
> - trace_i915_gem_request_retire(ring, request->seqno);
> + trace_i915_gem_request_retire(ring, rq->seqno);
> /* We know the GPU must have read the request to have
> * sent us the seqno + interrupt, so use the position
> * of tail of the request to update the last known position
> * of the GPU head.
> */
> - ring->buffer->last_retired_head = request->tail;
> + ring->buffer->last_retired_head = rq->tail;
>
> - i915_gem_free_request(request);
> + i915_request_retire(rq);
> }
>
> if (unlikely(ring->trace_irq_seqno &&
> - i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> + __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> ring->irq_put(ring);
> ring->trace_irq_seqno = 0;
> }
> @@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> static int
> i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
> {
> - int ret;
> + int i;
>
> - if (obj->active) {
> - ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
> + if (!obj->active)
> + return 0;
> +
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + struct i915_gem_request *rq = obj->last_read[i].request;
> + int ret;
> +
> + if (rq == NULL)
> + continue;
> +
> + ret = i915_gem_check_olr(rq);
> if (ret)
> return ret;
>
> - i915_gem_retire_requests_ring(obj->ring);
> + i915_gem_retire_requests_ring(rq->ring);
> }
>
> return 0;
> @@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_wait *args = data;
> struct drm_i915_gem_object *obj;
> - struct intel_engine_cs *ring = NULL;
> struct timespec timeout_stack, *timeout = NULL;
> + struct i915_gem_request *rq[I915_NUM_RINGS] = {};
> unsigned reset_counter;
> - u32 seqno = 0;
> - int ret = 0;
> + int i, n, ret = 0;
>
> if (args->timeout_ns >= 0) {
> timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> if (ret)
> goto out;
>
> - if (obj->active) {
> - seqno = obj->last_read_seqno;
> - ring = obj->ring;
> - }
> -
> - if (seqno == 0)
> - goto out;
> + if (!obj->active)
> + goto out;
>
> /* Do this after OLR check to make sure we make forward progress polling
> * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto out;
> }
>
> + for (i = n = 0; i < I915_NUM_RINGS; i++) {
> + if (obj->last_read[i].request == NULL)
> + continue;
> +
> + rq[n++] = i915_request_get(obj->last_read[i].request);
> + }
> +
> drm_gem_object_unreference(&obj->base);
> +
> reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
> mutex_unlock(&dev->struct_mutex);
>
> - ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> + for (i = 0; i < n; i++) {
> + if (ret == 0)
> + ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
> +
> + i915_request_put(rq[i]);
> + }
> +
> if (timeout)
> args->timeout_ns = timespec_to_ns(timeout);
> return ret;
> @@ -3024,6 +3150,45 @@ out:
> return ret;
> }
>
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> + struct intel_engine_cs *to,
> + struct drm_i915_gem_object *obj)
> +{
> + int ret, idx;
> +
> + if (to == NULL)
> + return i915_wait_request(rq);
> +
> + /* XXX this is broken by VEBOX+ */
> + idx = intel_ring_sync_index(rq->ring, to);
> +
> + /* Optimization: Avoid semaphore sync when we are sure we already
> + * waited for an object with higher seqno */
> + if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
> + return 0;
> +
> + ret = i915_gem_check_olr(rq);
> + if (ret)
> + return ret;
> +
> + if (!i915_request_complete(rq, true)) {
> + trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
> + ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
> + if (ret)
> + return ret;
> + }
> +
> + /* We must recheck the last_read request because sync_to()
> + * might have just caused seqno wrap under
> + * the radar.
> + */
> + if (obj->last_read[rq->ring->id].request == rq)
> + rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
> +
> + return 0;
> +}
> +
> /**
> * i915_gem_object_sync - sync an object to a ring.
> *
> @@ -3038,44 +3203,35 @@ out:
> */
> int
> i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to)
> + struct intel_engine_cs *to,
> + bool readonly)
> {
> - struct intel_engine_cs *from = obj->ring;
> - u32 seqno;
> - int ret, idx;
> + struct i915_gem_request *rq;
> + struct intel_engine_cs *semaphore;
> + int ret = 0, i;
>
> - if (from == NULL || to == from)
> - return 0;
> + semaphore = NULL;
> + if (i915_semaphore_is_enabled(obj->base.dev))
> + semaphore = to;
>
> - if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> - return i915_gem_object_wait_rendering(obj, false);
> -
> - /* XXX this is broken by VEBOX+ */
> - idx = intel_ring_sync_index(from, to);
> -
> - seqno = obj->last_read_seqno;
> - /* Optimization: Avoid semaphore sync when we are sure we already
> - * waited for an object with higher seqno */
> - if (seqno <= from->semaphore.sync_seqno[idx])
> - return 0;
> -
> - ret = 0;
> - if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
> - ret = i915_gem_check_olr(from, seqno);
> - if (ret)
> - return ret;
> + if (readonly) {
> + rq = obj->last_write.request;
> + if (rq != NULL && to != rq->ring)
> + ret = i915_request_sync(rq, semaphore, obj);
> + } else {
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + rq = obj->last_read[i].request;
> + if (rq == NULL || to == rq->ring)
> + continue;
>
> - trace_i915_gem_ring_sync_to(from, to, seqno);
> - ret = to->semaphore.sync_to(to, from, seqno);
> + ret = i915_request_sync(rq, semaphore, obj);
> + if (ret)
> + break;
> + }
> }
> - if (!ret)
> - /* We use last_read_seqno because sync_to()
> - * might have just caused seqno wrap under
> - * the radar.
> - */
> - from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
>
> return ret;
> +
> }
>
> static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> @@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
> static int
> i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
> {
> - if (obj->last_fenced_seqno) {
> - int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
> - if (ret)
> - return ret;
> + int ret;
>
> - obj->last_fenced_seqno = 0;
> - }
> + if (obj->last_fence.request == NULL)
> + return 0;
>
> + ret = i915_wait_request(obj->last_fence.request);
> + if (ret)
> + return ret;
> +
> + i915_gem_object_retire__fence(obj);
> return 0;
> }
>
> @@ -3836,11 +3994,12 @@ int
> i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> uint32_t old_write_domain, old_read_domains;
> int ret;
>
> /* Not valid to be called on unbound objects. */
> - if (!i915_gem_obj_bound_any(obj))
> + if (vma == NULL)
> return -EINVAL;
>
> if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> old_write_domain);
>
> /* And bump the LRU for this access */
> - if (!obj->active) {
> - struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> - if (vma)
> - list_move_tail(&vma->mm_list,
> - &dev_priv->gtt.base.inactive_list);
> -
> - }
> -
> + list_move_tail(&vma->mm_list,
> + &dev_priv->gtt.base.inactive_list);
We've lost the obj->active check here and I didn't spot anything that
would justify that.
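If dropping it wasn't intentional, keeping the old behaviour should just be
(sketch, reusing the vma already looked up at the top of the function):

        /* And bump the LRU for this access */
        if (!obj->active)
                list_move_tail(&vma->mm_list,
                               &dev_priv->gtt.base.inactive_list);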
> return 0;
> }
>
> @@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> bool was_pin_display;
> int ret;
>
> - if (pipelined != obj->ring) {
> - ret = i915_gem_object_sync(obj, pipelined);
> - if (ret)
> - return ret;
> - }
> + ret = i915_gem_object_sync(obj, pipelined, true);
> + if (ret)
> + return ret;
>
> /* Mark the pin_display early so that we account for the
> * display coherency whilst setting up the cache domains.
> @@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_file_private *file_priv = file->driver_priv;
> unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
> - struct drm_i915_gem_request *request;
> - struct intel_engine_cs *ring = NULL;
> + struct i915_gem_request *rq;
> unsigned reset_counter;
> - u32 seqno = 0;
> int ret;
>
> ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> @@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> return ret;
>
> spin_lock(&file_priv->mm.lock);
> - list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> - if (time_after_eq(request->emitted_jiffies, recent_enough))
> + list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
> + if (time_after_eq(rq->emitted_jiffies, recent_enough))
> break;
> -
> - ring = request->ring;
> - seqno = request->seqno;
> }
> + rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
> reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
> spin_unlock(&file_priv->mm.lock);
>
> - if (seqno == 0)
> + if (rq == NULL)
> return 0;
>
> - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
> + ret = __wait_request(rq, reset_counter, true, NULL, NULL);
> if (ret == 0)
> queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>
> + i915_request_put(rq);
> return ret;
> }
>
> @@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
> {
> struct drm_i915_gem_busy *args = data;
> struct drm_i915_gem_object *obj;
> - int ret;
> + int ret, i;
>
> ret = i915_mutex_lock_interruptible(dev);
> if (ret)
> @@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
> */
> ret = i915_gem_object_flush_active(obj);
>
> - args->busy = obj->active;
> - if (obj->ring) {
> + args->busy = 0;
> + if (obj->active) {
> BUILD_BUG_ON(I915_NUM_RINGS > 16);
Hm, this suggests we should size active to be 4 bits. Just to stay
consistent.
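E.g. with the #define suggested above, simply (sketch):

        #define I915_GEM_OBJECT_ACTIVE_BITS 4

        unsigned int active:I915_GEM_OBJECT_ACTIVE_BITS;

together with the BUILD_BUG_ON so the counter is known to fit one reader
per ring.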
> - args->busy |= intel_ring_flag(obj->ring) << 16;
> + args->busy |= 1;
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + if (obj->last_read[i].request == NULL)
> + continue;
> +
> + args->busy |= 1 << (16 + i);
> + }
> }
>
> drm_gem_object_unreference(&obj->base);
> @@ -4584,8 +4738,13 @@ unlock:
> void i915_gem_object_init(struct drm_i915_gem_object *obj,
> const struct drm_i915_gem_object_ops *ops)
> {
> + int i;
> +
> INIT_LIST_HEAD(&obj->global_list);
> - INIT_LIST_HEAD(&obj->ring_list);
> + INIT_LIST_HEAD(&obj->last_fence.ring_list);
> + INIT_LIST_HEAD(&obj->last_write.ring_list);
> + for (i = 0; i < I915_NUM_RINGS; i++)
> + INIT_LIST_HEAD(&obj->last_read[i].ring_list);
> INIT_LIST_HEAD(&obj->obj_exec_link);
> INIT_LIST_HEAD(&obj->vma_list);
>
> @@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
> static void
> init_ring_lists(struct intel_engine_cs *ring)
> {
> - INIT_LIST_HEAD(&ring->active_list);
> + INIT_LIST_HEAD(&ring->read_list);
> + INIT_LIST_HEAD(&ring->write_list);
> + INIT_LIST_HEAD(&ring->fence_list);
> INIT_LIST_HEAD(&ring->request_list);
> }
>
> @@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
> */
> spin_lock(&file_priv->mm.lock);
> while (!list_empty(&file_priv->mm.request_list)) {
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
>
> - request = list_first_entry(&file_priv->mm.request_list,
> - struct drm_i915_gem_request,
> - client_list);
> - list_del(&request->client_list);
> - request->file_priv = NULL;
> + rq = list_first_entry(&file_priv->mm.request_list,
> + struct i915_gem_request,
> + client_list);
> + list_del(&rq->client_list);
> + rq->file_priv = NULL;
> }
> spin_unlock(&file_priv->mm.lock);
> }
> @@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
> {
> struct i915_vma *vma;
>
> - /* This WARN has probably outlived its usefulness (callers already
> - * WARN if they don't find the GGTT vma they expect). When removing,
> - * remember to remove the pre-check in is_pin_display() as well */
> - if (WARN_ON(list_empty(&obj->vma_list)))
> - return NULL;
> -
Smells like a separate patch. Maybe do it up-front if taking it out is too
invasive.
> vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
> if (vma->vm != obj_to_ggtt(obj))
> return NULL;
>
> return vma;
> }
> +
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
This one needs a big warning that it's only suitable as a hint for error
state and debugfs. If execbuf gets stuck in the slowpath we might end up
with slightly out-of-order reads (since now they don't sync cross-engine
any more).
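Something along these lines as a comment would do (wording only a
suggestion):

        /*
         * NB: Only an approximation, for use by error capture and debugfs.
         * Seqnos cannot be compared across rings, and if execbuf hits the
         * slowpath the reads may end up slightly out of order since they
         * no longer sync across engines. Don't use this for anything that
         * has to be correct.
         */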
> +{
> + u32 seqno = 0;
> + struct i915_gem_request *rq = NULL;
> + int i;
> +
> + /* This is approximate as seqno cannot be used across rings */
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + if (obj->last_read[i].request == NULL)
> + continue;
> +
> + if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
> + rq = obj->last_read[i].request, seqno = rq->seqno;
> + }
> +
> + return rq;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 79dc77b..690e2dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
> if (!lctx)
> continue;
>
> - if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
> + if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
> WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
> get_context_alignment(dev), 0));
> - /* Fake a finish/inactive */
> - dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
> - dctx->legacy_hw_ctx.rcs_state->active = 0;
> - }
Again tastes like a separate patch for up-front merging.
>
> if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
> i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
> @@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
> WARN_ON(!dev_priv->ring[RCS].last_context);
> if (dev_priv->ring[RCS].last_context == dctx) {
> /* Fake switch to NULL context */
> - WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
> i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
> i915_gem_context_unreference(dctx);
> dev_priv->ring[RCS].last_context = NULL;
> @@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
> * MI_SET_CONTEXT instead of when the next seqno has completed.
> */
> if (from != NULL) {
> - from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> + struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
> +
> + from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> + i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
> +
> /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
> * whole damn pipeline, we don't need to explicitly mark the
> * object dirty. The only exception is that the context must be
> @@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
> * able to defer doing this until we know the object would be
> * swapped, but there is no way to do that yet.
> */
> - from->legacy_hw_ctx.rcs_state->dirty = 1;
> - BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
> + from_obj->dirty = 1;
>
> /* obj is kept alive until the next request by its active ref */
> - i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> + i915_gem_object_ggtt_unpin(from_obj);
> i915_gem_context_unreference(from);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> index 57d4dde..787ea6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_exec.c
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
> {
> int ret;
>
> - ret = i915_gem_object_sync(obj, ring);
> + ret = i915_gem_object_sync(obj, ring, false);
> if (ret)
> return ret;
>
> @@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
> static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
> struct intel_engine_cs *ring)
> {
> - obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> - obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> - i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
> - obj->last_write_seqno = intel_ring_get_seqno(ring);
> - obj->dirty = 1;
Would be nice to split out the semantic change of moving dirty = 1 into
move_to_active.
> + obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
> + obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
> + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
>
> ring->gpu_caches_dirty = true;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0faab01..8f1c2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>
> list_for_each_entry(vma, vmas, exec_list) {
> struct drm_i915_gem_object *obj = vma->obj;
> - ret = i915_gem_object_sync(obj, ring);
> +
> + ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
> if (ret)
> return ret;
>
> @@ -956,40 +957,20 @@ static void
> i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> struct intel_engine_cs *ring)
> {
> - u32 seqno = intel_ring_get_seqno(ring);
> struct i915_vma *vma;
>
> list_for_each_entry(vma, vmas, exec_list) {
> struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> - struct drm_i915_gem_object *obj = vma->obj;
> - u32 old_read = obj->base.read_domains;
> - u32 old_write = obj->base.write_domain;
> -
> - obj->base.write_domain = obj->base.pending_write_domain;
> - if (obj->base.write_domain == 0)
> - obj->base.pending_read_domains |= obj->base.read_domains;
> - obj->base.read_domains = obj->base.pending_read_domains;
> -
> - i915_vma_move_to_active(vma, ring);
> - if (obj->base.write_domain) {
> - obj->dirty = 1;
> - obj->last_write_seqno = seqno;
> + unsigned fenced;
>
> - intel_fb_obj_invalidate(obj, ring);
> -
> - /* update for the implicit flush after a batch */
> - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> - }
> + fenced = 0;
> if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> - obj->last_fenced_seqno = seqno;
> - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> - struct drm_i915_private *dev_priv = to_i915(ring->dev);
> - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> - &dev_priv->mm.fence_list);
> - }
> + fenced |= VMA_IS_FENCED;
> + if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> + fenced |= VMA_HAS_FENCE;
> }
>
> - trace_i915_gem_object_change_domain(obj, old_read, old_write);
> + i915_vma_move_to_active(vma, ring, fenced);
> }
> }
>
> @@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
> ring->gpu_caches_dirty = true;
>
> /* Add a breadcrumb for the completion of the batch buffer */
> - (void)__i915_add_request(ring, file, obj, NULL);
> + (void)__i915_add_request(ring, file, obj);
> }
>
> static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index e60be3f..fc1223c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
> if (ret)
> goto out;
>
> - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> + so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
>
> - ret = __i915_add_request(ring, NULL, so.obj, NULL);
> + ret = __i915_add_request(ring, NULL, so.obj);
> /* __i915_add_request moves object to inactive if it fails */
> out:
> render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index af5d31a..e46fb34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>
> if (ret == 0) {
> obj->fence_dirty =
> - obj->last_fenced_seqno ||
> + obj->last_fence.request ||
> obj->fence_reg != I915_FENCE_REG_NONE;
> obj->tiling_mode = tiling_mode;
> obj->stride = stride;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ebc8529..584b863 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
> if (i915_gem_obj_bound(src, vm))
> dst->gtt_offset = i915_gem_obj_offset(src, vm);
> else
> - dst->gtt_offset = -1UL;
> + dst->gtt_offset = -1;
Spurious change?
>
> reloc_offset = dst->gtt_offset;
> use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> @@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> struct i915_vma *vma)
> {
> struct drm_i915_gem_object *obj = vma->obj;
> + struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>
> err->size = obj->base.size;
> err->name = obj->base.name;
> - err->rseqno = obj->last_read_seqno;
> - err->wseqno = obj->last_write_seqno;
> + err->rseqno = i915_request_seqno(rq);
> + err->wseqno = i915_request_seqno(obj->last_write.request);
> err->gtt_offset = vma->node.start;
> err->read_domains = obj->base.read_domains;
> err->write_domain = obj->base.write_domain;
> @@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> err->dirty = obj->dirty;
> err->purgeable = obj->madv != I915_MADV_WILLNEED;
> err->userptr = obj->userptr.mm != NULL;
> - err->ring = obj->ring ? obj->ring->id : -1;
> + err->ring = i915_request_ring_id(rq);
> err->cache_level = obj->cache_level;
> }
>
> @@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
> struct drm_i915_error_state *error)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> - struct drm_i915_gem_request *request;
> + struct i915_gem_request *rq;
> int i, count;
>
> for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
>
> i915_record_ring_state(dev, error, ring, &error->ring[i]);
>
> - request = i915_gem_find_active_request(ring);
> - if (request) {
> + rq = i915_gem_find_active_request(ring);
This reminds me that our locking for the error state capture and also the
guilty batch determination is fairly ... nonexistent. This will be a fun
problem to fix once we make reset more common with per-engine resets and
short-lived timers for media workloads. Anyway, unrelated comment.
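For the record, with requests now being reference counted, the capture path
could eventually pin the active request before poking at it, roughly like the
sketch below. The per-ring request_lock is invented purely for illustration;
today we only have struct_mutex, which the reset path does not reliably hold.

/* hypothetical sketch, not part of this patch */
static struct i915_gem_request *
capture_active_request(struct intel_engine_cs *ring)
{
	struct i915_gem_request *rq;

	spin_lock(&ring->request_lock);		/* made-up lock, see above */
	rq = i915_gem_find_active_request(ring);
	if (rq)
		rq = i915_request_get(rq);	/* keep it alive while we dump it */
	spin_unlock(&ring->request_lock);

	return rq;
}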
> + if (rq) {
> /* We need to copy these to an anonymous buffer
> * as the simplest method to avoid being overwritten
> * by userspace.
> */
> error->ring[i].batchbuffer =
> i915_error_object_create(dev_priv,
> - request->batch_obj,
> - request->ctx ?
> - request->ctx->vm :
> + rq->batch_obj,
> + rq->ctx ?
> + rq->ctx->vm :
> &dev_priv->gtt.base);
>
> if (HAS_BROKEN_CS_TLB(dev_priv))
> @@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
> i915_error_ggtt_object_create(dev_priv,
> ring->scratch.obj);
>
> - if (request->file_priv) {
> + if (rq->file_priv) {
> struct task_struct *task;
>
> rcu_read_lock();
> - task = pid_task(request->file_priv->file->pid,
> + task = pid_task(rq->file_priv->file->pid,
> PIDTYPE_PID);
> if (task) {
> strcpy(error->ring[i].comm, task->comm);
> @@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
> i915_gem_record_active_context(ring, error, &error->ring[i]);
>
> count = 0;
> - list_for_each_entry(request, &ring->request_list, list)
> + list_for_each_entry(rq, &ring->request_list, list)
> count++;
>
> error->ring[i].num_requests = count;
> @@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
> }
>
> count = 0;
> - list_for_each_entry(request, &ring->request_list, list) {
> + list_for_each_entry(rq, &ring->request_list, list) {
> struct drm_i915_error_request *erq;
>
> erq = &error->ring[i].requests[count++];
> - erq->seqno = request->seqno;
> - erq->jiffies = request->emitted_jiffies;
> - erq->tail = request->tail;
> + erq->seqno = rq->seqno;
> + erq->jiffies = rq->emitted_jiffies;
> + erq->tail = rq->tail;
> }
> }
> }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 717c111..6d4f5a7 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2935,14 +2935,14 @@ static u32
> ring_last_seqno(struct intel_engine_cs *ring)
> {
> return list_entry(ring->request_list.prev,
> - struct drm_i915_gem_request, list)->seqno;
> + struct i915_gem_request, list)->seqno;
> }
>
> static bool
> ring_idle(struct intel_engine_cs *ring, u32 seqno)
> {
> return (list_empty(&ring->request_list) ||
> - i915_seqno_passed(seqno, ring_last_seqno(ring)));
> + __i915_seqno_passed(seqno, ring_last_seqno(ring)));
> }
>
> static bool
> @@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
> if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
> return -1;
>
> - if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> + if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> return 1;
>
> /* cursory check for an unkickable deadlock */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 75f423d..f1c2a28 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
> int i;
>
> for_each_ring(ring, i915, i) {
> - struct drm_i915_gem_request *rq;
> + struct i915_gem_request *rq;
>
> if (list_empty(&ring->request_list))
> continue;
>
> rq = list_last_entry(&ring->request_list,
> - struct drm_i915_gem_request,
> + struct i915_gem_request,
> list);
>
> - if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
> + if (i915_request_complete(rq, true))
> continue;
>
> return true;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 63f6875..0ebd85d 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
> TP_fast_assign(
> __entry->dev = ring->dev->primary->index;
> __entry->ring = ring->id;
> - __entry->seqno = intel_ring_get_seqno(ring),
> + __entry->seqno = intel_ring_get_request(ring)->seqno,
> __entry->flags = flags;
> i915_trace_irq_get(ring, __entry->seqno);
> ),
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d828f47..9b7931c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>
> + i915_request_put(work->flip_queued_request);
> kfree(work);
> }
>
> @@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
> else if (i915.use_mmio_flip > 0)
> return true;
> else
> - return ring != obj->ring;
> + return ring != i915_request_ring(obj->last_write.request);
> }
>
> static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> @@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
>
> static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> {
> - struct intel_engine_cs *ring;
> + struct i915_gem_request *rq = obj->last_write.request;
> int ret;
>
> lockdep_assert_held(&obj->base.dev->struct_mutex);
>
> - if (!obj->last_write_seqno)
> - return 0;
> -
> - ring = obj->ring;
> -
> - if (i915_seqno_passed(ring->get_seqno(ring, true),
> - obj->last_write_seqno))
> + if (rq == NULL)
> return 0;
>
> - ret = i915_gem_check_olr(ring, obj->last_write_seqno);
> + ret = i915_gem_check_olr(rq);
> if (ret)
> return ret;
>
> - if (WARN_ON(!ring->irq_get(ring)))
> + if (i915_request_complete(rq, true))
> + return 0;
> +
> + if (WARN_ON(!rq->ring->irq_get(rq->ring)))
> return 0;
>
> return 1;
> @@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs *ring)
> if (ring->id != mmio_flip->ring_id)
> continue;
>
> - if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> + if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
> intel_do_mmio_flip(intel_crtc);
> mmio_flip->seqno = 0;
> ring->irq_put(ring);
> @@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + struct i915_gem_request *rq;
> unsigned long irq_flags;
> int ret;
>
> @@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
> return 0;
> }
>
> + rq = obj->last_write.request;
> + if (WARN_ON(rq == NULL))
> + return 0;
> +
> spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> - intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> - intel_crtc->mmio_flip.ring_id = obj->ring->id;
> + intel_crtc->mmio_flip.seqno = rq->seqno;
> + intel_crtc->mmio_flip.ring_id = rq->ring->id;
> spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
>
> /*
> * Double check to catch cases where irq fired before
> * mmio flip data was ready
> */
> - intel_notify_mmio_flip(obj->ring);
> + intel_notify_mmio_flip(rq->ring);
> return 0;
> }
>
> @@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
> return false;
>
> if (work->flip_ready_vblank == 0) {
> - if (work->ring &&
> - !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
> - work->flip_queued_seqno))
> + struct i915_gem_request *rq = work->flip_queued_request;
> + if (rq && !i915_request_complete(rq, true))
> return false;
>
> work->flip_ready_vblank = drm_vblank_count(dev, intel_crtc->pipe);
> @@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> enum pipe pipe = intel_crtc->pipe;
> struct intel_unpin_work *work;
> struct intel_engine_cs *ring;
> + struct i915_gem_request *rq;
> unsigned long flags;
> int ret;
>
> @@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> } else if (IS_IVYBRIDGE(dev)) {
> ring = &dev_priv->ring[BCS];
> } else if (INTEL_INFO(dev)->gen >= 7) {
> - ring = obj->ring;
> + ring = i915_request_ring(obj->last_write.request);
> if (ring == NULL || ring->id != RCS)
> ring = &dev_priv->ring[BCS];
> } else {
> @@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> }
>
> if (use_mmio_flip(ring, obj, page_flip_flags)) {
> - ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> + ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(obj->last_write.request));
> if (ret)
> goto cleanup_pending;
>
> @@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> if (ret)
> goto cleanup_unpin;
>
> - work->flip_queued_seqno = obj->last_write_seqno;
> - work->ring = obj->ring;
> + rq = obj->last_write.request;
> } else {
> ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> if (ret)
> @@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> if (ret)
> goto cleanup_unpin;
>
> - work->flip_queued_seqno = intel_ring_get_seqno(ring);
> - work->ring = ring;
> + rq = intel_ring_get_request(ring);
> }
>
> + work->flip_queued_request = i915_request_get(rq);
> work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
> work->enable_stall_check = true;
>
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 274f77c..5f336a3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -657,14 +657,13 @@ struct intel_unpin_work {
> struct drm_i915_gem_object *old_fb_obj;
> struct drm_i915_gem_object *pending_flip_obj;
> struct drm_pending_vblank_event *event;
> - struct intel_engine_cs *ring;
> atomic_t pending;
> #define INTEL_FLIP_INACTIVE 0
> #define INTEL_FLIP_PENDING 1
> #define INTEL_FLIP_COMPLETE 2
> u32 flip_count;
> u32 gtt_offset;
> - u32 flip_queued_seqno;
> + struct i915_gem_request *flip_queued_request;
> int flip_queued_vblank;
> int flip_ready_vblank;
> bool enable_stall_check;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index d94af27..c709ca5 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -183,7 +183,7 @@ struct intel_overlay {
> u32 flip_addr;
> struct drm_i915_gem_object *reg_bo;
> /* flip handling */
> - uint32_t last_flip_req;
> + struct i915_gem_request *flip_request;
> void (*flip_tail)(struct intel_overlay *);
> };
>
> @@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
> io_mapping_unmap(regs);
> }
>
> -static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> - void (*tail)(struct intel_overlay *))
> +/* recover from an interruption due to a signal
> + * We have to be careful not to repeat work forever and to make forward progress. */
> +static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> {
> - struct drm_device *dev = overlay->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> int ret;
>
> - BUG_ON(overlay->last_flip_req);
> - ret = i915_add_request(ring, &overlay->last_flip_req);
> - if (ret)
> - return ret;
> + if (overlay->flip_request == NULL)
> + return 0;
>
> - overlay->flip_tail = tail;
> - ret = i915_wait_seqno(ring, overlay->last_flip_req);
> + ret = i915_wait_request(overlay->flip_request);
> if (ret)
> return ret;
> - i915_gem_retire_requests(dev);
>
> - overlay->last_flip_req = 0;
> + i915_request_put(overlay->flip_request);
> + overlay->flip_request = NULL;
> +
> + i915_gem_retire_requests(overlay->dev);
> +
> + if (overlay->flip_tail)
> + overlay->flip_tail(overlay);
> +
> return 0;
> }
>
> +static int intel_overlay_add_request(struct intel_overlay *overlay,
> + struct intel_engine_cs *ring,
> + void (*tail)(struct intel_overlay *))
> +{
> + BUG_ON(overlay->flip_request);
> + overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
> + overlay->flip_tail = tail;
> +
> + return i915_add_request(ring);
> +}
> +
> +static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> + struct intel_engine_cs *ring,
> + void (*tail)(struct intel_overlay *))
> +{
> + intel_overlay_add_request(overlay, ring, tail);
> + return intel_overlay_recover_from_interrupt(overlay);
> +}
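intel_overlay_add_request() can fail (it just returns i915_add_request()), but
intel_overlay_do_wait_request() throws that error away. It probably wants to
propagate it and drop the reference it just took, something like this (sketch
only, the exact unwind may need more thought):

static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
					 struct intel_engine_cs *ring,
					 void (*tail)(struct intel_overlay *))
{
	int ret;

	ret = intel_overlay_add_request(overlay, ring, tail);
	if (ret) {
		/* don't wait on a request we failed to emit (sketch) */
		i915_request_put(overlay->flip_request);
		overlay->flip_request = NULL;
		return ret;
	}

	return intel_overlay_recover_from_interrupt(overlay);
}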
> +
> /* overlay needs to be disable in OCMD reg */
> static int intel_overlay_on(struct intel_overlay *overlay)
> {
> @@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
> intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
> intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> intel_ring_emit(ring, MI_NOOP);
> - intel_ring_advance(ring);
> + __intel_ring_advance(ring);
>
> - return intel_overlay_do_wait_request(overlay, NULL);
> + return intel_overlay_do_wait_request(overlay, ring, NULL);
> }
>
> /* overlay needs to be enabled in OCMD reg */
> @@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>
> intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
> intel_ring_emit(ring, flip_addr);
> - intel_ring_advance(ring);
> + __intel_ring_advance(ring);
>
> - return i915_add_request(ring, &overlay->last_flip_req);
> + return intel_overlay_add_request(overlay, ring, NULL);
> }
>
> static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
> {
> struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>
> + i915_gem_track_fb(obj, NULL,
> + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> +
> i915_gem_object_ggtt_unpin(obj);
> drm_gem_object_unreference(&obj->base);
>
> @@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
> intel_ring_emit(ring, flip_addr);
> intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> }
> - intel_ring_advance(ring);
> -
> - return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> -}
> -
> -/* recover from an interruption due to a signal
> - * We have to be careful not to repeat work forever an make forward progess. */
> -static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> -{
> - struct drm_device *dev = overlay->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> - int ret;
> -
> - if (overlay->last_flip_req == 0)
> - return 0;
> + __intel_ring_advance(ring);
>
> - ret = i915_wait_seqno(ring, overlay->last_flip_req);
> - if (ret)
> - return ret;
> - i915_gem_retire_requests(dev);
> -
> - if (overlay->flip_tail)
> - overlay->flip_tail(overlay);
> -
> - overlay->last_flip_req = 0;
> - return 0;
> + return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
> }
>
> /* Wait for pending overlay flip and release old frame.
> @@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> */
> static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
> {
> - struct drm_device *dev = overlay->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> - int ret;
> + struct drm_i915_private *dev_priv = to_i915(overlay->dev);
> + int ret = 0;
>
> /* Only wait if there is actually an old frame to release to
> * guarantee forward progress.
> @@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
> return 0;
>
> if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
> + struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +
> /* synchronous slowpath */
> ret = intel_ring_begin(ring, 2);
> if (ret)
> @@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>
> intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> intel_ring_emit(ring, MI_NOOP);
> - intel_ring_advance(ring);
> + __intel_ring_advance(ring);
>
> - ret = intel_overlay_do_wait_request(overlay,
> + ret = intel_overlay_do_wait_request(overlay, ring,
> intel_overlay_release_old_vid_tail);
> - if (ret)
> - return ret;
> - }
> -
> - intel_overlay_release_old_vid_tail(overlay);
> + } else
> + intel_overlay_release_old_vid_tail(overlay);
>
> -
> - i915_gem_track_fb(overlay->old_vid_bo, NULL,
> - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> - return 0;
> + return ret;
> }
>
> struct put_image_params {
> @@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
> iowrite32(0, &regs->OCMD);
> intel_overlay_unmap_regs(overlay, regs);
>
> - ret = intel_overlay_off(overlay);
> - if (ret != 0)
> - return ret;
> -
> - intel_overlay_off_tail(overlay);
> - return 0;
> + return intel_overlay_off(overlay);
> }
>
> static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7c5a6c5..ae96de5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
> PIPE_CONTROL_FLUSH_ENABLE);
> intel_ring_emit(signaller, lower_32_bits(gtt_offset));
> intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> + intel_ring_emit(signaller, signaller->preallocated_request->seqno);
> intel_ring_emit(signaller, 0);
> intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
> MI_SEMAPHORE_TARGET(waiter->id));
> @@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
> intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
> MI_FLUSH_DW_USE_GTT);
> intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> + intel_ring_emit(signaller, signaller->preallocated_request->seqno);
> intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
> MI_SEMAPHORE_TARGET(waiter->id));
> intel_ring_emit(signaller, 0);
> @@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
> if (mbox_reg != GEN6_NOSYNC) {
> intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
> intel_ring_emit(signaller, mbox_reg);
> - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> + intel_ring_emit(signaller, signaller->preallocated_request->seqno);
> }
> }
>
> @@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
>
> intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
> intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> - intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> + intel_ring_emit(ring, ring->preallocated_request->seqno);
> intel_ring_emit(ring, MI_USER_INTERRUPT);
> __intel_ring_advance(ring);
>
> @@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
> PIPE_CONTROL_WRITE_FLUSH |
> PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
> intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> - intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> + intel_ring_emit(ring, ring->preallocated_request->seqno);
> intel_ring_emit(ring, 0);
> PIPE_CONTROL_FLUSH(ring, scratch_addr);
> scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
> PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
> PIPE_CONTROL_NOTIFY);
> intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> - intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> + intel_ring_emit(ring, ring->preallocated_request->seqno);
> intel_ring_emit(ring, 0);
> __intel_ring_advance(ring);
>
> @@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
>
> intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
> intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> - intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> + intel_ring_emit(ring, ring->preallocated_request->seqno);
> intel_ring_emit(ring, MI_USER_INTERRUPT);
> __intel_ring_advance(ring);
>
> @@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> }
>
> ring->dev = dev;
> - INIT_LIST_HEAD(&ring->active_list);
> + INIT_LIST_HEAD(&ring->read_list);
> + INIT_LIST_HEAD(&ring->write_list);
> INIT_LIST_HEAD(&ring->request_list);
> ringbuf->size = 32 * PAGE_SIZE;
> memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
> @@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
> WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>
> intel_destroy_ringbuffer_obj(ringbuf);
> - ring->preallocated_lazy_request = NULL;
> - ring->outstanding_lazy_seqno = 0;
> + ring->preallocated_request = NULL;
>
> if (ring->cleanup)
> ring->cleanup(ring);
> @@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
> static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
> {
> struct intel_ringbuffer *ringbuf = ring->buffer;
> - struct drm_i915_gem_request *request;
> - u32 seqno = 0;
> + struct i915_gem_request *rq;
> int ret;
>
> if (ringbuf->last_retired_head != -1) {
> @@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
> return 0;
> }
>
> - list_for_each_entry(request, &ring->request_list, list) {
> - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> - seqno = request->seqno;
> + list_for_each_entry(rq, &ring->request_list, list) {
> + if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
> break;
> - }
> }
>
> - if (seqno == 0)
> + if (rq == list_entry(&ring->request_list, typeof(*rq), list))
> return -ENOSPC;
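Bikeshed: comparing rq against list_entry(&ring->request_list, ...) to detect
that the walk found nothing is correct, but subtle enough that an explicit
result pointer might read better, e.g. (just a sketch):

	struct i915_gem_request *rq, *target = NULL;

	list_for_each_entry(rq, &ring->request_list, list) {
		if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n) {
			target = rq;
			break;
		}
	}

	if (target == NULL)
		return -ENOSPC;

	ret = i915_wait_request(target);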
>
> - ret = i915_wait_seqno(ring, seqno);
> + ret = i915_wait_request(rq);
> if (ret)
> return ret;
>
> @@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>
> int intel_ring_idle(struct intel_engine_cs *ring)
> {
> - u32 seqno;
> int ret;
>
> /* We need to add any requests required to flush the objects and ring */
> - if (ring->outstanding_lazy_seqno) {
> - ret = i915_add_request(ring, NULL);
> + if (ring->preallocated_request) {
> + ret = i915_add_request(ring);
> if (ret)
> return ret;
> }
> @@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
> if (list_empty(&ring->request_list))
> return 0;
>
> - seqno = list_entry(ring->request_list.prev,
> - struct drm_i915_gem_request,
> - list)->seqno;
> -
> - return i915_wait_seqno(ring, seqno);
> + return i915_wait_request(container_of(ring->request_list.prev,
> + struct i915_gem_request,
> + list));
> }
>
> static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
> {
> - if (ring->outstanding_lazy_seqno)
> - return 0;
> + struct i915_gem_request *rq;
> + int ret;
>
> - if (ring->preallocated_lazy_request == NULL) {
> - struct drm_i915_gem_request *request;
> + if (ring->preallocated_request)
> + return 0;
>
> - request = kmalloc(sizeof(*request), GFP_KERNEL);
> - if (request == NULL)
> - return -ENOMEM;
> + rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> + if (rq == NULL)
> + return -ENOMEM;
>
> - ring->preallocated_lazy_request = request;
> + ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> + if (ret) {
> + kfree(rq);
> + return ret;
> }
>
> - return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> + kref_init(&rq->kref);
> + rq->ring = ring;
> + rq->completed = false;
> +
> + ring->preallocated_request = rq;
> + return 0;
> }
>
> static int __intel_ring_prepare(struct intel_engine_cs *ring,
> @@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
> return ret;
>
> /* Preallocate the olr before touching the ring, */
> - ret = intel_ring_alloc_seqno(ring);
> + ret = intel_ring_alloc_request(ring);
> if (ret)
> return ret;
>
> @@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
> return ret;
>
> /* but we may flush the seqno during prepare. */
> - ret = intel_ring_alloc_seqno(ring);
> + ret = intel_ring_alloc_request(ring);
> if (ret)
> return ret;
>
> @@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> - BUG_ON(ring->outstanding_lazy_seqno);
> + BUG_ON(ring->preallocated_request);
>
> if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
> I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
> @@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
> ring->cleanup = render_ring_cleanup;
>
> ring->dev = dev;
> - INIT_LIST_HEAD(&ring->active_list);
> + INIT_LIST_HEAD(&ring->read_list);
> + INIT_LIST_HEAD(&ring->write_list);
> INIT_LIST_HEAD(&ring->request_list);
>
> ringbuf->size = size;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index dcd2e44..2a78051 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -222,7 +222,7 @@ struct intel_engine_cs {
> *
> * A reference is held on the buffer while on this list.
> */
> - struct list_head active_list;
> + struct list_head read_list, write_list, fence_list;
>
> /**
> * List of breadcrumbs associated with GPU requests currently
> @@ -233,8 +233,7 @@ struct intel_engine_cs {
> /**
> * Do we have some not yet emitted requests outstanding?
> */
> - struct drm_i915_gem_request *preallocated_lazy_request;
> - u32 outstanding_lazy_seqno;
> + struct i915_gem_request *preallocated_request;
> bool gpu_caches_dirty;
> bool fbc_dirty;
>
> @@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
> return ringbuf->tail;
> }
>
> -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
> +static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
> {
> - BUG_ON(ring->outstanding_lazy_seqno == 0);
> - return ring->outstanding_lazy_seqno;
> + BUG_ON(ring->preallocated_request == 0);
> + return ring->preallocated_request;
> }
>
> static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
> --
> 1.9.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch