[Intel-gfx] [PATCH 8/8] drm/i915: Keep track of reserved execlist ports

Tue Sep 12 09:19:35 UTC 2017

Quoting Mika Kuoppala (2017-09-12 09:36:18)
> To further enchance port processing, keep track of
> reserved ports. This way we can iterate only the used subset
> of port space. Note that we lift the responsibility of
> execlists_submit_request() to inspect hw availability and
> always do dequeuing. This is to ensure that only the irq
> handler will be responsible for keeping track of available ports.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 43 ++++++++-------
>  drivers/gpu/drm/i915/i915_irq.c            |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  5 +-
>  drivers/gpu/drm/i915/intel_lrc.c           | 84 ++++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 50 +++++++++++++-----
>  5 files changed, 117 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 0dfb03a0cee4..fdda3a1835ad 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -662,12 +662,19 @@ static void port_assign(struct execlist_port *port,
>  static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>  {
>         struct intel_engine_execlist * const el = &engine->execlist;
> -       struct execlist_port *port = execlist_port_head(el);
> -       const struct execlist_port * const last_port = execlist_port_tail(el);
> -       struct drm_i915_gem_request *last = port_request(port);
> +       struct execlist_port *port;
> +       struct drm_i915_gem_request *last;
>         struct rb_node *rb;
>         bool submit = false;
>  
> +       if (execlist_active_ports(el)) {
> +               port = execlist_port_tail(el);
> +               last = port_request(port);
> +       } else {
> +               port = NULL;
> +               last = NULL;
> +       }
> +
>         spin_lock_irq(&engine->timeline->lock);
>         rb = el->first;
>         GEM_BUG_ON(rb_first(&el->queue) != rb);
> @@ -675,9 +682,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>                 struct drm_i915_gem_request *rq, *rn;
>  
> +               if (!port)
> +                       port = execlist_request_port(el);
> +
>                 list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>                         if (last && rq->ctx != last->ctx) {
> -                               if (port == last_port) {
> +                               if (!execlist_inactive_ports(el)) {
>                                         __list_del_many(&p->requests,
>                                                         &rq->priotree.link);
>                                         goto done;
> @@ -686,7 +696,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>                                 if (submit)
>                                         port_assign(port, last);
>  
> -                               port = execlist_port_next(el, port);
> +                               port = execlist_request_port(el);
> +                               GEM_BUG_ON(port_isset(port));
>                         }
>  
>                         INIT_LIST_HEAD(&rq->priotree.link);
> @@ -717,26 +728,22 @@ static void i915_guc_irq_handler(unsigned long data)
>  {
>         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>         struct intel_engine_execlist * const el = &engine->execlist;
> -       struct execlist_port *port = execlist_port_head(el);
> -       const struct execlist_port * const last_port = execlist_port_tail(el);
> -       struct drm_i915_gem_request *rq;
> -       bool submit;
>  
>         do {
> -               rq = port_request(port);
> -               while (rq && i915_gem_request_completed(rq)) {
> +               while (execlist_active_ports(el)) {
> +                       struct execlist_port *port = execlist_port_head(el);
> +                       struct drm_i915_gem_request *rq = port_request(port);
> +
> +                       if (!i915_gem_request_completed(rq))
> +                               break;
> +
>                         trace_i915_gem_request_out(rq);
>                         i915_gem_request_put(rq);
>  
> -                       port = execlist_port_complete(el, port);
> -
> -                       rq = port_request(port);
> +                       execlist_release_port(el, port);
>                 }
>  
> -               submit = false;
> -               if (!port_count(last_port))
> -                       submit = i915_guc_dequeue(engine);
> -       } while (submit);
> +       } while (execlist_inactive_ports(el) && i915_guc_dequeue(engine));
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1919ac0b7b0f..de4e608786a8 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1312,7 +1312,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>         bool tasklet = false;
>  
>         if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
> -               if (port_count(execlist_port_head(el))) {
> +               if (execlist_active_ports(el)) {
>                         __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>                         tasklet = true;
>                 }
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 4b9eaec50070..b09212bb66b4 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -389,6 +389,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>         GEM_BUG_ON(el->port_mask >= EXECLIST_MAX_PORTS);
>  
>         el->port_head = 0;
> +       el->port_count = 0;
>  
>         el->queue = RB_ROOT;
>         el->first = NULL;
> @@ -1345,8 +1346,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
>         if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
>                 return false;
>  
> -       /* Both ports drained, no more ELSP submission? */
> -       if (port_request(execlist_port_head(&engine->execlist)))
> +       /* All ports drained, no more ELSP submission? */
> +       if (execlist_active_ports(&engine->execlist))
>                 return false;
>  
>         /* ELSP is empty, but there are ready requests? */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1a1c68c53fbd..18a87cf56b81 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -344,23 +344,27 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>         struct execlist_port *port;
>         unsigned int n;
>  
> +       for (n = 0; n < execlist_inactive_ports(el); n++) {
> +               writel(0, elsp);
> +               writel(0, elsp);
> +       }
> +
>         for_each_execlist_port_reverse(el, port, n) {
>                 struct drm_i915_gem_request *rq;
>                 unsigned int count;
>                 u64 desc;
>  
>                 rq = port_unpack(port, &count);
> -               if (rq) {
> -                       GEM_BUG_ON(count > !n);
> -                       if (!count++)
> -                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -                       port_set(port, port_pack(rq, count));
> -                       desc = execlists_update_context(rq);
> -                       GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
> -               } else {
> -                       GEM_BUG_ON(!n);
> -                       desc = 0;
> -               }
> +
> +               GEM_BUG_ON(!rq);
> +               GEM_BUG_ON(count > !n);
> +
> +               if (!count++)
> +                       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +
> +               port_set(port, port_pack(rq, count));
> +               desc = execlists_update_context(rq);
> +               GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
>  
>                 writel(upper_32_bits(desc), elsp);
>                 writel(lower_32_bits(desc), elsp);
> @@ -398,15 +402,16 @@ static void port_assign(struct execlist_port *port,
>  
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
> -       struct drm_i915_gem_request *last;
>         struct intel_engine_execlist * const el = &engine->execlist;
> -       struct execlist_port *port = execlist_port_head(el);
> -       const struct execlist_port * const last_port = execlist_port_tail(el);
> +       struct execlist_port *port;
> +       struct drm_i915_gem_request *last;
>         struct rb_node *rb;
>         bool submit = false;
>  
> -       last = port_request(port);
> -       if (last)
> +       if (execlist_active_ports(el)) {
> +               port = execlist_port_tail(el);
> +               last = port_request(port);
> +
>                 /* WaIdleLiteRestore:bdw,skl
>                  * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
>                  * as we resubmit the request. See gen8_emit_breadcrumb()
> @@ -414,6 +419,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                  * request.
>                  */
>                 last->tail = last->wa_tail;
> +       } else {
> +               port = NULL;
> +               last = NULL;
> +       }
>  
>         /* Hardware submission is through 2 ports. Conceptually each port
>          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> @@ -443,6 +452,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>                 struct drm_i915_gem_request *rq, *rn;
>  
> +               if (!port)
> +                       port = execlist_request_port(el);
> +
>                 list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>                         /*
>                          * Can we combine this request with the current port?
> @@ -461,7 +473,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                                  * combine this request with the last, then we
>                                  * are done.
>                                  */
> -                               if (port == last_port) {
> +                               if (!execlist_inactive_ports(el)) {
>                                         __list_del_many(&p->requests,
>                                                         &rq->priotree.link);
>                                         goto done;
> @@ -486,8 +498,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                                 if (submit)
>                                         port_assign(port, last);
>  
> -                               port = execlist_port_next(el, port);
> -
> +                               port = execlist_request_port(el);
>                                 GEM_BUG_ON(port_isset(port));
>                         }
>  
> @@ -518,9 +529,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>  static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
>  {
> -       struct execlist_port * const port = execlist_port_head(el);
> +       const unsigned int active = execlist_active_ports(el);
> +
> +       if (!active)
> +               return true;
>  
> -       return port_count(port) + port_count(execlist_port_next(el, port)) < 2;
> +       return port_count(execlist_port_tail(el)) + active < 2;
>  }
>  
>  /*
> @@ -531,7 +545,6 @@ static void intel_lrc_irq_handler(unsigned long data)
>  {
>         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>         struct intel_engine_execlist * const el = &engine->execlist;
> -       struct execlist_port *port = execlist_port_head(el);
>         struct drm_i915_private *dev_priv = engine->i915;
>  
>         /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -571,6 +584,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>                 tail = GEN8_CSB_WRITE_PTR(head);
>                 head = GEN8_CSB_READ_PTR(head);
>                 while (head != tail) {
> +                       struct execlist_port *port;
>                         struct drm_i915_gem_request *rq;
>                         unsigned int status;
>                         unsigned int count;
> @@ -599,6 +613,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>                         if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>                                 continue;
>  
> +                       port = execlist_port_head(el);
>                         /* Check the context/desc id for this event matches */
>                         GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
>                                          port->context_id);
> @@ -613,13 +628,13 @@ static void intel_lrc_irq_handler(unsigned long data)
>                                 trace_i915_gem_request_out(rq);
>                                 i915_gem_request_put(rq);
>  
> -                               port = execlist_port_complete(el, port);
> +                               execlist_release_port(el, port);
>                         } else {
>                                 port_set(port, port_pack(rq, count));
>                         }
>  
>                         /* After the final element, the hw should be idle */
> -                       GEM_BUG_ON(port_count(port) == 0 &&
> +                       GEM_BUG_ON(execlist_active_ports(el) == 0 &&
>                                    !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>                 }
>  
> @@ -709,10 +724,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  
>         if (insert_request(engine,
>                            &request->priotree,
> -                          request->priotree.priority)) {
> -               if (execlists_elsp_ready(el))
> -                       tasklet_hi_schedule(&el->irq_tasklet);
> -       }
> +                          request->priotree.priority))
> +               tasklet_hi_schedule(&el->irq_tasklet);
>  
>         GEM_BUG_ON(!el->first);
>         GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -1334,7 +1347,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>                               struct drm_i915_gem_request *request)
>  {
>         struct intel_engine_execlist * const el = &engine->execlist;
> -       struct execlist_port *port = execlist_port_head(el);
>         struct intel_context *ce;
>  
>         /*
> @@ -1351,12 +1363,16 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>                 return;
>         }
>  
> -       if (request->ctx != port_request(port)->ctx) {
> -               i915_gem_request_put(port_request(port));
> -               execlist_port_complete(el, port);
> -       }
> +       if (execlist_active_ports(el)) {
> +               struct execlist_port *port = execlist_port_head(el);
>  
> -       GEM_BUG_ON(request->ctx != port_request(port)->ctx);
> +               if (request->ctx != port_request(port)->ctx) {
> +                       i915_gem_request_put(port_request(port));
> +                       execlist_release_port(el, port);
> +               }
> +
> +               GEM_BUG_ON(request->ctx != port_request(port)->ctx);
> +       }
>  
>         /* If the request was innocent, we leave the request in the ELSP
>          * and will try to replay it on restarting. The context image may
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 13d8e1115f3b..9c377f6eb8fe 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -249,6 +249,11 @@ struct intel_engine_execlist {
>         unsigned int port_head;
>  
>         /**
> +        * @port_count: reserved ports
> +        */
> +       unsigned int port_count;
> +
> +       /**
>          * @queue: queue of requests, in priority lists
>          */
>         struct rb_root queue;
> @@ -510,44 +515,65 @@ struct intel_engine_cs {
>  
>  #define for_each_execlist_port(el__, port__, n__) \
>         for ((n__) = 0; \
> -            (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_mask + 1; \
> +            (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_count; \
>              (n__)++)
>  
>  #define for_each_execlist_port_reverse(el__, port__, n__) \
> -       for ((n__) = (el__)->port_mask + 1; \
> +       for ((n__) = (el__)->port_count; \
>              (port__) = &(el__)->port[__port_idx((el__)->port_head - 1, (n__), (el__)->port_mask)], (n__)--;)
>  
>  static inline struct execlist_port *
>  execlist_port_head(struct intel_engine_execlist * const el)
>  {
> +       GEM_DEBUG_BUG_ON(!el->port_count);
> +
>         return &el->port[el->port_head];
>  }
>  
>  static inline struct execlist_port *
>  execlist_port_tail(struct intel_engine_execlist * const el)
>  {
> -       return &el->port[__port_idx(el->port_head, el->port_mask, el->port_mask)];
> +       GEM_DEBUG_BUG_ON(!el->port_count);
> +
> +       return &el->port[__port_idx(el->port_head, el->port_count - 1, el->port_mask)];
>  }
>  
> -static inline struct execlist_port *
> -execlist_port_next(struct intel_engine_execlist * const el,
> -                  const struct execlist_port * const port)
> +static inline unsigned int
> +execlist_active_ports(const struct intel_engine_execlist * const el)
>  {
> -       const unsigned int i = port_index(port, el);
> +       return READ_ONCE(el->port_count);
> +}
>  
> -       return &el->port[__port_idx(i, 1, el->port_mask)];
> +static inline unsigned int
> +execlist_inactive_ports(const struct intel_engine_execlist * const el)
> +{
> +       return el->port_mask + 1 - READ_ONCE(el->port_count);
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_complete(struct intel_engine_execlist * const el,
> -                      struct execlist_port * const port)
> +execlist_request_port(struct intel_engine_execlist * const el)
> +{
> +       GEM_DEBUG_BUG_ON(el->port_count == el->port_mask + 1);

The idea of the DEBUG ones are that they match GEM_DEBUG_EXEC, i.e. code
that only conditionally exists under debugging. GEM_BUG_ON() only exists
for debugging (CI) builds.
-Chris