[Intel-gfx] [PATCH 3/9] drm/i915/execlists: Pack the count into the low bits of the port.request
Chris Wilson
chris at chris-wilson.co.uk
Fri May 5 11:16:18 UTC 2017
On Fri, May 05, 2017 at 11:49:21AM +0100, Tvrtko Ursulin wrote:
>
> On 03/05/2017 12:37, Chris Wilson wrote:
> >+static void port_assign(struct execlist_port *port,
> >+ struct drm_i915_gem_request *rq)
> >+{
> >+ GEM_BUG_ON(rq == port_request(port));
> >+
> >+ if (port_isset(port))
> >+ i915_gem_request_put(port_request(port));
> >+
> >+ port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
> >+}
>
> Reason for not having one implementation of port_assign with
> enable_nested_signalling outside it in the guc case?
The handling of port.count is a big difference between guc and
ordinary execlists. For execlists we count contexts, for guc we count
requests.
> > static void execlists_dequeue(struct intel_engine_cs *engine)
> > {
> > struct drm_i915_gem_request *last;
> >@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > struct rb_node *rb;
> > bool submit = false;
> >
> >- last = port->request;
> >+ last = port_request(port);
> > if (last)
> > /* WaIdleLiteRestore:bdw,skl
> > * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
> >@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > */
> > last->tail = last->wa_tail;
> >
> >- GEM_BUG_ON(port[1].request);
> >+ GEM_BUG_ON(port_isset(&port[1]));
> >
> > /* Hardware submission is through 2 ports. Conceptually each port
> > * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> >@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >
> > GEM_BUG_ON(last->ctx == cursor->ctx);
> >
> >- i915_gem_request_assign(&port->request, last);
> >+ if (submit)
> >+ port_assign(port, last);
>
> Optimisation? GEM_BUG_ON(last != port_request(port))?
Kind of, it is so both paths look identical and yes so we do meet the
criteria that last != port_request(port) (because it is silly to the do
the request_count dance if the goal is not to change request_count).
> >@@ -479,7 +484,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > submit = true;
> > }
> > if (submit) {
> >- i915_gem_request_assign(&port->request, last);
> >+ port_assign(port, last);
> > engine->execlist_first = rb;
> > }
> > spin_unlock_irq(&engine->timeline->lock);
> >@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > execlists_submit_ports(engine);
> > }
> >
> >-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
> >-{
> >- return !engine->execlist_port[0].request;
> >-}
> >-
> > static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
> > {
> > const struct execlist_port *port = engine->execlist_port;
> >
> >- return port[0].count + port[1].count < 2;
> >+ return port_count(&port[0]) + port_count(&port[1]) < 2;
> > }
> >
> > /*
> >@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
> > tail = GEN8_CSB_WRITE_PTR(head);
> > head = GEN8_CSB_READ_PTR(head);
> > while (head != tail) {
> >+ struct drm_i915_gem_request *rq;
> > unsigned int status;
> >+ unsigned int count;
> >
> > if (++head == GEN8_CSB_ENTRIES)
> > head = 0;
> >@@ -577,20 +579,24 @@ static void intel_lrc_irq_handler(unsigned long data)
> > GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
> > port[0].context_id);
> >
> >- GEM_BUG_ON(port[0].count == 0);
> >- if (--port[0].count == 0) {
> >+ rq = port_unpack(&port[0], &count);
> >+ GEM_BUG_ON(count == 0);
> >+ if (--count == 0) {
>
> If you changed this to be:
>
> count--;
> port_set(...);
> if (count > 0)
> continue;
>
> It would be perhaps a bit more readable, but a potentially useless
> port_set on the other hand. Not sure.
We expect to overwrite port in the common path, or at least that's my
expectation. Decrementing count for lite-restore should be the
exception. Part of the intention is to do the minimal amount of work
(especially avoiding the appearance of setting port.count = 0
prematurely) and pass the later port.count == 0 assert.
> > GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> >- GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
> >- execlists_context_status_change(port[0].request,
> >- INTEL_CONTEXT_SCHEDULE_OUT);
> >+ GEM_BUG_ON(!i915_gem_request_completed(rq));
> >+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> >+
> >+ trace_i915_gem_request_out(rq);
> >+ i915_gem_request_put(rq);
> >
> >- trace_i915_gem_request_out(port[0].request);
> >- i915_gem_request_put(port[0].request);
> > port[0] = port[1];
> > memset(&port[1], 0, sizeof(port[1]));
> >+ } else {
> >+ port_set(&port[0], port_pack(rq, count));
> > }
> >
> >- GEM_BUG_ON(port[0].count == 0 &&
> >+ /* After the final element, the hw should be idle */
> >+ GEM_BUG_ON(port_count(&port[0]) == 0 &&
> > !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > }
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index 4a599e3480a9..68765d45116b 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -368,8 +368,14 @@ struct intel_engine_cs {
> > /* Execlists */
> > struct tasklet_struct irq_tasklet;
> > struct execlist_port {
> >- struct drm_i915_gem_request *request;
> >- unsigned int count;
> >+ struct drm_i915_gem_request *request_count;
> >+#define EXECLIST_COUNT_BITS 2
> >+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
> >+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
> >+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
> >+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
> >+#define port_set(p, packed) ((p)->request_count = (packed))
> >+#define port_isset(p) ((p)->request_count)
> > GEM_DEBUG_DECL(u32 context_id);
> > } execlist_port[2];
> > struct rb_root execlist_queue;
> >
>
> Looks correct but I am still having trouble accepting the structure
> shrink and code savings are worth having less readable code.
Excluding port_unpack, I think it's a reasonable tidy.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list