[Intel-gfx] [PATCH 13/27] drm/i915/execlists: Pack the count into the low bits of the port.request

Thu Apr 27 14:37:38 UTC 2017

On Thu, Apr 20, 2017 at 03:58:19PM +0100, Tvrtko Ursulin wrote:
> > static void record_context(struct drm_i915_error_context *e,
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index 1642fff9cf13..370373c97b81 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -658,7 +658,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> > static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> > {
> > 	struct execlist_port *port = engine->execlist_port;
> >-	struct drm_i915_gem_request *last = port[0].request;
> >+	struct drm_i915_gem_request *last = port[0].request_count;
> 
> It's confusing that in this new scheme sometimes we have direct
> access to the request and sometimes we have to go through the
> port_request macro.
> 
> So maybe we should always use the port_request macro. Hm, could we
> invent a new type to help enforce that? Like:
> 
> struct drm_i915_gem_port_request_slot {
> 	struct drm_i915_gem_request *req_count;
> };
> 
> And then execlist port would contain these and helpers would need to
> be functions?
> 
> I've also noticed some GVT/GuC patches which sounded like they are
> adding the same single submission constraints so maybe now is the
> time to unify the dequeue? (Haven't looked at those patches deeper
> than the subject line so might be wrong.)
> 
> Not sure 100% of all the above, would need to sketch it. What are
> your thoughts?

I forsee a use for the count in guc as well, so conversion is ok with
me.

> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 7df278fe492e..69299fbab4f9 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -342,39 +342,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
> >
> > static void execlists_submit_ports(struct intel_engine_cs *engine)
> > {
> >-	struct drm_i915_private *dev_priv = engine->i915;
> > 	struct execlist_port *port = engine->execlist_port;
> > 	u32 __iomem *elsp =
> >-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
> >-	u64 desc[2];
> >-
> >-	GEM_BUG_ON(port[0].count > 1);
> >-	if (!port[0].count)
> >-		execlists_context_status_change(port[0].request,
> >-						INTEL_CONTEXT_SCHEDULE_IN);
> >-	desc[0] = execlists_update_context(port[0].request);
> >-	GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
> >-	port[0].count++;
> >-
> >-	if (port[1].request) {
> >-		GEM_BUG_ON(port[1].count);
> >-		execlists_context_status_change(port[1].request,
> >-						INTEL_CONTEXT_SCHEDULE_IN);
> >-		desc[1] = execlists_update_context(port[1].request);
> >-		GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
> >-		port[1].count = 1;
> >-	} else {
> >-		desc[1] = 0;
> >-	}
> >-	GEM_BUG_ON(desc[0] == desc[1]);
> >-
> >-	/* You must always write both descriptors in the order below. */
> >-	writel(upper_32_bits(desc[1]), elsp);
> >-	writel(lower_32_bits(desc[1]), elsp);
> >+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
> >+	unsigned int n;
> >+
> >+	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
> 
> We could also add for_each_req_port or something, to iterate and
> unpack either req only or the count as well?

for_each_port_reverse? We're looking at very special cases here!

I'm not sure and I'm playing with different structures.

> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index d25b88467e5e..39b733e5cfd3 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -377,8 +377,12 @@ struct intel_engine_cs {
> > 	/* Execlists */
> > 	struct tasklet_struct irq_tasklet;
> > 	struct execlist_port {
> >-		struct drm_i915_gem_request *request;
> >-		unsigned int count;
> >+		struct drm_i915_gem_request *request_count;
> 
> Would req(uest)_slot maybe be better?

It's definitely a count (of how many times this request has been
submitted), and I like long verbose names when I don't want them to be
used directly. So expect guc to be tidied.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre