[Intel-gfx] [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context

Thu Aug 1 16:00:59 UTC 2019

Quoting Chris Wilson (2019-08-01 16:48:33)
> Quoting Tvrtko Ursulin (2019-08-01 16:29:53)
> > For instance Icelake engine dependent stuff sneaked into 
> > intel_lrc.c/lrc_desriptors at some point, which is also against the 
> > spirit of caching. If we were to move the cached value in ce then we 
> > would be able to remove that and have it once again minimal in there.
> 
> Well we can set all bits but hw_id/lrca at init time. How about if I run
> that past you?

 static u64
-lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
+base_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-       struct i915_gem_context *ctx = ce->gem_context;
        u64 desc;

        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -426,18 +425,12 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
        if (IS_GEN(engine->i915, 8))
                desc |= GEN8_CTX_L3LLC_COHERENT;

-       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
-                                                               /* bits 12-31 */
        /*
         * The following 32bits are copied into the OA reports (dword 2).
         * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
         * anything below.
         */
        if (INTEL_GEN(engine->i915) >= 11) {
-               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-                                                               /* bits 37-47 */
-
                desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
                                                                /* bits 48-53 */

@@ -445,8 +438,29 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)

                desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
                                                                /* bits 61-63 */
+       }
+
+       return desc;
+}
+
+static u64
+update_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+       struct i915_gem_context *ctx = ce->gem_context;
+       u64 desc = ce->lrc_desc;
+
+       desc &= ~GENMASK_ULL(31, 12);
+       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
+
+       if (INTEL_GEN(engine->i915) >= 11) {
+               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
+
+               desc &= ~GENMASK_ULL(47, 37);
+               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
        } else {
                GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
+
+               desc &= ~GENMASK_ULL(52, 32);
                desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;   /* bits 32-52 */
        }

@@ -1631,7 +1645,7 @@ __execlists_context_pin(struct intel_context *ce,
        if (ret)
                goto unpin_map;

-       ce->lrc_desc = lrc_descriptor(ce, engine);
+       ce->lrc_desc = update_lrc_descriptor(ce, engine);
        ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
        __execlists_update_reg_state(ce, engine);

@@ -3126,6 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
        ce->ring = ring;
        ce->state = vma;

+       ce->lrc_desc = base_lrc_descriptor(ce, engine);
+
        return 0;

 error_ring_free:

That's pretty much the same amount of work in context_pin. I'm not
convinced that caching between pins achieves very much.

Concur?
-Chris