[Intel-gfx] [PATCH 04/19] drm/i915/perf: Determine gen12 oa ctx offset at runtime

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Thu Sep 8 23:04:42 UTC 2022


On Thu, Sep 08, 2022 at 09:32:12PM +0300, Lionel Landwerlin wrote:
>On 06/09/2022 23:35, Umesh Nerlige Ramappa wrote:
>>On Tue, Sep 06, 2022 at 10:48:50PM +0300, Lionel Landwerlin wrote:
>>>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>>>Some SKUs of same gen12 platform may have different oactxctrl
>>>>offsets. For gen12, determine oactxctrl offsets at runtime.
>>>>
>>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>>>---
>>>> drivers/gpu/drm/i915/i915_perf.c         | 149 ++++++++++++++++++-----
>>>> drivers/gpu/drm/i915/i915_perf_oa_regs.h |   2 +-
>>>> 2 files changed, 120 insertions(+), 31 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>b/drivers/gpu/drm/i915/i915_perf.c
>>>>index 3526693d64fa..efa7eda83edd 100644
>>>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>@@ -1363,6 +1363,67 @@ static int 
>>>>gen12_get_render_context_id(struct i915_perf_stream *stream)
>>>>     return 0;
>>>> }
>>>>+#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
>>>>+#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == 
>>>>MI_OPCODE(MI_INSTR(0x22, 0)))
>>>>+#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
>>>
>>>
>>>Maybe you want to put this in intel_gpu_commands.h
>>>
>>>
>>>>+#define __valid_oactxctrl_offset(x) ((x) && (x) != U32_MAX)
>>>>+static bool __find_reg_in_lri(u32 *state, u32 reg, u32 *offset)
>>>>+{
>>>>+    u32 idx = *offset;
>>>>+    u32 len = MI_LRI_LEN(state[idx]) + idx;
>>>>+
>>>>+    idx++;
>>>>+    for (; idx < len; idx += 2)
>>>>+        if (state[idx] == reg)
>>>>+            break;
>>>>+
>>>>+    *offset = idx;
>>>>+    return state[idx] == reg;
>>>>+}
>>>>+
>>>>+static u32 __context_image_offset(struct intel_context *ce, u32 reg)
>>>>+{
>>>>+    u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
>>>>+    u32 *state = ce->lrc_reg_state;
>>>>+
>>>>+    for (offset = 0; offset < len; ) {
>>>>+        if (IS_MI_LRI_CMD(state[offset])) {
>>>
>>>I'm a bit concerned you might find other matches with this.
>>>
>>>Because let's say you run into a 3DSTATE_SUBSLICE_HASH_TABLE 
>>>instruction, you'll iterate the instruction dword by dword because 
>>>you don't know how to read its length and skip to the next one.
>>>
>>>Now some of the fields can be programmed from userspace to look 
>>>like an MI_LRI header, so you start to read data in the wrong way.
>>>
>>>
>>>Unfortunately I don't have a better solution. My only ask is that 
>>>you make __find_reg_in_lri() take the context image size in 
>>>parameter so it NEVER goes over the the context image.
>>>
>>>
>>>To limit the risk you should run this function only one at driver 
>>>initialization and store the found offset.
>>>
>>
>>Hmm, didn't know that there may be non-LRI commands in the context 
>>image or user could add to the context image somehow. Does using the 
>>context image size alone address these issues?
>>
>>Even after including the size in the logic, any reason you think we 
>>would be much more safer to do this from init? Is it because context 
>>image is not touched by user yet?
>
>
>The format of the image (commands in there and their offset) is fixed 
>per HW generation.
>
>Only the date in each of the commands will vary per context.
>
>In the case of MI_LRI, the register offsets are the same for all 
>context, but the value programmed will vary per context.
>
>So executing once should be enough to find the right offset, rather 
>than  every time we open the i915-perf stream.
>

In the current logic, the context image is traversed only once per 
driver load (even though the first time it happens is when a stream is 
opened). see saved_offset below.

>
>I think once you have the logic to make sure you never read outside 
>the image it should be alright.

ok, I will check that __find_reg_in_lri() does not go over the context 
image size.

Thanks,
Umesh

>
>
>-Lionel
>
>
>>
>>Thanks,
>>Umesh
>>
>>>
>>>Thanks,
>>>
>>>
>>>-Lionel
>>>
>>>
>>>>+            if (__find_reg_in_lri(state, reg, &offset))
>>>>+                break;
>>>>+        } else {
>>>>+            offset++;
>>>>+        }
>>>>+    }
>>>>+
>>>>+    return offset < len ? offset : U32_MAX;
>>>>+}
>>>>+
>>>>+static int __set_oa_ctx_ctrl_offset(struct intel_context *ce)
>>>>+{
>>>>+    i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
>>>>+    struct i915_perf *perf = &ce->engine->i915->perf;
>>>>+    u32 saved_offset = perf->ctx_oactxctrl_offset;
>>>>+    u32 offset;
>>>>+
>>>>+    /* Do this only once. Failure is stored as offset of U32_MAX */
>>>>+    if (saved_offset)
>>>>+        return 0;
>>>>+
>>>>+    offset = __context_image_offset(ce, i915_mmio_reg_offset(reg));
>>>>+    perf->ctx_oactxctrl_offset = offset;
>>>>+
>>>>+    drm_dbg(&ce->engine->i915->drm,
>>>>+        "%s oa ctx control at 0x%08x dword offset\n",
>>>>+        ce->engine->name, offset);
>>>>+
>>>>+    return __valid_oactxctrl_offset(offset) ? 0 : -ENODEV;
>>>>+}
>>>>+
>>>>+static bool engine_supports_mi_query(struct intel_engine_cs *engine)
>>>>+{
>>>>+    return engine->class == RENDER_CLASS;
>>>>+}
>>>>+
>>>> /**
>>>>  * oa_get_render_ctx_id - determine and hold ctx hw id
>>>>  * @stream: An i915-perf stream opened for OA metrics
>>>>@@ -1382,6 +1443,17 @@ static int oa_get_render_ctx_id(struct 
>>>>i915_perf_stream *stream)
>>>>     if (IS_ERR(ce))
>>>>         return PTR_ERR(ce);
>>>>+    if (engine_supports_mi_query(stream->engine)) {
>>>>+        ret = __set_oa_ctx_ctrl_offset(ce);
>>>>+        if (ret && !(stream->sample_flags & SAMPLE_OA_REPORT)) {
>>>>+            intel_context_unpin(ce);
>>>>+            drm_err(&stream->perf->i915->drm,
>>>>+                "Enabling perf query failed for %s\n",
>>>>+                stream->engine->name);
>>>>+            return ret;
>>>>+        }
>>>>+    }
>>>>+
>>>>     switch (GRAPHICS_VER(ce->engine->i915)) {
>>>>     case 7: {
>>>>         /*
>>>>@@ -2412,10 +2484,11 @@ static int 
>>>>gen12_configure_oar_context(struct i915_perf_stream *stream,
>>>>     int err;
>>>>     struct intel_context *ce = stream->pinned_ctx;
>>>>     u32 format = stream->oa_buffer.format;
>>>>+    u32 offset = stream->perf->ctx_oactxctrl_offset;
>>>>     struct flex regs_context[] = {
>>>>         {
>>>>             GEN8_OACTXCONTROL,
>>>>-            stream->perf->ctx_oactxctrl_offset + 1,
>>>>+            offset + 1,
>>>>             active ? GEN8_OA_COUNTER_RESUME : 0,
>>>>         },
>>>>     };
>>>>@@ -2440,15 +2513,18 @@ static int 
>>>>gen12_configure_oar_context(struct i915_perf_stream *stream,
>>>>         },
>>>>     };
>>>>-    /* Modify the context image of pinned context with regs_context*/
>>>>-    err = intel_context_lock_pinned(ce);
>>>>-    if (err)
>>>>-        return err;
>>>>+    /* Modify the context image of pinned context with regs_context */
>>>>+    if (__valid_oactxctrl_offset(offset)) {
>>>>+        err = intel_context_lock_pinned(ce);
>>>>+        if (err)
>>>>+            return err;
>>>>-    err = gen8_modify_context(ce, regs_context, 
>>>>ARRAY_SIZE(regs_context));
>>>>-    intel_context_unlock_pinned(ce);
>>>>-    if (err)
>>>>-        return err;
>>>>+        err = gen8_modify_context(ce, regs_context,
>>>>+                      ARRAY_SIZE(regs_context));
>>>>+        intel_context_unlock_pinned(ce);
>>>>+        if (err)
>>>>+            return err;
>>>>+    }
>>>>     /* Apply regs_lri using LRI with pinned context */
>>>>     return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), 
>>>>active);
>>>>@@ -2570,6 +2646,7 @@ lrc_configure_all_contexts(struct 
>>>>i915_perf_stream *stream,
>>>>                const struct i915_oa_config *oa_config,
>>>>                struct i915_active *active)
>>>> {
>>>>+    u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
>>>>     /* The MMIO offsets for Flex EU registers aren't contiguous */
>>>>     const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>>> #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>>>@@ -2580,7 +2657,7 @@ lrc_configure_all_contexts(struct 
>>>>i915_perf_stream *stream,
>>>>         },
>>>>         {
>>>>             GEN8_OACTXCONTROL,
>>>>-            stream->perf->ctx_oactxctrl_offset + 1,
>>>>+            ctx_oactxctrl + 1,
>>>>         },
>>>>         { EU_PERF_CNTL0, ctx_flexeuN(0) },
>>>>         { EU_PERF_CNTL1, ctx_flexeuN(1) },
>>>>@@ -4551,6 +4628,37 @@ static void 
>>>>oa_init_supported_formats(struct i915_perf *perf)
>>>>     }
>>>> }
>>>>+static void i915_perf_init_info(struct drm_i915_private *i915)
>>>>+{
>>>>+    struct i915_perf *perf = &i915->perf;
>>>>+
>>>>+    switch (GRAPHICS_VER(i915)) {
>>>>+    case 8:
>>>>+        perf->ctx_oactxctrl_offset = 0x120;
>>>>+        perf->ctx_flexeu0_offset = 0x2ce;
>>>>+        perf->gen8_valid_ctx_bit = BIT(25);
>>>>+        break;
>>>>+    case 9:
>>>>+        perf->ctx_oactxctrl_offset = 0x128;
>>>>+        perf->ctx_flexeu0_offset = 0x3de;
>>>>+        perf->gen8_valid_ctx_bit = BIT(16);
>>>>+        break;
>>>>+    case 11:
>>>>+        perf->ctx_oactxctrl_offset = 0x124;
>>>>+        perf->ctx_flexeu0_offset = 0x78e;
>>>>+        perf->gen8_valid_ctx_bit = BIT(16);
>>>>+        break;
>>>>+    case 12:
>>>>+        /*
>>>>+         * Calculate offset at runtime in oa_pin_context for gen12 and
>>>>+         * cache the value in perf->ctx_oactxctrl_offset.
>>>>+         */
>>>>+        break;
>>>>+    default:
>>>>+        MISSING_CASE(GRAPHICS_VER(i915));
>>>>+    }
>>>>+}
>>>>+
>>>> /**
>>>>  * i915_perf_init - initialize i915-perf state on module bind
>>>>  * @i915: i915 device instance
>>>>@@ -4589,6 +4697,7 @@ void i915_perf_init(struct 
>>>>drm_i915_private *i915)
>>>>          * execlist mode by default.
>>>>          */
>>>>         perf->ops.read = gen8_oa_read;
>>>>+        i915_perf_init_info(i915);
>>>>         if (IS_GRAPHICS_VER(i915, 8, 9)) {
>>>>             perf->ops.is_valid_b_counter_reg =
>>>>@@ -4608,18 +4717,6 @@ void i915_perf_init(struct 
>>>>drm_i915_private *i915)
>>>>             perf->ops.enable_metric_set = gen8_enable_metric_set;
>>>>             perf->ops.disable_metric_set = gen8_disable_metric_set;
>>>>             perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
>>>>-
>>>>-            if (GRAPHICS_VER(i915) == 8) {
>>>>-                perf->ctx_oactxctrl_offset = 0x120;
>>>>-                perf->ctx_flexeu0_offset = 0x2ce;
>>>>-
>>>>-                perf->gen8_valid_ctx_bit = BIT(25);
>>>>-            } else {
>>>>-                perf->ctx_oactxctrl_offset = 0x128;
>>>>-                perf->ctx_flexeu0_offset = 0x3de;
>>>>-
>>>>-                perf->gen8_valid_ctx_bit = BIT(16);
>>>>-            }
>>>>         } else if (GRAPHICS_VER(i915) == 11) {
>>>>             perf->ops.is_valid_b_counter_reg =
>>>>                 gen7_is_valid_b_counter_addr;
>>>>@@ -4633,11 +4730,6 @@ void i915_perf_init(struct 
>>>>drm_i915_private *i915)
>>>>             perf->ops.enable_metric_set = gen8_enable_metric_set;
>>>>             perf->ops.disable_metric_set = gen11_disable_metric_set;
>>>>             perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
>>>>-
>>>>-            perf->ctx_oactxctrl_offset = 0x124;
>>>>-            perf->ctx_flexeu0_offset = 0x78e;
>>>>-
>>>>-            perf->gen8_valid_ctx_bit = BIT(16);
>>>>         } else if (GRAPHICS_VER(i915) == 12) {
>>>>             perf->ops.is_valid_b_counter_reg =
>>>>                 gen12_is_valid_b_counter_addr;
>>>>@@ -4651,9 +4743,6 @@ void i915_perf_init(struct 
>>>>drm_i915_private *i915)
>>>>             perf->ops.enable_metric_set = gen12_enable_metric_set;
>>>>             perf->ops.disable_metric_set = gen12_disable_metric_set;
>>>>             perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
>>>>-
>>>>-            perf->ctx_flexeu0_offset = 0;
>>>>-            perf->ctx_oactxctrl_offset = 0x144;
>>>>         }
>>>>     }
>>>>diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
>>>>b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>>>index f31c9f13a9fc..0ef3562ff4aa 100644
>>>>--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>>>+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>>>@@ -97,7 +97,7 @@
>>>> #define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
>>>> #define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)
>>>>-#define GEN12_OACTXCONTROL _MMIO(0x2360)
>>>>+#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
>>>> #define GEN12_OAR_OASTATUS _MMIO(0x2968)
>>>> /* Gen12 OAG unit */
>>>
>>>
>


More information about the Intel-gfx mailing list