[igt-dev] [PATCH] test/perf: Add support for TGL in perf tests

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Oct 25 06:14:35 UTC 2019


On 24/10/2019 22:22, Umesh Nerlige Ramappa wrote:
> On Thu, Oct 24, 2019 at 02:22:51PM +0300, Lionel Landwerlin wrote:
>> On 14/10/2019 21:57, Umesh Nerlige Ramappa wrote:
>>> Add following changes to enable perf tests on TGL
>>> - Support only a single OA format
>>> - Add TGL metrics
>>> - Update whitelist test case
>>> - Cleanup mi-rpc test if it fails
>>> - Skip unsupported test - gen8-unprivileged-single-ctx-counters
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>> ---
>>>  tests/perf.c | 85 ++++++++++++++++++++++++++++++++++++++--------------
>>>  1 file changed, 62 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/tests/perf.c b/tests/perf.c
>>> index 5ad8b2db..c5fc6878 100644
>>> --- a/tests/perf.c
>>> +++ b/tests/perf.c
>>> @@ -159,6 +159,15 @@ static struct oa_format 
>>> gen8_oa_formats[I915_OA_FORMAT_MAX] = {
>>>          .b_off = 32, .n_b = 8, },
>>>  };
>>> +static struct oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>> +    [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = {
>>> +        "A32u40_A4u32_B8_C8", .size = 256,
>>> +        .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
>>> +        .a_off = 144, .n_a = 4, .first_a = 32,
>>> +        .b_off = 192, .n_b = 8,
>>> +        .c_off = 224, .n_c = 8, },
>>> +};
>>> +
>>>  static bool hsw_undefined_a_counters[45] = {
>>>      [4] = true,
>>>      [6] = true,
>>> @@ -206,7 +215,10 @@ get_oa_format(enum drm_i915_oa_format format)
>>>  {
>>>      if (IS_HASWELL(devid))
>>>          return hsw_oa_formats[format];
>>> -    return gen8_oa_formats[format];
>>> +    else if (IS_GEN12(devid))
>>> +        return gen12_oa_formats[format];
>>> +    else
>>> +        return gen8_oa_formats[format];
>>>  }
>>>  static void
>>> @@ -945,6 +957,8 @@ init_sys_info(void)
>>>              test_set_uuid = "db41edd4-d8e7-4730-ad11-b9a2d6833503";
>>>          } else if (IS_ICELAKE(devid)) {
>>>              test_set_uuid = "a291665e-244b-4b76-9b9a-01de9d3c8068";
>>> +        } else if (IS_TIGERLAKE(devid)) {
>>> +            test_set_uuid = "80a833f0-2504-4321-8894-e9277844ce7b";
>>>          } else {
>>>              igt_debug("unsupported GT\n");
>>>              return false;
>>> @@ -2813,6 +2827,7 @@ test_mi_rpc(void)
>>>      drm_intel_bo *bo;
>>>      uint32_t *report32;
>>>      int ret;
>>> +    uint32_t word0, word1, word63, word64;
>>>      stream_fd = __perf_open(drm_fd, &param, false);
>>> @@ -2842,18 +2857,26 @@ test_mi_rpc(void)
>>>      igt_assert_eq(ret, 0);
>>>      report32 = bo->virtual;
>>> -    igt_assert_eq(report32[0], 0xdeadbeef); /* report ID */
>>> -    igt_assert_neq(report32[1], 0); /* timestamp */
>>> -
>>> -    igt_assert_neq(report32[63], 0x80808080); /* end of report */
>>> -    igt_assert_eq(report32[64], 0x80808080); /* after 256 byte 
>>> report */
>>> +    word0 = report32[0];
>>> +    word1 = report32[1];
>>> +    word63 = report32[63];
>>> +    word64 = report32[64];
>>> +    /* cleanup before you fail for any sanity checks so that 
>>> subsequent
>>> +     * tests do not fail because of bad perf state.
>>> +     */
>>>      drm_intel_bo_unmap(bo);
>>>      drm_intel_bo_unreference(bo);
>>>      intel_batchbuffer_free(batch);
>>>      drm_intel_gem_context_destroy(context);
>>>      drm_intel_bufmgr_destroy(bufmgr);
>>>      __perf_close(stream_fd);
>>> +
>>> +    igt_assert_eq(word0, 0xdeadbeef); /* report ID */
>>> +    igt_assert_neq(word1, 0); /* timestamp */
>>> +
>>> +    igt_assert_neq(word63, 0x80808080); /* end of report */
>>> +    igt_assert_eq(word64, 0x80808080); /* after 256 byte report */
>>
>>
>> Any reason for this change? I can't see any functional change.
>
> When running the entire perf test suite, if this test fails/asserts, 
> all subsequent tests fail too (because perf_close is not called on 
> failure).  To avoid that, I moved the checks to a later point 
> (temporarily, the code might need additional cleanup).


That's strange, because __perf_open & __perf_close are supposed to deal 
with this.


>
>>
>>
>>>  }
>>>  static void
>>> @@ -3846,6 +3869,8 @@ test_whitelisted_registers_userspace_config(void)
>>>      uint32_t b_counters_regs[200];
>>>      uint32_t flex_regs[200];
>>>      uint32_t i;
>>> +    uint32_t oa_start_trig1, oa_start_trig8;
>>> +    uint32_t oa_report_trig1, oa_report_trig8;
>>>      uint64_t config_id;
>>>      char path[512];
>>>      int ret;
>>> @@ -3869,14 +3894,26 @@ 
>>> test_whitelisted_registers_userspace_config(void)
>>>      memset(&config, 0, sizeof(config));
>>>      memcpy(config.uuid, uuid, sizeof(config.uuid));
>>> +    if (intel_gen(devid) >= 12) {
>>> +        oa_start_trig1 = 0xd900;
>>> +        oa_start_trig8 = 0xd91c;
>>> +        oa_report_trig1 = 0xd920;
>>> +        oa_report_trig8 = 0xd93c;
>>> +    } else {
>>> +        oa_start_trig1 = 0x2710;
>>> +        oa_start_trig8 = 0x272c;
>>> +        oa_report_trig1 = 0x2740;
>>> +        oa_report_trig8 = 0x275c;
>>> +    }
>>> +
>>>      /* OASTARTTRIG[1-8] */
>>> -    for (i = 0x2710; i <= 0x272c; i += 4) {
>>> +    for (i = oa_start_trig1; i <= oa_start_trig8; i += 4) {
>>>          b_counters_regs[config.n_boolean_regs * 2] = i;
>>>          b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
>>>          config.n_boolean_regs++;
>>>      }
>>>      /* OAREPORTTRIG[1-8] */
>>> -    for (i = 0x2740; i <= 0x275c; i += 4) {
>>> +    for (i = oa_report_trig1; i <= oa_report_trig8; i += 4) {
>>>          b_counters_regs[config.n_boolean_regs * 2] = i;
>>>          b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
>>>          config.n_boolean_regs++;
>>> @@ -3896,10 +3933,6 @@ 
>>> test_whitelisted_registers_userspace_config(void)
>>>      /* Mux registers (too many of them, just checking bounds) */
>>>      i = 0;
>>> -    /* NOA_WRITE */
>>> -    mux_regs[i++] = 0x9800;
>>> -    mux_regs[i++] = 0;
>>> -
>>>      if (IS_HASWELL(devid)) {
>>>          /* Haswell specific. undocumented... */
>>>          mux_regs[i++] = 0x9ec0;
>>> @@ -3922,10 +3955,6 @@ 
>>> test_whitelisted_registers_userspace_config(void)
>>>          mux_regs[i++] = 0;
>>>      }
>>> -    /* HALF_SLICE_CHICKEN2 (shared with kernel workaround) */
>>> -    mux_regs[i++] = 0xE180;
>>> -    mux_regs[i++] = 0;
>>> -
>>>      if (IS_CHERRYVIEW(devid)) {
>>>          /* Cherryview specific. undocumented... */
>>>          mux_regs[i++] = 0x182300;
>>> @@ -3934,12 +3963,20 @@ 
>>> test_whitelisted_registers_userspace_config(void)
>>>          mux_regs[i++] = 0;
>>>      }
>>> -    /* PERFCNT[12] */
>>> -    mux_regs[i++] = 0x91B8;
>>> -    mux_regs[i++] = 0;
>>> -    /* PERFMATRIX */
>>> -    mux_regs[i++] = 0x91C8;
>>> -    mux_regs[i++] = 0;
>>> +    if (intel_gen(devid) <= 11) {
>>> +        /* NOA_WRITE */
>>> +        mux_regs[i++] = 0x9800;
>>> +        mux_regs[i++] = 0;
>>> +        /* HALF_SLICE_CHICKEN2 (shared with kernel workaround) */
>>> +        mux_regs[i++] = 0xE180;
>>> +        mux_regs[i++] = 0;
>>> +        /* PERFCNT[12] */
>>> +        mux_regs[i++] = 0x91B8;
>>> +        mux_regs[i++] = 0;
>>> +        /* PERFMATRIX */
>>> +        mux_regs[i++] = 0x91C8;
>>> +        mux_regs[i++] = 0;
>>> +    }
>>>      config.mux_regs_ptr = (uintptr_t) mux_regs;
>>>      config.n_mux_regs = i / 2;
>>> @@ -4170,8 +4207,10 @@ igt_main
>>>           * functionality to HW filter timer reports for a specific
>>>           * context (SKL+) can't stop multiple applications viewing
>>>           * system-wide data via MI_REPORT_PERF_COUNT commands.
>>> +         *
>>> +         * For gen12 implement a separate test that uses only OAR
>>
>>
>> Where is that test? :)
>
> :) wip. I gave it a shot - removed the code that collects samples from 
> oa buffer in this test, but it asserts later trying to compare the 
> counters accumulated. I need to debug that. I will post that in 
> another series/patch.
>
> Thanks,
> Umesh


Cool, I was looking forward to see a test that at least verifies the 
save/restore of the counters.

Pretty much all of the 3d pipeline counters (A counters) should not 
progress.


-Lionel


>
>>
>>
>> -Lionel
>>
>>
>>>           */
>>> -        igt_require(intel_gen(devid) >= 8);
>>> +        igt_require(intel_gen(devid) >= 8 && intel_gen(devid) < 12);
>>> gen8_test_single_ctx_render_target_writes_a_counter();
>>>      }
>>
>>
>



More information about the igt-dev mailing list