[PATCH i-g-t 25/28] xe/oa/mdapi-xml-convert: Add support for 576B_PEC64LL format
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Thu Jun 20 23:43:35 UTC 2024
On Thu, Jun 20, 2024 at 01:00:50PM -0700, Ashutosh Dixit wrote:
>From: José Roberto de Souza <jose.souza at intel.com>
>
>Xe2 doesn't use the 256-byte report format; instead it uses a 576-byte
>format with 64 PEC fields that are each 64 bits wide.
>This patch fixes the Xe2 definition and adds the parser for this format.
>
>Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>---
> lib/xe/oa-configs/codegen.py | 5 ++
> lib/xe/oa-configs/mdapi-xml-convert.py | 76 +++++++++++++++++++++-----
> lib/xe/xe_oa.h | 1 +
> 3 files changed, 67 insertions(+), 15 deletions(-)
>
>diff --git a/lib/xe/oa-configs/codegen.py b/lib/xe/oa-configs/codegen.py
>index f078ef9498..51498f803d 100644
>--- a/lib/xe/oa-configs/codegen.py
>+++ b/lib/xe/oa-configs/codegen.py
>@@ -128,6 +128,7 @@ class Set:
> return self.xml.find(path)
>
>
>+#TODO add SqidiTotalCount, L3BankTotalCount, L3NodeTotalCount
> hw_vars_mapping = {
> "$EuCoresTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
> "$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" },
>@@ -159,6 +160,10 @@ hw_vars_mapping = {
> "$GpuMaxFrequency": { 'c': "perf->devinfo.gt_max_freq" },
> "$SkuRevisionId": { 'c': "perf->devinfo.revision" },
> "$QueryMode": { 'c': "perf->devinfo.query_mode" },
>+
>+ "$ComputeEngineTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
>+
>+ "$CopyEngineTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
> }
>
> def is_hw_var(name):
>diff --git a/lib/xe/oa-configs/mdapi-xml-convert.py b/lib/xe/oa-configs/mdapi-xml-convert.py
>index 575b53e955..602a3965af 100755
>--- a/lib/xe/oa-configs/mdapi-xml-convert.py
>+++ b/lib/xe/oa-configs/mdapi-xml-convert.py
>@@ -149,12 +149,10 @@ mtl_chipset_oam_samedia_params = {
> }
> }
>
>-# FIXME: everything except oa_report_size is incorrect here
>-xe2_chipset_params = {
>- 'a_offset': 16,
>- 'b_offset': 192,
>- 'c_offset': 224,
>+xe2_chipset_params_pec = {
>+ 'pec_offset': 32,
> 'oa_report_size': 576,
>+ # TODO: Not sure about the configs below
> 'config_reg_blacklist': {
> 0x2364, # OACTXID
> },
>@@ -183,7 +181,11 @@ mtl_chipset_oa_formats = {
> }
>
> xe2_chipset_oa_formats = {
>- '256B_GENERIC_NOA16': xe2_chipset_params,
>+ '576B_PEC64LL': xe2_chipset_params_pec,
>+ # We only care about 576B_PEC64LL metrics in Xe2; the entries below are just to
>+ # suppress warnings
>+ '256B_GENERIC_NOA16': xehpsdv_chipset_params,
>+ '128B_MPEC8_NOA16': mtl_chipset_oam_samedia_params,
> }
>
> chipsets = {
>@@ -209,8 +211,6 @@ chipsets = {
>
> xehp_plus = ( 'ACM', 'MTL' )
>
>-xe2_plus = ( 'LNL' )
>-
> register_types = { 'OA', 'NOA', 'FLEX', 'PM' }
>
> default_set_blacklist = { "RenderDX1x", # TODO: rename to something non 'DX'
>@@ -298,6 +298,46 @@ def read_token_to_rpn_read_oam(chipset, token, raw_offsets, oa_format):
>
> assert 0
>
>+def read_token_to_rpn_read_pec(chipset, token, raw_offsets, oa_format):
>+ width, offset_str = token.split('@')
>+ offset = int(offset_str, 16)
>+
>+ if width != 'qw':
>+ assert 0
>+
>+ if raw_offsets:
>+ pec_offset = chipsets[chipset][oa_format]['pec_offset']
>+
>+ if offset < pec_offset:
>+ if offset == 8:
>+ return "GPU_TIME 0 READ"
>+ elif offset == 24:
>+ return "GPU_CLOCK 0 READ"
>+ else:
>+ assert 0
>+ else:
>+ pec_cnt_offset = int((offset - pec_offset) / 8)
>+
>+ return "PEC " + str(pec_cnt_offset) + " READ"
>+ else:
>+ # Location in the accumulated deltas
>+ idx = int(offset / 8)
>+
>+ if idx == 0:
>+ return "GPU_TIME 0 READ"
>+ elif idx == 1:
>+ return "GPU_CLOCK 0 READ"
>+ else:
>+ idx = idx - 2;
>+ pec_cnt_offset = str(idx)
>+ pec_high_low_text = "low"
>+ if (offset % 8) > 0:
>+ pec_high_low_text = "high"
>+
>+ return "PEC " + pec_cnt_offset + " READ"
>+
>+ assert 0
>+
> def read_token_to_rpn_read_oag(chipset, token, raw_offsets, oa_format):
> width, offset_str = token.split('@')
>
>@@ -374,9 +414,6 @@ def read_token_to_rpn_read_oag(chipset, token, raw_offsets, oa_format):
> return "C " + str(idx - 48) + " READ"
> else:
> return "{0} READ".format(read_value(chipset, offset, oa_format))
>- elif chipset in xe2_plus:
>- # FIXME: skip all metrics to retain just the registers
>- return "GPU_TIME 0 READ"
> else:
> # For Gen8+ the array of accumulated counters is
> # assumed to start with a GPU_TIME then GPU_CLOCK,
>@@ -405,6 +442,9 @@ def read_token_to_rpn_read(chipset, token, raw_offsets, oa_format):
> if oa_format in ['192B_MPEC8LL_NOA16', '128B_MPEC8_NOA16']:
> return read_token_to_rpn_read_oam(chipset, token, raw_offsets, oa_format)
>
>+ if oa_format in ['576B_PEC64LL']:
>+ return read_token_to_rpn_read_pec(chipset, token, raw_offsets, oa_format)
>+
> assert 0
>
> def replace_read_tokens_with_rpn_read_ops(chipset, oa_format, equation, raw_offsets):
>@@ -905,6 +945,8 @@ for arg in args.xml:
> mdapi_counter.set('NormalizationEquation', '$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV')
> #mdapi_counter.set('DeltaReportReadEquation', '$GpuCoreClocks $GpuTime UDIV')
>
>+ if mdapi_counter.get('SymbolName') == "ResultUncertainty":
>+ continue
>
> symbol_name = oa_registry.Registry.sanitize_symbol_name(mdapi_counter.get('SymbolName'))
>
>@@ -992,11 +1034,15 @@ for arg in args.xml:
> # XXX: As a special case, we override the raw and delta report
> # equations for the GpuTime counters, which seem inconsistent
> if mdapi_counter.get('SymbolName') == "GpuTime":
>- mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
>- if chipset == 'MTL' and oa_format != '256B_GENERIC_NOA16':
>- mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
>+ if oa_format == '576B_PEC64LL':
>+ mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
>+ mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
> else:
>- mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
>+ mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
>+ if chipset == 'MTL' and oa_format != '256B_GENERIC_NOA16':
>+ mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
>+ else:
>+ mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
>
> availability = fixup_equation(mdapi_counter.get('AvailabilityEquation'))
> if availability == "":
>diff --git a/lib/xe/xe_oa.h b/lib/xe/xe_oa.h
>index c16177ec8e..f6f2768b00 100644
>--- a/lib/xe/xe_oa.h
>+++ b/lib/xe/xe_oa.h
>@@ -275,6 +275,7 @@ struct intel_xe_perf_metric_set {
> int b_offset;
> int c_offset;
> int perfcnt_offset;
>+ int pec_offset;
>
> const struct intel_xe_perf_register_prog *b_counter_regs;
> uint32_t n_b_counter_regs;
>--
>2.41.0
>
More information about the igt-dev
mailing list