[Intel-xe] [PATCH v3 2/9] drm/xe/rtp: Split rtp process initialization
Lucas De Marchi
lucas.demarchi at intel.com
Wed May 17 20:09:59 UTC 2023
On Wed, May 17, 2023 at 10:11:26AM -0700, Matt Roper wrote:
>On Tue, May 16, 2023 at 03:19:43PM -0700, Lucas De Marchi wrote:
>> The selection between hwe and gt is exposed to the outside of rtp, by
>> the xe_rtp_process() function. It doesn't make sense to pass a hwe and a
>> gt as argument since the gt should always be the one containing the hwe.
>>
>> This clarifies the interface by separating the context creation into an
>> initializer. The initializer then passes the correct value and there
>> should never be a case with hwe and gt set. Internally the functions
>> continue receiving the argument separately.
>>
>> Also, since most of the rtp rules match against device characteristics,
>> add support for processing rules based solely on the device. This will
>> be used by follow up commits to process some workarounds.
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>> ---
>> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 3 +-
>> drivers/gpu/drm/xe/xe_hw_engine.c | 8 +--
>> drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 +-
>> drivers/gpu/drm/xe/xe_rtp.c | 71 ++++++++++++++++++++------
>> drivers/gpu/drm/xe/xe_rtp.h | 10 +++-
>> drivers/gpu/drm/xe/xe_rtp_types.h | 15 ++++++
>> drivers/gpu/drm/xe/xe_tuning.c | 8 ++-
>> drivers/gpu/drm/xe/xe_wa.c | 12 +++--
>> 8 files changed, 102 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> index 4b2aac5ccf28..f96ef1987719 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> @@ -238,10 +238,11 @@ static void xe_rtp_process_tests(struct kunit *test)
>> struct xe_device *xe = test->priv;
>> struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
>> const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
>> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]);
>> unsigned long idx, count = 0;
>>
>> xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
>> - xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
>> + xe_rtp_process(&ctx, param->entries, reg_sr);
>>
>> xa_for_each(®_sr->xa, idx, sre) {
>> if (idx == param->expected_reg.addr)
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index 751f6c3bba17..1bf28b1ad319 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -281,6 +281,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
>> const u8 mocs_read_idx = gt->mocs.uc_index;
>> u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
>> REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
>> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
>> const struct xe_rtp_entry lrc_was[] = {
>> /*
>> * Some blitter commands do not have a field for MOCS, those
>> @@ -299,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
>> {}
>> };
>>
>> - xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe);
>> + xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
>> }
>>
>> static void
>> @@ -311,7 +312,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
>> const u8 mocs_read_idx = gt->mocs.uc_index;
>> u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
>> REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
>> - const struct xe_rtp_entry engine_was[] = {
>> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
>> + const struct xe_rtp_entry engine_entries[] = {
>> /*
>> * RING_CMD_CCTL specifies the default MOCS entry that will be
>> * used by the command streamer when executing commands that
>> @@ -332,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
>> {}
>> };
>>
>> - xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe);
>> + xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr);
>> }
>>
>> static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
>> diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> index 7a2bb60ebd85..98f678d74445 100644
>> --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> @@ -63,7 +63,9 @@ static const struct xe_rtp_entry register_whitelist[] = {
>> */
>> void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
>> {
>> - xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
>> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
>> +
>> + xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
>> index 0c6a23e14a71..648a38bf8f0c 100644
>> --- a/drivers/gpu/drm/xe/xe_rtp.c
>> +++ b/drivers/gpu/drm/xe/xe_rtp.c
>> @@ -23,11 +23,11 @@
>> * the values to the registers that have matching rules.
>> */
>>
>> -static bool rule_matches(struct xe_gt *gt,
>> +static bool rule_matches(const struct xe_device *xe,
>> + struct xe_gt *gt,
>> struct xe_hw_engine *hwe,
>> const struct xe_rtp_entry *entry)
>> {
>> - const struct xe_device *xe = gt_to_xe(gt);
>> const struct xe_rtp_rule *r;
>> unsigned int i;
>> bool match;
>> @@ -62,22 +62,30 @@ static bool rule_matches(struct xe_gt *gt,
>> match = xe->info.step.graphics >= r->step_start &&
>> xe->info.step.graphics < r->step_end;
>> break;
>> + case XE_RTP_MATCH_INTEGRATED:
>> + match = !xe->info.is_dgfx;
>> + break;
>> + case XE_RTP_MATCH_DISCRETE:
>> + match = xe->info.is_dgfx;
>> + break;
>> case XE_RTP_MATCH_ENGINE_CLASS:
>> + if (drm_WARN_ON(&xe->drm, !hwe))
>> + return false;
>> +
>> match = hwe->class == r->engine_class;
>> break;
>> case XE_RTP_MATCH_NOT_ENGINE_CLASS:
>> + if (drm_WARN_ON(&xe->drm, !hwe))
>> + return false;
>> +
>> match = hwe->class != r->engine_class;
>> break;
>> case XE_RTP_MATCH_FUNC:
>> + if (drm_WARN_ON(&xe->drm, !gt))
>> + return false;
>> +
>> match = r->match_func(gt, hwe);
>
>Would it make more sense to adjust the signature of match_func to take
>xe as an additional parameter to allow general matching without a
>gt/hwe?
>
>Actually I wonder if we should just split out the addition of
>device-only support into its own commit later in the series closer to
>where it gets used.
I'm not sure because:
1) For the use cases where we have only xe, i.e. the OOB
workarounds, it would be odd to add the FUNC() in the .rules
file.
2) the context where the rules are executed is not the context
where the XE_WA() happens... we don't have a complete xe
initialized yet. I think that would tempt people to add the
callback function just to later realize the callback can't
really be used
What I think we may eventually want to do is to split the rules into:
device, gt and engine rules. However I think that may increase the
boilerplate code and make things harder rather than easier.
>
>Anyway, not a big deal either way; up to you whether to make any changes
>or not. Either way,
>
>Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
thanks
Lucas De Marchi
More information about the Intel-xe
mailing list