[PATCH 5/5] drm/xe: Apply whitelist to engine save-restore
Lucas De Marchi
lucas.demarchi at intel.com
Mon Dec 9 21:13:17 UTC 2024
On Thu, Dec 05, 2024 at 12:42:03PM -0800, Matt Roper wrote:
>On Thu, Dec 05, 2024 at 10:22:40AM -0800, Lucas De Marchi wrote:
>> Instead of handling the whitelist directly in the GuC ADS
>> initialization, make it follow the same logic as other engine registers
>> that are save-restored. Main benefit is that then the SW tracking then
>> shows it in debugfs and there's no risk of an engine workaround to write
>> to the same nopriv register that is being passed directly to GuC.
>>
>> This means that xe_reg_whitelist_process_engine() then only process the
>> RTP, converting it to sr entries and then add those entries to the
>> engine sr. With that all the registers should be covered by
>> xe_reg_sr_apply_mmio() to write to the HW and there's no special
>> handling in GuC ADS to also add these registers.
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt.c | 4 +-
>> drivers/gpu/drm/xe/xe_guc_ads.c | 7 ----
>> drivers/gpu/drm/xe/xe_hw_engine.c | 1 -
>> drivers/gpu/drm/xe/xe_reg_sr.c | 49 ----------------------
>> drivers/gpu/drm/xe/xe_reg_whitelist.c | 59 +++++++++++++++++++++++++++
>> 5 files changed, 60 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index d6744be01a687..41ab7fbebc193 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt)
>> if (err)
>> return err;
>>
>> - for_each_hw_engine(hwe, gt, id) {
>> + for_each_hw_engine(hwe, gt, id)
>> xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
>> - xe_reg_sr_apply_whitelist(hwe);
>> - }
>>
>> /* Get CCS mode in sync between sw/hw */
>> xe_gt_apply_ccs_mode(gt);
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
>> index b0afb89d9d90c..943146e5b460d 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ads.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ads.c
>> @@ -243,8 +243,6 @@ static size_t calculate_regset_size(struct xe_gt *gt)
>> xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
>> count++;
>>
>> - count += RING_MAX_NONPRIV_SLOTS * XE_NUM_HW_ENGINES;
>> -
>> count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
>>
>> if (XE_WA(gt, 1607983814))
>> @@ -729,11 +727,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
>> xa_for_each(&hwe->reg_sr.xa, idx, entry)
>> guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
>>
>> - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
>> - guc_mmio_regset_write_one(ads, regset_map,
>> - RING_FORCE_TO_NONPRIV(hwe->mmio_base, i),
>> - count++);
>> -
>> for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
>> if (e->skip)
>> continue;
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index c4b0dc3be39c3..b193667441488 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -574,7 +574,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>> xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
>>
>> xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
>> - xe_reg_sr_apply_whitelist(hwe);
>>
>> hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
>> XE_BO_FLAG_VRAM_IF_DGFX(tile) |
>> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
>> index ea958400a15a1..9475e3f749580 100644
>> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
>> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
>> @@ -24,7 +24,6 @@
>> #include "xe_hw_engine_types.h"
>> #include "xe_macros.h"
>> #include "xe_mmio.h"
>> -#include "xe_reg_whitelist.h"
>> #include "xe_rtp_types.h"
>>
>> static void reg_sr_fini(struct drm_device *drm, void *arg)
>> @@ -192,54 +191,6 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
>> xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
>> }
>>
>> -void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
>> -{
>> - struct xe_reg_sr *sr = &hwe->reg_whitelist;
>> - struct xe_gt *gt = hwe->gt;
>> - struct xe_reg_sr_entry *entry;
>> - struct drm_printer p;
>> - u32 mmio_base = hwe->mmio_base;
>> - unsigned long reg;
>> - unsigned int slot = 0;
>> - unsigned int fw_ref;
>> -
>> - xe_gt_dbg(gt, "Whitelisting %s registers\n", sr->name);
>> -
>> - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>> - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
>> - goto err_force_wake;
>> -
>> - p = xe_gt_dbg_printer(gt);
>> - xa_for_each(&sr->xa, reg, entry) {
>> - if (slot == RING_MAX_NONPRIV_SLOTS) {
>> - xe_gt_err(gt,
>> - "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
>> - hwe->name, RING_MAX_NONPRIV_SLOTS);
>> - break;
>> - }
>> -
>> - xe_reg_whitelist_print_entry(&p, 0, reg, entry);
>> - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
>> - reg | entry->set_bits);
>> - slot++;
>> - }
>> -
>> - /* And clear the rest just in case of garbage */
>> - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
>> - u32 addr = RING_NOPID(mmio_base).addr;
>> -
>> - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
>> - }
>> -
>> - xe_force_wake_put(gt_to_fw(gt), fw_ref);
>> -
>> - return;
>> -
>> -err_force_wake:
>> - xe_force_wake_put(gt_to_fw(gt), fw_ref);
>> - xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
>> -}
>> -
>> /**
>> * xe_reg_sr_dump - print all save/restore entries
>> * @sr: Save/restore entries
>> diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> index 3996934974fa0..0be105b305907 100644
>> --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
>> @@ -10,7 +10,9 @@
>> #include "regs/xe_oa_regs.h"
>> #include "regs/xe_regs.h"
>> #include "xe_gt_types.h"
>> +#include "xe_gt_printk.h"
>> #include "xe_platform_types.h"
>> +#include "xe_reg_sr.h"
>> #include "xe_rtp.h"
>> #include "xe_step.h"
>>
>> @@ -89,6 +91,62 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
>> {}
>> };
>>
>> +static int whitelist_add_to_hwe_sr(struct xe_reg_sr_entry *entry,
>> + unsigned int slot,
>> + struct xe_hw_engine *hwe)
>> +{
>> + struct xe_reg_sr_entry hwe_entry = (struct xe_reg_sr_entry) {
>> + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
>> + .set_bits = entry->reg.addr | entry->set_bits,
>> + .clr_bits = entry->set_bits,
>
>Should we just set clr_bits to ~0 since this entry is an exact setting
>for the entire register and we don't want to leave any of the flags (or
>currently unused bits) at their existing value?
right, this will make apply_one_mmio() to go through the `val = 0` logic
rather than reading the current value. Thanks.
Lucas De Marchi
>
>> + .read_mask = entry->read_mask,
>> + };
>> +
>> + return xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt);
>> +}
>> +
>> +static int whitelist_add_nopid_to_hwe_sr(unsigned int slot,
>> + struct xe_hw_engine *hwe)
>> +{
>> + struct xe_reg_sr_entry hwe_entry = (struct xe_reg_sr_entry) {
>> + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
>> + .set_bits = RING_NOPID(hwe->mmio_base).addr,
>> + .clr_bits = ~0u,
>> + };
>> +
>> + return xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt);
>> +}
>> +
>> +static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
>> +{
>> + struct xe_reg_sr *sr = &hwe->reg_whitelist;
>> + struct xe_reg_sr_entry *entry;
>> + struct drm_printer p;
>> + unsigned long reg;
>> + unsigned int slot;
>> +
>> + xe_gt_dbg(hwe->gt, "Whitelisting %s registers\n", sr->name);
>> + p = xe_gt_dbg_printer(hwe->gt);
>> +
>> + slot = 0;
>> + xa_for_each(&sr->xa, reg, entry) {
>> + if (slot == RING_MAX_NONPRIV_SLOTS) {
>> + xe_gt_err(hwe->gt,
>> + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
>> + hwe->name, RING_MAX_NONPRIV_SLOTS);
>> + break;
>> + }
>> +
>> + xe_reg_whitelist_print_entry(&p, 0, reg, entry);
>> + whitelist_add_to_hwe_sr(entry, slot, hwe);
>> + slot++;
>> + }
>> +
>> + /* And clear the rest just in case of garbage */
>
>If we still want to keep this (which I'm not sure is even really
>important), I think we should change the comment. We're not actually
>"clearing" anything (these registers don't have any kind of
>ignore/disable flag so they're always active). We should elaborate that
>we're just repeatedly adding a specific known register in all the
>remaining entries just so that we know exactly what's in them and so
>that when debugging we have a well-known value we can compare against.
>Since that NOPID register is already userspace accessible without any
>explicit whitelisting, these extra entries will wind up not having any
>functional effect.
>
>
>Matt
>
>> + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
>> + whitelist_add_nopid_to_hwe_sr(slot, hwe);
>> +}
>> +
>> /**
>> * xe_reg_whitelist_process_engine - process table of registers to whitelist
>> * @hwe: engine instance to process whitelist for
>> @@ -102,6 +160,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
>> struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
>>
>> xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
>> + whitelist_apply_to_hwe(hwe);
>> }
>>
>> /**
>> --
>> 2.47.0
>>
>
>--
>Matt Roper
>Graphics Software Engineer
>Linux GPU Platform Enablement
>Intel Corporation
More information about the Intel-xe
mailing list