[PATCH 5/5] drm/xe: Apply whitelist to engine save-restore
Matt Roper
matthew.d.roper at intel.com
Thu Dec 5 20:42:03 UTC 2024
On Thu, Dec 05, 2024 at 10:22:40AM -0800, Lucas De Marchi wrote:
> Instead of handling the whitelist directly in the GuC ADS
> initialization, make it follow the same logic as other engine registers
> that are save-restored. Main benefit is that then the SW tracking then
> shows it in debugfs and there's no risk of an engine workaround to write
> to the same nopriv register that is being passed directly to GuC.
>
> This means that xe_reg_whitelist_process_engine() then only process the
> RTP, converting it to sr entries and then add those entries to the
> engine sr. With that all the registers should be covered by
> xe_reg_sr_apply_mmio() to write to the HW and there's no special
> handling in GuC ADS to also add these registers.
>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 4 +-
> drivers/gpu/drm/xe/xe_guc_ads.c | 7 ----
> drivers/gpu/drm/xe/xe_hw_engine.c | 1 -
> drivers/gpu/drm/xe/xe_reg_sr.c | 49 ----------------------
> drivers/gpu/drm/xe/xe_reg_whitelist.c | 59 +++++++++++++++++++++++++++
> 5 files changed, 60 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index d6744be01a687..41ab7fbebc193 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt)
> if (err)
> return err;
>
> - for_each_hw_engine(hwe, gt, id) {
> + for_each_hw_engine(hwe, gt, id)
> xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
> - xe_reg_sr_apply_whitelist(hwe);
> - }
>
> /* Get CCS mode in sync between sw/hw */
> xe_gt_apply_ccs_mode(gt);
> diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
> index b0afb89d9d90c..943146e5b460d 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ads.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ads.c
> @@ -243,8 +243,6 @@ static size_t calculate_regset_size(struct xe_gt *gt)
> xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
> count++;
>
> - count += RING_MAX_NONPRIV_SLOTS * XE_NUM_HW_ENGINES;
> -
> count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
>
> if (XE_WA(gt, 1607983814))
> @@ -729,11 +727,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
> xa_for_each(&hwe->reg_sr.xa, idx, entry)
> guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
>
> - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
> - guc_mmio_regset_write_one(ads, regset_map,
> - RING_FORCE_TO_NONPRIV(hwe->mmio_base, i),
> - count++);
> -
> for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
> if (e->skip)
> continue;
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index c4b0dc3be39c3..b193667441488 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -574,7 +574,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
> xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
>
> xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
> - xe_reg_sr_apply_whitelist(hwe);
>
> hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
> XE_BO_FLAG_VRAM_IF_DGFX(tile) |
> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
> index ea958400a15a1..9475e3f749580 100644
> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
> @@ -24,7 +24,6 @@
> #include "xe_hw_engine_types.h"
> #include "xe_macros.h"
> #include "xe_mmio.h"
> -#include "xe_reg_whitelist.h"
> #include "xe_rtp_types.h"
>
> static void reg_sr_fini(struct drm_device *drm, void *arg)
> @@ -192,54 +191,6 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
> xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
> }
>
> -void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
> -{
> - struct xe_reg_sr *sr = &hwe->reg_whitelist;
> - struct xe_gt *gt = hwe->gt;
> - struct xe_reg_sr_entry *entry;
> - struct drm_printer p;
> - u32 mmio_base = hwe->mmio_base;
> - unsigned long reg;
> - unsigned int slot = 0;
> - unsigned int fw_ref;
> -
> - xe_gt_dbg(gt, "Whitelisting %s registers\n", sr->name);
> -
> - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
> - goto err_force_wake;
> -
> - p = xe_gt_dbg_printer(gt);
> - xa_for_each(&sr->xa, reg, entry) {
> - if (slot == RING_MAX_NONPRIV_SLOTS) {
> - xe_gt_err(gt,
> - "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
> - hwe->name, RING_MAX_NONPRIV_SLOTS);
> - break;
> - }
> -
> - xe_reg_whitelist_print_entry(&p, 0, reg, entry);
> - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
> - reg | entry->set_bits);
> - slot++;
> - }
> -
> - /* And clear the rest just in case of garbage */
> - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
> - u32 addr = RING_NOPID(mmio_base).addr;
> -
> - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
> - }
> -
> - xe_force_wake_put(gt_to_fw(gt), fw_ref);
> -
> - return;
> -
> -err_force_wake:
> - xe_force_wake_put(gt_to_fw(gt), fw_ref);
> - xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
> -}
> -
> /**
> * xe_reg_sr_dump - print all save/restore entries
> * @sr: Save/restore entries
> diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
> index 3996934974fa0..0be105b305907 100644
> --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
> +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
> @@ -10,7 +10,9 @@
> #include "regs/xe_oa_regs.h"
> #include "regs/xe_regs.h"
> #include "xe_gt_types.h"
> +#include "xe_gt_printk.h"
> #include "xe_platform_types.h"
> +#include "xe_reg_sr.h"
> #include "xe_rtp.h"
> #include "xe_step.h"
>
> @@ -89,6 +91,62 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
> {}
> };
>
> +static int whitelist_add_to_hwe_sr(struct xe_reg_sr_entry *entry,
> + unsigned int slot,
> + struct xe_hw_engine *hwe)
> +{
> + struct xe_reg_sr_entry hwe_entry = (struct xe_reg_sr_entry) {
> + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
> + .set_bits = entry->reg.addr | entry->set_bits,
> + .clr_bits = entry->set_bits,
Should we just set clr_bits to ~0 since this entry is an exact setting
for the entire register and we don't want to leave any of the flags (or
currently unused bits) at their existing value?
> + .read_mask = entry->read_mask,
> + };
> +
> + return xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt);
> +}
> +
> +static int whitelist_add_nopid_to_hwe_sr(unsigned int slot,
> + struct xe_hw_engine *hwe)
> +{
> + struct xe_reg_sr_entry hwe_entry = (struct xe_reg_sr_entry) {
> + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
> + .set_bits = RING_NOPID(hwe->mmio_base).addr,
> + .clr_bits = ~0u,
> + };
> +
> + return xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt);
> +}
> +
> +static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
> +{
> + struct xe_reg_sr *sr = &hwe->reg_whitelist;
> + struct xe_reg_sr_entry *entry;
> + struct drm_printer p;
> + unsigned long reg;
> + unsigned int slot;
> +
> + xe_gt_dbg(hwe->gt, "Whitelisting %s registers\n", sr->name);
> + p = xe_gt_dbg_printer(hwe->gt);
> +
> + slot = 0;
> + xa_for_each(&sr->xa, reg, entry) {
> + if (slot == RING_MAX_NONPRIV_SLOTS) {
> + xe_gt_err(hwe->gt,
> + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
> + hwe->name, RING_MAX_NONPRIV_SLOTS);
> + break;
> + }
> +
> + xe_reg_whitelist_print_entry(&p, 0, reg, entry);
> + whitelist_add_to_hwe_sr(entry, slot, hwe);
> + slot++;
> + }
> +
> + /* And clear the rest just in case of garbage */
If we still want to keep this (which I'm not sure is even really
important), I think we should change the comment. We're not actually
"clearing" anything (these registers don't have any kind of
ignore/disable flag so they're always active). We should elaborate that
we're just repeatedly adding a specific known register in all the
remaining entries just so that we know exactly what's in them and so
that when debugging we have a well-known value we can compare against.
Since that NOPID register is already userspace accessible without any
explicit whitelisting, these extra entries will wind up not having any
functional effect.
Matt
> + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
> + whitelist_add_nopid_to_hwe_sr(slot, hwe);
> +}
> +
> /**
> * xe_reg_whitelist_process_engine - process table of registers to whitelist
> * @hwe: engine instance to process whitelist for
> @@ -102,6 +160,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
> struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
>
> xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
> + whitelist_apply_to_hwe(hwe);
> }
>
> /**
> --
> 2.47.0
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list