[PATCH v6 1/2] drm/xe/lnl: Apply Wa_22019338487
Belgaumkar, Vinay
vinay.belgaumkar at intel.com
Thu Jun 20 22:22:56 UTC 2024
On 6/20/2024 3:04 PM, Rodrigo Vivi wrote:
> On Tue, Jun 18, 2024 at 06:26:16PM -0700, Vinay Belgaumkar wrote:
>> This WA requires us to limit media GT frequency requests to a certain
>> cap value during driver load. Freq limits are restored after load
>> completes, so perf will not be affected during normal operations.
>>
>> During normal driver operation, this WA requires dummy writes to media
>> offset 0x380D8C after every ~63 GGTT writes. This will ensure completion
>> of the LMEM writes originating from Gunit.
>>
>> During driver unload(before FLR), the WA requires that we set requested
>> frequency to the cap value again.
>>
>> v3: Do not use WA number in function name. Call WA wrapper from xe_device.
>> Rename some variables, check for locks in the correct function (Rodrigo).
>> Ensure reset path is also covered for this WA.
>>
>> v4: Fix BAT failure
>>
>> v5: Add a function pointer for ggtt_ops (Michal W)
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>> drivers/gpu/drm/xe/Makefile | 3 ++
>> drivers/gpu/drm/xe/xe_device.c | 3 ++
>> drivers/gpu/drm/xe/xe_ggtt.c | 40 ++++++++++++++--
>> drivers/gpu/drm/xe/xe_ggtt.h | 1 +
>> drivers/gpu/drm/xe/xe_ggtt_types.h | 12 +++--
>> drivers/gpu/drm/xe/xe_gsc.c | 5 ++
>> drivers/gpu/drm/xe/xe_gt.c | 24 ++++++++++
>> drivers/gpu/drm/xe/xe_gt.h | 1 +
>> drivers/gpu/drm/xe/xe_guc_pc.c | 71 +++++++++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_guc_pc.h | 1 +
>> drivers/gpu/drm/xe/xe_guc_pc_types.h | 4 ++
>> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
>> 12 files changed, 157 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 20dc9759bb3c..b1e03bfe4a68 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -24,9 +24,12 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
>> $(call cmd,wa_oob)
>>
>> uses_generated_oob := \
>> + $(obj)/xe_ggtt.o \
>> $(obj)/xe_gsc.o \
>> + $(obj)/xe_gt.o \
>> $(obj)/xe_guc.o \
>> $(obj)/xe_guc_ads.o \
>> + $(obj)/xe_guc_pc.o \
>> $(obj)/xe_migrate.o \
>> $(obj)/xe_ring_ops.o \
>> $(obj)/xe_vm.o \
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 75d4c8ae9234..2172b7d0e36a 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -676,6 +676,9 @@ int xe_device_probe(struct xe_device *xe)
>>
>> xe_hwmon_register(xe);
>>
>> + for_each_gt(gt, xe, id)
>> + xe_gt_sanitize_freq(gt);
>> +
>> return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>>
>> err_fini_display:
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
>> index 8ff91fd1b7c8..c345298d4a14 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt.c
>> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
>> @@ -11,6 +11,7 @@
>> #include <drm/drm_drv.h>
>> #include <drm/drm_managed.h>
>> #include <drm/intel/i915_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>
>> #include "regs/xe_gt_regs.h"
>> #include "regs/xe_gtt_defs.h"
>> @@ -23,8 +24,10 @@
>> #include "xe_gt_sriov_vf.h"
>> #include "xe_gt_tlb_invalidation.h"
>> #include "xe_map.h"
>> +#include "xe_mmio.h"
>> #include "xe_pm.h"
>> #include "xe_sriov.h"
>> +#include "xe_wa.h"
>> #include "xe_wopcm.h"
>>
>> static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> @@ -69,6 +72,21 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
>> return ggms ? SZ_1M << ggms : 0;
>> }
>>
>> +static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
>> +{
>> + /*
>> + * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
>> + * to wait for completion of prior GTT writes before letting this through.
>> + * This needs to be done for all GGTT writes originating from the CPU.
>> + */
>> + lockdep_assert_held(&ggtt->lock);
>> +
>> + if ((++ggtt->access_count % 63) == 0) {
>> + xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0);
>> + ggtt->access_count = 0;
>> + }
>> +}
>> +
>> void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>> {
>> xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
>> @@ -77,6 +95,12 @@ void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
>> writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
>> }
>>
>> +void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte)
> I also just noticed that this doesn't need to be exported... it could/should be static.
>
> I have a patch for that ready, but if the check of the hook failure make you do another
> version, please make this static.
yup, good point. Incidentally, I found another caller which now needs to
use the fn ptr as well-
display/xe_fb_pin.c: xe_ggtt_set_pte(ggtt, *ggtt_ofs,
pte);
display/xe_fb_pin.c: xe_ggtt_set_pte(ggtt,
vma->node.start + x, pte);
which means we can make the original xe_ggtt_set_pte() static as well.
Thanks,
Vinay.
>
> Thanks,
> Rodrigo.
>
>> +{
>> + xe_ggtt_set_pte(ggtt, addr, pte);
>> + ggtt_update_access_counter(ggtt);
>> +}
>> +
>> static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
>> {
>> u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
>> @@ -92,7 +116,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
>> scratch_pte = 0;
>>
>> while (start < end) {
>> - xe_ggtt_set_pte(ggtt, start, scratch_pte);
>> + ggtt->pt_ops->set_pte(ggtt, start, scratch_pte);
>> start += XE_PAGE_SIZE;
>> }
>> }
>> @@ -124,10 +148,17 @@ static void primelockdep(struct xe_ggtt *ggtt)
>>
>> static const struct xe_ggtt_pt_ops xelp_pt_ops = {
>> .pte_encode_bo = xelp_ggtt_pte_encode_bo,
>> + .set_pte = xe_ggtt_set_pte,
>> };
>>
>> static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
>> .pte_encode_bo = xelpg_ggtt_pte_encode_bo,
>> + .set_pte = xe_ggtt_set_pte,
>> +};
>> +
>> +static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
>> + .pte_encode_bo = xelpg_ggtt_pte_encode_bo,
>> + .set_pte = xe_ggtt_set_pte_and_flush,
>> };
>>
>> /*
>> @@ -187,7 +218,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
>> ggtt->size = GUC_GGTT_TOP;
>>
>> if (GRAPHICS_VERx100(xe) >= 1270)
>> - ggtt->pt_ops = &xelpg_pt_ops;
>> + ggtt->pt_ops = ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487) ?
>> + &xelpg_pt_wa_ops : &xelpg_pt_ops;
>> else
>> ggtt->pt_ops = &xelp_pt_ops;
>>
>> @@ -394,7 +426,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
>>
>> for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
>> pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
>> - xe_ggtt_set_pte(ggtt, start + offset, pte);
>> + ggtt->pt_ops->set_pte(ggtt, start + offset, pte);
>> }
>> }
>>
>> @@ -502,7 +534,7 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node
>> return;
>>
>> while (start < end) {
>> - xe_ggtt_set_pte(ggtt, start, pte);
>> + ggtt->pt_ops->set_pte(ggtt, start, pte);
>> start += XE_PAGE_SIZE;
>> }
>>
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
>> index 4a41a1762358..0e36ed4cd68a 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt.h
>> +++ b/drivers/gpu/drm/xe/xe_ggtt.h
>> @@ -11,6 +11,7 @@
>> struct drm_printer;
>>
>> void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
>> +void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte);
>> int xe_ggtt_init_early(struct xe_ggtt *ggtt);
>> int xe_ggtt_init(struct xe_ggtt *ggtt);
>> void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
>> diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> index d8c584d9a8c3..5d3de08831f3 100644
>> --- a/drivers/gpu/drm/xe/xe_ggtt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
>> @@ -13,10 +13,6 @@
>> struct xe_bo;
>> struct xe_gt;
>>
>> -struct xe_ggtt_pt_ops {
>> - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
>> -};
>> -
>> struct xe_ggtt {
>> struct xe_tile *tile;
>>
>> @@ -34,6 +30,14 @@ struct xe_ggtt {
>> const struct xe_ggtt_pt_ops *pt_ops;
>>
>> struct drm_mm mm;
>> +
>> + /** @access_count: counts GGTT writes */
>> + unsigned int access_count;
>> +};
>> +
>> +struct xe_ggtt_pt_ops {
>> + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
>> + void (*set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte);
>> };
>>
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
>> index 80a61934decc..f8239a13fa2b 100644
>> --- a/drivers/gpu/drm/xe/xe_gsc.c
>> +++ b/drivers/gpu/drm/xe/xe_gsc.c
>> @@ -22,6 +22,7 @@
>> #include "xe_gt.h"
>> #include "xe_gt_mcr.h"
>> #include "xe_gt_printk.h"
>> +#include "xe_guc_pc.h"
>> #include "xe_huc.h"
>> #include "xe_map.h"
>> #include "xe_mmio.h"
>> @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
>> return ret;
>>
>> xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
>> +
>> + /* GSC load is done, restore expected GT frequencies */
>> + xe_gt_sanitize_freq(gt);
>> +
>> xe_gt_dbg(gt, "GSC FW async load completed\n");
>>
>> /* HuC auth failure is not fatal */
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 57d84751e160..759634cff1d8 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -9,6 +9,7 @@
>>
>> #include <drm/drm_managed.h>
>> #include <drm/xe_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>
>> #include "instructions/xe_gfxpipe_commands.h"
>> #include "instructions/xe_mi_commands.h"
>> @@ -54,6 +55,7 @@
>> #include "xe_sriov.h"
>> #include "xe_tuning.h"
>> #include "xe_uc.h"
>> +#include "xe_uc_fw.h"
>> #include "xe_vm.h"
>> #include "xe_wa.h"
>> #include "xe_wopcm.h"
>> @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt)
>> /* Get CCS mode in sync between sw/hw */
>> xe_gt_apply_ccs_mode(gt);
>>
>> + /* Restore GT freq to expected values */
>> + xe_gt_sanitize_freq(gt);
>> +
>> return 0;
>> }
>>
>> @@ -801,6 +806,25 @@ int xe_gt_suspend(struct xe_gt *gt)
>> return err;
>> }
>>
>> +/**
>> + * xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
>> + * @gt: the GT object
>> + *
>> + * Called after driver init/GSC load completes to restore GT frequencies if we
>> + * limited them for any WAs.
>> + */
>> +int xe_gt_sanitize_freq(struct xe_gt *gt)
>> +{
>> + int ret = 0;
>> +
>> + if ((!xe_uc_fw_is_available(>->uc.gsc.fw) ||
>> + xe_uc_fw_is_loaded(>->uc.gsc.fw)) &&
>> + XE_WA(gt, 22019338487))
>> + ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc);
>> +
>> + return ret;
>> +}
>> +
>> int xe_gt_resume(struct xe_gt *gt)
>> {
>> int err;
>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>> index 9073ac68a777..1123fdfc4ebc 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.h
>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>> int xe_gt_resume(struct xe_gt *gt);
>> void xe_gt_reset_async(struct xe_gt *gt);
>> void xe_gt_sanitize(struct xe_gt *gt);
>> +int xe_gt_sanitize_freq(struct xe_gt *gt);
>> void xe_gt_remove(struct xe_gt *gt);
>>
>> /**
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index 2b45a9cd3ec0..edf4a29a2aa3 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -8,6 +8,7 @@
>> #include <linux/delay.h>
>>
>> #include <drm/drm_managed.h>
>> +#include <generated/xe_wa_oob.h>
>>
>> #include "abi/guc_actions_slpc_abi.h"
>> #include "regs/xe_gt_regs.h"
>> @@ -25,6 +26,7 @@
>> #include "xe_mmio.h"
>> #include "xe_pcode.h"
>> #include "xe_pm.h"
>> +#include "xe_wa.h"
>>
>> #define MCHBAR_MIRROR_BASE_SNB 0x140000
>>
>> @@ -42,6 +44,8 @@
>> #define GT_FREQUENCY_MULTIPLIER 50
>> #define GT_FREQUENCY_SCALER 3
>>
>> +#define LNL_MERT_FREQ_CAP 800
>> +
>> /**
>> * DOC: GuC Power Conservation (PC)
>> *
>> @@ -695,6 +699,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>> tgl_init_fused_rp_values(pc);
>> }
>>
>> +static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
>> +{
>> + struct xe_gt *gt = pc_to_gt(pc);
>> +
>> + if (XE_WA(gt, 22019338487))
>> + return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
>> + else
>> + return pc->rp0_freq;
>> +}
>> +
>> /**
>> * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
>> * frequency to allow faster GuC load times
>> @@ -706,7 +720,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
>>
>> xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>> pc_init_fused_rp_values(pc);
>> - pc_set_cur_freq(pc, pc->rp0_freq);
>> + pc_set_cur_freq(pc, pc_max_freq_cap(pc));
>> }
>>
>> static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
>> @@ -762,6 +776,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
>> return ret;
>> }
>>
>> +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
>> +{
>> + int ret = 0;
>> +
>> + if (XE_WA(pc_to_gt(pc), 22019338487)) {
>> + /*
>> + * Get updated min/max and stash them.
>> + */
>> + ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
>> + if (!ret)
>> + ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
>> + if (ret)
>> + return ret;
>> +
>> + /*
>> + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
>> + */
>> + mutex_lock(&pc->freq_lock);
>> + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
>> + if (!ret)
>> + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
>> + mutex_unlock(&pc->freq_lock);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/**
>> + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values
>> + * @pc: The GuC PC
>> + *
>> + * Returns: 0 on success,
>> + * error code on failure
>> + */
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc)
>> +{
>> + int ret = 0;
>> +
>> + mutex_lock(&pc->freq_lock);
>> + ret = pc_set_max_freq(pc, pc->stashed_max_freq);
>> + if (!ret)
>> + ret = pc_set_min_freq(pc, pc->stashed_min_freq);
>> + mutex_unlock(&pc->freq_lock);
>> +
>> + return ret;
>> +}
>> +
>> /**
>> * xe_guc_pc_gucrc_disable - Disable GuC RC
>> * @pc: Xe_GuC_PC instance
>> @@ -911,6 +972,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
>> if (ret)
>> goto out;
>>
>> + ret = pc_set_mert_freq_cap(pc);
>> + if (ret)
>> + goto out;
>> +
>> if (xe->info.platform == XE_PVC) {
>> xe_guc_pc_gucrc_disable(pc);
>> ret = 0;
>> @@ -959,6 +1024,10 @@ static void xe_guc_pc_fini_hw(void *arg)
>> XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>> XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>> XE_WARN_ON(xe_guc_pc_stop(pc));
>> +
>> + /* Bind requested freq to mert_freq_cap before unload */
>> + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
>> +
>> xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
>> }
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
>> index 8a7b91ce1b3e..55fdb55ab688 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
>> @@ -32,5 +32,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>> u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>> u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>> void xe_guc_pc_init_early(struct xe_guc_pc *pc);
>> +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
>>
>> #endif /* _XE_GUC_PC_H_ */
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> index 2afd0dbc3542..13810be015db 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
>> @@ -25,6 +25,10 @@ struct xe_guc_pc {
>> u32 user_requested_min;
>> /** @user_requested_max: Stash the maximum requested freq by user */
>> u32 user_requested_max;
>> + /** @stashed_min_freq: Stash the current minimum freq */
>> + u32 stashed_min_freq;
>> + /** @stashed_max_freq: Stash the current maximum freq */
>> + u32 stashed_max_freq;
>> /** @freq_lock: Let's protect the frequencies */
>> struct mutex freq_lock;
>> /** @freq_ready: Only handle freq changes, if they are really ready */
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index 12fe88796a49..a6b897030fde 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -27,3 +27,4 @@
>> 16022287689 GRAPHICS_VERSION(2001)
>> GRAPHICS_VERSION(2004)
>> 13011645652 GRAPHICS_VERSION(2004)
>> +22019338487 MEDIA_VERSION(2000)
>> --
>> 2.38.1
>>
More information about the Intel-xe
mailing list