[Intel-xe] [PATCH] drm/xe: Raise GT frequency before GuC/HuC load
Rodrigo Vivi
rodrigo.vivi at intel.com
Thu Nov 9 18:23:53 UTC 2023
On Wed, Nov 08, 2023 at 04:33:39PM -0800, Vinay Belgaumkar wrote:
> Starting GT freq is usually RPn. Raising freq to RP0 will
> help speed up GuC load times. As an example, this data was
> collected on DG2-
>
> GuC Load time @RPn ~ 41 ms
> GuC Load time @RP0 ~ 11 ms
>
> v2: Raise GT freq before hwconfig init. This will speed up
> both HuC and GuC loads. Address review comments (Rodrigo).
> Also add a small usleep after requesting frequency which gives
> pcode some time to react.
>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++
> drivers/gpu/drm/xe/xe_gt.c | 4 +++
> drivers/gpu/drm/xe/xe_guc_pc.c | 49 ++++++++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_guc_pc.h | 1 +
> 4 files changed, 57 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cd1821d96a5d..12651d280591 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -244,6 +244,11 @@
>
> #define RPNSWREQ XE_REG(0xa008)
> #define REQ_RATIO_MASK REG_GENMASK(31, 23)
> +
> +#define RP_CONTROL XE_REG(0xa024)
> +#define RPSWCTL_MASK REG_GENMASK(10, 9)
> +#define RPSWCTL_ENABLE REG_FIELD_PREP(RPSWCTL_MASK, 2)
> +#define RPSWCTL_DISABLE REG_FIELD_PREP(RPSWCTL_MASK, 0)
> #define RC_CONTROL XE_REG(0xa090)
> #define RC_STATE XE_REG(0xa094)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 74e1f47bd401..a0b216b87bef 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -29,6 +29,7 @@
> #include "xe_gt_tlb_invalidation.h"
> #include "xe_gt_topology.h"
> #include "xe_guc_exec_queue_types.h"
> +#include "xe_guc_pc.h"
> #include "xe_hw_fence.h"
> #include "xe_hw_engine_class_sysfs.h"
> #include "xe_irq.h"
> @@ -340,6 +341,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
> if (err)
> goto err_force_wake;
>
> + /* Raise GT freq to speed up HuC/GuC load */
> + xe_guc_pc_init_early(&gt->uc.guc.pc);
> +
> err = xe_uc_init_hwconfig(&gt->uc);
> if (err)
> goto err_force_wake;
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> index d9375d1d582f..c6884f9df5fc 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -247,6 +247,12 @@ static u32 decode_freq(u32 raw)
> GEN9_FREQ_SCALER);
> }
>
> +static u32 encode_freq(u32 freq)
> +{
> + return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER,
> + GT_FREQUENCY_MULTIPLIER);
> +}
> +
> static u32 pc_get_min_freq(struct xe_guc_pc *pc)
> {
> u32 freq;
> @@ -257,6 +263,32 @@ static u32 pc_get_min_freq(struct xe_guc_pc *pc)
> return decode_freq(freq);
> }
>
> +static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
> +{
> + struct xe_gt *gt = pc_to_gt(pc);
> + u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
> +
> + /* Allow/Disallow punit to process software freq requests */
> + xe_mmio_write32(gt, RP_CONTROL, state);
> +}
> +
> +static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
> +{
> + struct xe_gt *gt = pc_to_gt(pc);
> + uint32_t rpnswreq;
> +
> + pc_set_manual_rp_ctrl(pc, true);
> +
> + /* Req freq is in units of 16.66 Mhz */
> + rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
> + xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
> +
> + /* Sleep for a small time to allow pcode to respond */
> + usleep_range(100, 300);
> +
> + pc_set_manual_rp_ctrl(pc, false);
> +}
> +
I believe we should move these functions to a new component...
but that is for later... maybe we will get an xe_freq soon and then
we can refactor. But the gains are good and the patch is clean,
so let's move ahead with it:
Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
> {
> /*
> @@ -685,6 +717,21 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
> else
> tgl_init_fused_rp_values(pc);
> }
> +
> +/**
> + * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
> + * frequency to allow faster GuC load times
> + * @pc: Xe_GuC_PC instance
> + */
> +void xe_guc_pc_init_early(struct xe_guc_pc *pc)
> +{
> + struct xe_gt *gt = pc_to_gt(pc);
> +
> + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
> + pc_init_fused_rp_values(pc);
> + pc_set_cur_freq(pc, pc->rp0_freq);
> +}
> +
> static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
> {
> int ret;
> @@ -918,8 +965,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
>
> pc->bo = bo;
>
> - pc_init_fused_rp_values(pc);
> -
> err = sysfs_create_files(gt->sysfs, pc_attrs);
> if (err)
> return err;
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> index 43ea582545b5..054788e006f3 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> @@ -17,4 +17,5 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
> enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
> u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
> u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
> +void xe_guc_pc_init_early(struct xe_guc_pc *pc);
> #endif /* _XE_GUC_PC_H_ */
> --
> 2.38.1
>
More information about the Intel-xe
mailing list