[Intel-xe] [PATCH] drm/xe: Raise GT frequency before GuC load

Belgaumkar, Vinay vinay.belgaumkar at intel.com
Thu Nov 9 00:36:44 UTC 2023


On 11/3/2023 11:37 AM, Rodrigo Vivi wrote:
> On Fri, Oct 20, 2023 at 04:25:53PM -0700, Vinay Belgaumkar wrote:
>> Starting GT freq is usually RPn. Raising freq to RP0 will
>> help speed up GuC load times. As an example, this data was
>> collected on DG2-
>>
>> GuC Load time @RPn ~ 41 ms
>> GuC Load time @RP0 ~ 11 ms
>>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/regs/xe_gt_regs.h |  7 +++++
>>   drivers/gpu/drm/xe/xe_guc.c          |  3 ++
>>   drivers/gpu/drm/xe/xe_guc_pc.c       | 44 ++++++++++++++++++++++++++--
>>   drivers/gpu/drm/xe/xe_guc_pc.h       |  1 +
>>   4 files changed, 53 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> index cd1821d96a5d..0e6fe2ee4a2c 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> @@ -244,6 +244,13 @@
>>   
>>   #define RPNSWREQ				XE_REG(0xa008)
>>   #define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
>> +#define   SW_REQ_UNSLICE_RATIO_SHIFT		23
> please avoid shift and use the REG_FIELD_PREP and REG_FIELD_GET
ok.
>
>> +#define   IGNORE_SLICE_RATIO			(0 << 0)
> x | 0 = x
> so, please avoid it.
Sure.
>
>> +
>> +#define RP_CONTROL				XE_REG(0xa024)
>> +#define   RPSWCTL_SHIFT			9
> same here
>
>> +#define   RPSWCTL_ENABLE			(0x2 << RPSWCTL_SHIFT)
>> +#define   RPSWCTL_DISABLE			(0x0 << RPSWCTL_SHIFT)
> Please use REG_GENMASK and REG_FIELD_PREP
>
> something like
>
> #define   RPSWCTL_MASK		REG_GENMASK(something, 9)
> #define   RPSWCTL_ENABLE	REG_FIELD_PREP(RPSWCTL_MASK, 2)
> #define   RPSWCTL_DISABLE	REG_FIELD_PREP(RPSWCTL_MASK, 0)
ok.
>
>>   #define RC_CONTROL				XE_REG(0xa090)
>>   #define RC_STATE				XE_REG(0xa094)
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>> index 84f0b5488783..5af97982564a 100644
>> --- a/drivers/gpu/drm/xe/xe_guc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc.c
>> @@ -447,6 +447,9 @@ static int __xe_guc_upload(struct xe_guc *guc)
>>   {
>>   	int ret;
>>   
>> +	/* Raise GT freq to speed up GuC load */
>> +	xe_guc_pc_request_rp0(&guc->pc);
>> +
> I have a strange feeling with this. Semantically...
>
> guc_load asking guc_pc to do something...
>
> we will need to have a xe_freq component that is detached from
> the xe_guc_pc and then xe_freq exposes the sysfs and functions
> like this and then it requests to guc_pc when it makes sense.
>
> that can be done later, I agree, but we would need to at least
> give this function a better name....
sure.
>
>>   	guc_write_params(guc);
>>   	guc_prepare_xfer(guc);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index d9375d1d582f..e1543478c627 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -247,6 +247,12 @@ static u32 decode_freq(u32 raw)
>>   				 GEN9_FREQ_SCALER);
>>   }
>>   
>> +static u32 encode_freq(u32 freq)
>> +{
>> +	return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER,
>> +				 GT_FREQUENCY_MULTIPLIER);
>> +}
>> +
>>   static u32 pc_get_min_freq(struct xe_guc_pc *pc)
>>   {
>>   	u32 freq;
>> @@ -257,6 +263,28 @@ static u32 pc_get_min_freq(struct xe_guc_pc *pc)
>>   	return decode_freq(freq);
>>   }
>>   
>> +static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
>> +{
>> +	struct xe_gt *gt = pc_to_gt(pc);
>> +	u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
>> +
>> +	/* Allow/Disallow punit to process software freq requests */
>> +	xe_mmio_write32(gt, RP_CONTROL, state);
>> +}
>> +
>> +static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
>> +{
>> +	struct xe_gt *gt = pc_to_gt(pc);
>> +
>> +	pc_set_manual_rp_ctrl(pc, true);
>> +
>> +	/* Req freq is in units of 16.66 Mhz */
>> +	xe_mmio_write32(gt, RPNSWREQ, (encode_freq(freq) << SW_REQ_UNSLICE_RATIO_SHIFT)
>> +			| IGNORE_SLICE_RATIO);
>> +
>> +	pc_set_manual_rp_ctrl(pc, false);
>> +}
>> +
>>   static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
>>   {
>>   	/*
>> @@ -685,6 +713,20 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
>>   	else
>>   		tgl_init_fused_rp_values(pc);
>>   }
>> +
>> +/**
>> + * xe_guc_pc_request_rp0 - Request RP0
>> + * @pc: Xe_GuC_PC instance
> some more information here on the purpose and usage of this function
> is needed. Specially because it should be avoided on regular cases.
Added.
>
>> + */
>> +void xe_guc_pc_request_rp0(struct xe_guc_pc *pc)
>> +{
>> +	struct xe_gt *gt = pc_to_gt(pc);
>> +
>> +	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
>> +	pc_init_fused_rp_values(pc);
>> +	pc_set_cur_freq(pc, pc->rp0_freq);
>> +}
>> +
>>   static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
>>   {
>>   	int ret;
>> @@ -918,8 +960,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
>>   
>>   	pc->bo = bo;
>>   
>> -	pc_init_fused_rp_values(pc);
>> -
> hmmm... this also shows that we are then calling a function
> of this component before the component is initialized!!
>
> so, maybe we need a function called xe_guc_pc_init_early()
> and then this function by itself sets this pc->rp* stuff
> and manually force the freq to rp0. And we document that
> the pre_init or init_early needs to be called before GuC
> is loaded for faster loading of GuC.

Agree,

Thanks,

Vinay.

>
>>   	err = sysfs_create_files(gt->sysfs, pc_attrs);
>>   	if (err)
>>   		return err;
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
>> index 43ea582545b5..8bed5d9fc12a 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
>> @@ -17,4 +17,5 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
>>   enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
>>   u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
>> +void xe_guc_pc_request_rp0(struct xe_guc_pc *pc);
>>   #endif /* _XE_GUC_PC_H_ */
>> -- 
>> 2.38.1
>>


More information about the Intel-xe mailing list