[PATCH v5 8/8] drm/xe/pf: Enable per-function engine activity stats

Riana Tauro riana.tauro at intel.com
Fri Feb 7 06:25:17 UTC 2025


Hi Michal

On 2/7/2025 12:59 AM, Michal Wajdeczko wrote:
> 
> 
> On 06.02.2025 11:43, Riana Tauro wrote:
>> Enable per-function engine activity stats when
>> sriov_numvfs are set and disable when sriov_numvfs
>> are set to 0.
> 
> instead referring to magic 'sriov_numvfs' attribute name just say
> 
> ... when VFs are enabled / disabled

Okay, will change this.
> 
>>
>> Also restart engine stats when VF's are reprovisioned
> 
> shouldn't engine_activity take care of this on GT-reset on its own?
> it shouldn't be tied to PF config/provisioning code

I wanted to add it after reprovisioning of VFs. Will move it out to
the GuC code.
> 
>>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 26 ++++++++++++++++++++--
>>   drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h |  1 +
>>   drivers/gpu/drm/xe/xe_pci_sriov.c          | 25 +++++++++++++++++++++
>>   3 files changed, 50 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> index b1d994d65589..25855dcb6e42 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> @@ -23,6 +23,7 @@
>>   #include "xe_guc_buf.h"
>>   #include "xe_guc_ct.h"
>>   #include "xe_guc_db_mgr.h"
>> +#include "xe_guc_engine_activity.h"
>>   #include "xe_guc_fwif.h"
>>   #include "xe_guc_id_mgr.h"
>>   #include "xe_guc_klv_helpers.h"
>> @@ -1972,6 +1973,21 @@ static void pf_reset_config_thresholds(struct xe_gt *gt, struct xe_gt_sriov_conf
>>   #undef reset_threshold_config
>>   }
>>   
>> +/**
>> + * xe_gt_sriov_pf_engine_stats - Enable/Disable engine stats for PF and VFs
>> + * @gt: the &xe_gt
>> + * @num_vfs: number of VFs to enable
>> + * @enable: enable/disable
>> + *
>> + * Enable or disable engine stats for PF and VF
>> + *
>> + * Return: 0 on success, negative error code otherwise
>> + */
>> +int xe_gt_sriov_pf_config_engine_stats(struct xe_gt *gt, unsigned int num_vfs, bool enable)
> 
> wrong place
> this is not a config/provisioning related code
> 
>> +{
>> +	return xe_guc_engine_activity_function_stats(&gt->uc.guc, num_vfs, enable);
>> +}
>> +
>>   static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
>>   {
>>   	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
>> @@ -2362,8 +2378,10 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
>>    */
>>   void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
>>   {
>> -	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
>> -	unsigned int fail = 0, skip = 0;
>> +	struct xe_device *xe = gt_to_xe(gt);
>> +	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
>> +	u16 num_vfs = pci_num_vf(to_pci_dev(xe->drm.dev));
>> +	unsigned int fail = 0, skip = 0, ret = 0;
>>   
>>   	for (n = 1; n <= total_vfs; n++) {
>>   		if (xe_gt_sriov_pf_config_is_empty(gt, n))
>> @@ -2372,6 +2390,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
>>   			fail++;
>>   	}
>>   
>> +	ret = xe_gt_sriov_pf_config_engine_stats(gt, num_vfs, true);
>> +	if (ret)
>> +		xe_gt_sriov_dbg(gt, "Failed to enable engine stats for PF and VF's %d\n",
>> +				ret);
>>   	if (fail)
>>   		xe_gt_sriov_notice(gt, "Failed to push %u of %u VF%s configurations\n",
>>   				   fail, total_vfs - skip, str_plural(total_vfs));
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> index f894e9d4abba..a5585b178e6b 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> @@ -62,6 +62,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
>>   				  const void *buf, size_t size);
>>   
>>   bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid);
>> +int xe_gt_sriov_pf_config_engine_stats(struct xe_gt *gt, unsigned int num_vfs, bool enable);
>>   
>>   void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
>> index aaceee748287..612e64efb43c 100644
>> --- a/drivers/gpu/drm/xe/xe_pci_sriov.c
>> +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
>> @@ -62,6 +62,21 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
>>   			xe_gt_sriov_pf_control_trigger_flr(gt, n);
>>   }
>>   
>> +static int pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable)
>> +{
>> +	struct xe_gt *gt;
>> +	unsigned int id;
>> +	int ret = 0;
>> +
>> +	for_each_gt(gt, xe, id) {
>> +		ret = xe_gt_sriov_pf_config_engine_stats(gt, num_vfs, enable);
> 
> can't you directly call xe_guc_engine_activity_function_stats() here?
The first patch had that. I added it to pf_config for the restart on
suspend/resume and GT reset. Will move it out.
> 
>> +		if (ret)
>> +			return ret;
> 
> should we give up on the first failure? maybe just track first error?
> 
>> +	}
>> +
>> +	return ret;
> 
> it will be always 0 here
> 
> but if we just track errors instead of early exit then we could print
> message here:
> 
> xe_sriov_info(xe, "Failed to %s function activity stats (%pe)\n",
> 		str_enable_disable(enable), ERR_PTR(first_error));
Will fix this.

Thank you
Riana
> 
>> +}
>> +
>>   static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
>>   {
>>   	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>> @@ -94,6 +109,11 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
>>   
>>   	xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
>>   		      num_vfs, total_vfs, str_plural(total_vfs));
>> +
>> +	err = pf_engine_activity_stats(xe, num_vfs, true);
>> +	if (err < 0)
>> +		xe_sriov_warn(xe, "Failed to enable function activity stats\n");
>> +
>>   	return num_vfs;
>>   
>>   failed:
>> @@ -110,6 +130,7 @@ static int pf_disable_vfs(struct xe_device *xe)
>>   	struct device *dev = xe->drm.dev;
>>   	struct pci_dev *pdev = to_pci_dev(dev);
>>   	u16 num_vfs = pci_num_vf(pdev);
>> +	int err;
>>   
>>   	xe_assert(xe, IS_SRIOV_PF(xe));
>>   	xe_sriov_dbg(xe, "disabling %u VF%s\n", num_vfs, str_plural(num_vfs));
>> @@ -117,6 +138,10 @@ static int pf_disable_vfs(struct xe_device *xe)
>>   	if (!num_vfs)
>>   		return 0;
>>   
>> +	err = pf_engine_activity_stats(xe, num_vfs, false);
>> +	if (err < 0)
>> +		xe_sriov_warn(xe, "Failed to disable function activity stats\n");
>> +
>>   	pci_disable_sriov(pdev);
>>   
>>   	pf_reset_vfs(xe, num_vfs);
> 



More information about the Intel-xe mailing list