[PATCH v5 8/8] drm/xe/pf: Enable per-function engine activity stats
Riana Tauro
riana.tauro at intel.com
Fri Feb 7 06:25:17 UTC 2025
Hi Michal
On 2/7/2025 12:59 AM, Michal Wajdeczko wrote:
>
>
> On 06.02.2025 11:43, Riana Tauro wrote:
>> Enable per-function engine activity stats when
>> sriov_numvfs are set and disable when sriov_numvfs
>> are set to 0.
>
> instead referring to magic 'sriov_numvfs' attribute name just say
>
> ... when VFs are enabled / disabled
Okay will change this
>
>>
>> Also restart engine stats when VF's are reprovisioned
>
> shouldn't engine_activity take care of this on GT-reset on it's own?
> it shouldn't be tied to PF config/provisioning code
Wanted to add it after reprovisioning of VF's. Will move it out to
guc code
>
>>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 26 ++++++++++++++++++++--
>> drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 1 +
>> drivers/gpu/drm/xe/xe_pci_sriov.c | 25 +++++++++++++++++++++
>> 3 files changed, 50 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> index b1d994d65589..25855dcb6e42 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
>> @@ -23,6 +23,7 @@
>> #include "xe_guc_buf.h"
>> #include "xe_guc_ct.h"
>> #include "xe_guc_db_mgr.h"
>> +#include "xe_guc_engine_activity.h"
>> #include "xe_guc_fwif.h"
>> #include "xe_guc_id_mgr.h"
>> #include "xe_guc_klv_helpers.h"
>> @@ -1972,6 +1973,21 @@ static void pf_reset_config_thresholds(struct xe_gt *gt, struct xe_gt_sriov_conf
>> #undef reset_threshold_config
>> }
>>
>> +/**
>> + * xe_gt_sriov_pf_engine_stats - Enable/Disable engine stats for PF and VFs
>> + * @gt: the &xe_gt
>> + * @num_vfs: number of VFs to enable
>> + * @enable: enable/disable
>> + *
>> + * Enable or disable engine stats for PF and VF
>> + *
>> + * Return: 0 on success, negative error code otherwise
>> + */
>> +int xe_gt_sriov_pf_config_engine_stats(struct xe_gt *gt, unsigned int num_vfs, bool enable)
>
> wrong place
> this is not a config/provisioning related code
>
>> +{
>> + return xe_guc_engine_activity_function_stats(>->uc.guc, num_vfs, enable);
>> +}
>> +
>> static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
>> {
>> struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
>> @@ -2362,8 +2378,10 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
>> */
>> void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
>> {
>> - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
>> - unsigned int fail = 0, skip = 0;
>> + struct xe_device *xe = gt_to_xe(gt);
>> + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
>> + u16 num_vfs = pci_num_vf(to_pci_dev(xe->drm.dev));
>> + unsigned int fail = 0, skip = 0, ret = 0;
>>
>> for (n = 1; n <= total_vfs; n++) {
>> if (xe_gt_sriov_pf_config_is_empty(gt, n))
>> @@ -2372,6 +2390,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
>> fail++;
>> }
>>
>> + ret = xe_gt_sriov_pf_config_engine_stats(gt, num_vfs, true);
>> + if (ret)
>> + xe_gt_sriov_dbg(gt, "Failed to enable engine stats for PF and VF's %d\n",
>> + ret);
>> if (fail)
>> xe_gt_sriov_notice(gt, "Failed to push %u of %u VF%s configurations\n",
>> fail, total_vfs - skip, str_plural(total_vfs));
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> index f894e9d4abba..a5585b178e6b 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
>> @@ -62,6 +62,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
>> const void *buf, size_t size);
>>
>> bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid);
>> +int xe_gt_sriov_pf_config_engine_stats(struct xe_gt *gt, unsigned int num_vfs, bool enable);
>>
>> void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
>> index aaceee748287..612e64efb43c 100644
>> --- a/drivers/gpu/drm/xe/xe_pci_sriov.c
>> +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
>> @@ -62,6 +62,21 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
>> xe_gt_sriov_pf_control_trigger_flr(gt, n);
>> }
>>
>> +static int pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable)
>> +{
>> + struct xe_gt *gt;
>> + unsigned int id;
>> + int ret = 0;
>> +
>> + for_each_gt(gt, xe, id) {
>> + ret = xe_gt_sriov_pf_config_engine_stats(gt, num_vfs, enable);
>
> can't you directly call xe_guc_engine_activity_function_stats() here?
first patch had that. I added to pf_config for the restart on
suspend/resume and gt reset. Will move it out.
>
>> + if (ret)
>> + return ret;
>
> should we give up on the first failure? maybe just track first error?
>
>> + }
>> +
>> + return ret;
>
> it will be always 0 here
>
> but if we just track errors instead of early exit then we could print
> message here:
>
> xe_sriov_info(xe, "Failed to %s function activity stats (%pe)\n",
> str_enable_disable(enable), ERR_PTR(first_error));
Will fix this.
Thank you
Riana
>
>> +}
>> +
>> static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
>> {
>> struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>> @@ -94,6 +109,11 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
>>
>> xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
>> num_vfs, total_vfs, str_plural(total_vfs));
>> +
>> + err = pf_engine_activity_stats(xe, num_vfs, true);
>> + if (err < 0)
>> + xe_sriov_warn(xe, "Failed to enable function activity stats\n");
>> +
>> return num_vfs;
>>
>> failed:
>> @@ -110,6 +130,7 @@ static int pf_disable_vfs(struct xe_device *xe)
>> struct device *dev = xe->drm.dev;
>> struct pci_dev *pdev = to_pci_dev(dev);
>> u16 num_vfs = pci_num_vf(pdev);
>> + int err;
>>
>> xe_assert(xe, IS_SRIOV_PF(xe));
>> xe_sriov_dbg(xe, "disabling %u VF%s\n", num_vfs, str_plural(num_vfs));
>> @@ -117,6 +138,10 @@ static int pf_disable_vfs(struct xe_device *xe)
>> if (!num_vfs)
>> return 0;
>>
>> + err = pf_engine_activity_stats(xe, num_vfs, false);
>> + if (err < 0)
>> + xe_sriov_warn(xe, "Failed to disable function activity stats\n");
>> +
>> pci_disable_sriov(pdev);
>>
>> pf_reset_vfs(xe, num_vfs);
>
More information about the Intel-xe
mailing list