[PATCH 6/6] drm/xe: Introduce the busted_mode debugfs
Dafna Hirschfeld
dhirschfeld at habana.ai
Mon Mar 18 21:12:10 UTC 2024
On 15.03.2024 10:01, Rodrigo Vivi wrote:
>So, the busted mode can be selected at runtime with the device
>granularity, rather than a module policy.
>
>Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>Cc: Alan Previn <alan.previn.teres.alexis at intel.com>
>Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
>---
> drivers/gpu/drm/xe/xe_debugfs.c | 12 +++++++++
> drivers/gpu/drm/xe/xe_guc_ads.c | 46 +++++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_guc_ads.h | 1 +
> 3 files changed, 59 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
>index 175ba306c3eb..0cd20862d32e 100644
>--- a/drivers/gpu/drm/xe/xe_debugfs.c
>+++ b/drivers/gpu/drm/xe/xe_debugfs.c
>@@ -12,6 +12,7 @@
> #include "xe_bo.h"
> #include "xe_device.h"
> #include "xe_gt_debugfs.h"
>+#include "xe_guc_ads.h"
> #include "xe_pm.h"
> #include "xe_step.h"
>
>@@ -124,8 +125,10 @@ static ssize_t busted_mode_set(struct file *f, const char __user *ubuf,
> size_t size, loff_t *pos)
> {
> struct xe_device *xe = file_inode(f)->i_private;
>+ struct xe_gt *gt;
> u32 busted_mode;
> ssize_t ret;
>+ u8 id;
>
> ret = kstrtouint_from_user(ubuf, size, 0, &busted_mode);
> if (ret)
>@@ -136,6 +139,15 @@ static ssize_t busted_mode_set(struct file *f, const char __user *ubuf,
>
> mutex_lock(&xe->busted.lock);
> xe->busted.mode = busted_mode;
>+ if (busted_mode == 2) {
>+ for_each_gt(gt, xe, id) {
>+ ret = xe_guc_ads_scheduler_policy_disable_reset(&gt->uc.guc.ads);
>+ if (ret) {
>+ drm_err(&xe->drm, "Failed to update GuC ADS scheduler policy. GPU might still reset even on the busted_mode=2\n");
>+ break;
>+ }
>+ }
>+ }
> mutex_unlock(&xe->busted.lock);
>
> return size;
>diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
>index 43f0a88bbe8a..5dccdbe595bf 100644
>--- a/drivers/gpu/drm/xe/xe_guc_ads.c
>+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
>@@ -7,6 +7,7 @@
>
> #include <drm/drm_managed.h>
>
>+#include "abi/guc_actions_abi.h"
> #include "regs/xe_engine_regs.h"
> #include "regs/xe_gt_regs.h"
> #include "regs/xe_guc_regs.h"
>@@ -14,6 +15,7 @@
> #include "xe_gt.h"
> #include "xe_gt_ccs_mode.h"
> #include "xe_guc.h"
>+#include "xe_guc_ct.h"
> #include "xe_hw_engine.h"
> #include "xe_lrc.h"
> #include "xe_map.h"
>@@ -679,3 +681,47 @@ void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
> {
> guc_populate_golden_lrc(ads);
> }
>+
>+static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
>+{
>+ struct xe_guc_ct *ct = &ads_to_guc(ads)->ct;
>+ u32 action[] = {
>+ XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
>+ policy_offset
>+ };
>+
>+ return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
>+}
>+
>+int xe_guc_ads_scheduler_policy_disable_reset(struct xe_guc_ads *ads)
>+{
>+ struct xe_device *xe = ads_to_xe(ads);
>+ struct xe_gt *gt = ads_to_gt(ads);
>+ struct xe_tile *tile = gt_to_tile(gt);
>+ struct guc_policies *policies;
>+ struct xe_bo *bo;
>+ int ret = 0;
>+
>+ policies = kmalloc(sizeof(*policies), GFP_KERNEL);
>+ if (!policies)
>+ return -ENOMEM;
>+
>+ policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
>+ policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
>+ policies->is_valid = 1;
>+ if (xe->busted.mode == 2)
>+ policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
>+
>+ bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
>+ XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>+ XE_BO_CREATE_GGTT_BIT);
Hi,
This commit title is identical to that of the previous commit in this patchset. I think it
would be better to change it to avoid confusion.
Also, the 'bo' created here is only released upon device release, right? It can be released
immediately.
Thanks,
Dafna
>+ if (IS_ERR(bo)) {
>+ ret = PTR_ERR(bo);
>+ goto out;
>+ }
>+
>+ ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
>+out:
>+ kfree(policies);
>+ return ret;
>+}
>diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
>index 138ef6267671..7c45c40fab34 100644
>--- a/drivers/gpu/drm/xe/xe_guc_ads.h
>+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
>@@ -13,5 +13,6 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
> void xe_guc_ads_populate(struct xe_guc_ads *ads);
> void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
> void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
>+int xe_guc_ads_scheduler_policy_disable_reset(struct xe_guc_ads *ads);
>
> #endif
>--
>2.44.0
>
More information about the Intel-xe
mailing list