<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 12-04-2024 19:12, Lucas De Marchi
wrote:<br>
</div>
<blockquote type="cite" cite="mid:7ioxhkpm4jidj5m4hcc5jsatfk333d4sgso7o5bl2pqmq5xxyc@36dmyunxbviy">On
Fri, Apr 12, 2024 at 01:32:45PM +0530, Himal Prasad Ghimiray
wrote:
<br>
<blockquote type="cite">xe_pm_init may encounter failures for
various reasons, such as a failure
<br>
in initializing drmm_mutex, or when dealing with a
d3cold-capable device
<br>
for vram_threshold sysfs creation and setting default threshold.
<br>
Presently, all these potential failures are disregarded.
<br>
<br>
Move d3cold.lock initialization to xe_pm_init_early and cause
driver
<br>
abort if mutex initialization has failed.
<br>
<br>
Warn about failure to create and setting default threshold.
<br>
<br>
Cc: Lucas De Marchi <a class="moz-txt-link-rfc2396E" href="mailto:lucas.demarchi@intel.com"><lucas.demarchi@intel.com></a>
<br>
Cc: Rodrigo Vivi <a class="moz-txt-link-rfc2396E" href="mailto:rodrigo.vivi@intel.com"><rodrigo.vivi@intel.com></a>
<br>
Signed-off-by: Himal Prasad Ghimiray
<a class="moz-txt-link-rfc2396E" href="mailto:himal.prasad.ghimiray@intel.com"><himal.prasad.ghimiray@intel.com></a>
<br>
---
<br>
drivers/gpu/drm/xe/xe_device_sysfs.c | 12 ++++------
<br>
drivers/gpu/drm/xe/xe_device_sysfs.h | 2 +-
<br>
drivers/gpu/drm/xe/xe_pm.c | 34
+++++++++++++++++++++++-----
<br>
drivers/gpu/drm/xe/xe_pm.h | 2 +-
<br>
4 files changed, 34 insertions(+), 16 deletions(-)
<br>
<br>
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c
b/drivers/gpu/drm/xe/xe_device_sysfs.c
<br>
index e47c8ad1bb17..21677b8cd977 100644
<br>
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
<br>
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
<br>
@@ -76,18 +76,14 @@ static void xe_device_sysfs_fini(struct
drm_device *drm, void *arg)
<br>
sysfs_remove_file(&xe->drm.dev->kobj,
&dev_attr_vram_d3cold_threshold.attr);
<br>
}
<br>
<br>
-void xe_device_sysfs_init(struct xe_device *xe)
<br>
+int xe_device_sysfs_init(struct xe_device *xe)
<br>
{
<br>
struct device *dev = xe->drm.dev;
<br>
int ret;
<br>
<br>
ret = sysfs_create_file(&dev->kobj,
&dev_attr_vram_d3cold_threshold.attr);
<br>
- if (ret) {
<br>
- drm_warn(&xe->drm, "Failed to create sysfs
file\n");
<br>
- return;
<br>
- }
<br>
-
<br>
- ret = drmm_add_action_or_reset(&xe->drm,
xe_device_sysfs_fini, xe);
<br>
if (ret)
<br>
- drm_warn(&xe->drm, "Failed to add sysfs fini drm
action\n");
<br>
+ return ret;
<br>
+
<br>
+ return drmm_add_action_or_reset(&xe->drm,
xe_device_sysfs_fini, xe);
<br>
}
<br>
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h
b/drivers/gpu/drm/xe/xe_device_sysfs.h
<br>
index 38b240684bee..f9e83d8bd2c7 100644
<br>
--- a/drivers/gpu/drm/xe/xe_device_sysfs.h
<br>
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
<br>
@@ -8,6 +8,6 @@
<br>
<br>
struct xe_device;
<br>
<br>
-void xe_device_sysfs_init(struct xe_device *xe);
<br>
+int xe_device_sysfs_init(struct xe_device *xe);
<br>
<br>
#endif
<br>
diff --git a/drivers/gpu/drm/xe/xe_pm.c
b/drivers/gpu/drm/xe/xe_pm.c
<br>
index f1fc83845c01..f4d9441720b4 100644
<br>
--- a/drivers/gpu/drm/xe/xe_pm.c
<br>
+++ b/drivers/gpu/drm/xe/xe_pm.c
<br>
@@ -208,10 +208,25 @@ static void xe_pm_runtime_init(struct
xe_device *xe)
<br>
pm_runtime_put(dev);
<br>
}
<br>
<br>
-void xe_pm_init_early(struct xe_device *xe)
<br>
+int xe_pm_init_early(struct xe_device *xe)
<br>
{
<br>
+ int err;
<br>
+
<br>
INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
<br>
- drmm_mutex_init(&xe->drm,
&xe->mem_access.vram_userfault.lock);
<br>
+
<br>
+ err = drmm_mutex_init(&xe->drm,
&xe->mem_access.vram_userfault.lock);
<br>
+ if (err)
<br>
+ return err;
<br>
+
<br>
+ /* Currently d3cold.lock will be used only with GuC */
<br>
</blockquote>
<br>
it's cheaper to just initialize it regardless so this can be
simpler
<br>
<br>
int xe_pm_init_early(struct xe_device *xe)
<br>
{
<br>
int err;
<br>
<br>
INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
<br>
<br>
if ((err = drmm_mutex_init(&xe->drm,
&xe->mem_access.vram_userfault.lock) ||
<br>
(err = drmm_mutex_init(&xe->drm,
&xe->d3cold.lock)))
<br>
return err;
<br>
</blockquote>
Looks clean. Will prefer using this. <br>
<blockquote type="cite" cite="mid:7ioxhkpm4jidj5m4hcc5jsatfk333d4sgso7o5bl2pqmq5xxyc@36dmyunxbviy">
<br>
return 0;
<br>
}
<br>
<br>
or keep the err assignment separate, doesn't matter much. But
<br>
when we mix success and failure for a return-early style it makes
<br>
it harder to read.
<br>
<br>
<blockquote type="cite">
<br>
/**
<br>
@@ -219,20 +234,27 @@ void xe_pm_init_early(struct xe_device
*xe)
<br>
* @xe: xe device instance
<br>
*
<br>
* This component is responsible for System and Device sleep
states.
<br>
+ *
<br>
</blockquote>
<br>
wrong line
<br>
<br>
<blockquote type="cite"> */
<br>
void xe_pm_init(struct xe_device *xe)
<br>
{
<br>
+ int err;
<br>
+
<br>
/* For now suspend/resume is only allowed with GuC */
<br>
if (!xe_device_uc_enabled(xe))
<br>
return;
<br>
<br>
- drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
<br>
-
<br>
xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
<br>
<br>
if (xe->d3cold.capable) {
<br>
- xe_device_sysfs_init(xe);
<br>
- xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
<br>
+ err = xe_device_sysfs_init(xe);
<br>
</blockquote>
<br>
apparently not because of this patch, but why do we call a
function
<br>
named xe_device_sysfs_init() iff xe->d3cold.capable? And from
within
<br>
xe_pm. That seems totally misplaced. +Rodrigo
<br>
<br>
<blockquote type="cite">+ if (err)
<br>
+ drm_warn(&xe->drm,
<br>
+ "Sysfs create for user to set vram threshold
failed\n");
<br>
</blockquote>
<br>
just warning?
<br>
</blockquote>
<p><span style="color: rgb(13, 13, 13); font-family: Söhne, ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, Ubuntu, Cantarell, "Noto Sans", sans-serif, "Helvetica Neue", Arial, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;">If I propagate xe_pm_int errors to xe_pci.c, it exhibits unexpected behavior. The PCI module throws errors as anticipated, but when I remove the xe module with "module -r xe", it doesn't properly clean up the "/sys/class/drm/card*" directory. Subsequently, upon reloading, it complains about existing sysfs entries and fails to load. This behavior aligns with the issue described in </span><a target="_new" href="https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1352" style="border: 0px solid rgb(227, 227, 227); box-sizing: border-box; --tw-border-spacing-x: 0; --tw-border-spacing-y: 0; --tw-translate-x: 0; --tw-translate-y: 0; --tw-rotate: 0; --tw-skew-x: 0; --tw-skew-y: 0; --tw-scale-x: 1; --tw-scale-y: 1; --tw-pan-x: ; --tw-pan-y: ; --tw-pinch-zoom: ; --tw-scroll-snap-strictness: proximity; --tw-gradient-from-position: ; --tw-gradient-via-position: ; --tw-gradient-to-position: ; --tw-ordinal: ; --tw-slashed-zero: ; --tw-numeric-figure: ; --tw-numeric-spacing: ; --tw-numeric-fraction: ; --tw-ring-inset: ; --tw-ring-offset-width: 0px; --tw-ring-offset-color: #fff; --tw-ring-color: rgba(69,89,164,.5); --tw-ring-offset-shadow: 0 0 transparent; --tw-ring-shadow: 0 0 transparent; --tw-shadow: 0 0 transparent; --tw-shadow-colored: 0 0 transparent; --tw-blur: ; --tw-brightness: ; --tw-contrast: ; --tw-grayscale: ; --tw-hue-rotate: ; --tw-invert: ; --tw-saturate: ; --tw-sepia: ; --tw-drop-shadow: ; --tw-backdrop-blur: ; --tw-backdrop-brightness: ; --tw-backdrop-contrast: ; --tw-backdrop-grayscale: ; --tw-backdrop-hue-rotate: ; --tw-backdrop-invert: ; --tw-backdrop-opacity: ; --tw-backdrop-saturate: ; --tw-backdrop-sepia: ; --tw-contain-size: ; --tw-contain-layout: ; --tw-contain-paint: ; --tw-contain-style: ; color: var(--link); text-decoration: none; font-weight: 400; font-family: Söhne, ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, Ubuntu, Cantarell, "Noto Sans", sans-serif, "Helvetica Neue", Arial, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255);" class="moz-txt-link-freetext">https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1352</a><span style="color: rgb(13, 13, 13); font-family: Söhne, ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, Ubuntu, Cantarell, "Noto Sans", sans-serif, "Helvetica Neue", Arial, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;">. </span></p>
<blockquote type="cite" cite="mid:7ioxhkpm4jidj5m4hcc5jsatfk333d4sgso7o5bl2pqmq5xxyc@36dmyunxbviy">
<br>
Lucas De Marchi
<br>
<br>
<blockquote type="cite">+
<br>
+ err = xe_pm_set_vram_threshold(xe,
DEFAULT_VRAM_THRESHOLD);
<br>
+ if (err)
<br>
+ drm_warn(&xe->drm, "Setting default vram
threshold failed\n");
<br>
}
<br>
<br>
xe_pm_runtime_init(xe);
<br>
diff --git a/drivers/gpu/drm/xe/xe_pm.h
b/drivers/gpu/drm/xe/xe_pm.h
<br>
index 0cb38ca244fe..1e6ec58878d2 100644
<br>
--- a/drivers/gpu/drm/xe/xe_pm.h
<br>
+++ b/drivers/gpu/drm/xe/xe_pm.h
<br>
@@ -20,7 +20,7 @@ struct xe_device;
<br>
int xe_pm_suspend(struct xe_device *xe);
<br>
int xe_pm_resume(struct xe_device *xe);
<br>
<br>
-void xe_pm_init_early(struct xe_device *xe);
<br>
+int xe_pm_init_early(struct xe_device *xe);
<br>
void xe_pm_init(struct xe_device *xe);
<br>
void xe_pm_runtime_fini(struct xe_device *xe);
<br>
bool xe_pm_runtime_suspended(struct xe_device *xe);
<br>
-- <br>
2.25.1
<br>
<br>
</blockquote>
</blockquote>
</body>
</html>