[PATCH] drm/amdgpu: Keep reset handlers shared

Lazar, Lijo Lijo.Lazar at amd.com
Wed Aug 16 05:38:01 UTC 2023


[AMD Official Use Only - General]

<ping>

Thanks,
Lijo

-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Lijo Lazar
Sent: Thursday, August 10, 2023 5:14 PM
To: amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Kamal, Asad <Asad.Kamal at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: [PATCH] drm/amdgpu: Keep reset handlers shared

Instead of maintaining a list per device, keep the reset handlers common per ASIC family. A pointer to the list of handlers is maintained in reset control.

Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c      | 19 +++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  8 --------
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h   | 16 ++++++++++++----
 drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 20 +++++++++++---------
 drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c   | 19 +++++++++++--------
 5 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 2b97b8a96fb4..82e1c83a7ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -48,20 +48,19 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,  {
        struct amdgpu_reset_handler *handler;
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       int i;

        if (reset_context->method != AMD_RESET_METHOD_NONE) {
                dev_dbg(adev->dev, "Getting reset handler for method %d\n",
                        reset_context->method);
-               list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == reset_context->method)
                                return handler;
                }
        }

        if (aldebaran_is_mode2_default(reset_ctl)) {
-               list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
                                reset_context->method = AMD_RESET_METHOD_MODE2;
                                return handler;
@@ -124,9 +123,9 @@ static void aldebaran_async_reset(struct work_struct *work)
        struct amdgpu_reset_control *reset_ctl =
                container_of(work, struct amdgpu_reset_control, reset_work);
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       int i;

-       list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                            handler_list) {
+       for_each_handler(i, handler, reset_ctl) {
                if (handler->reset_method == reset_ctl->active_reset) {
                        dev_dbg(adev->dev, "Resetting device\n");
                        handler->do_reset(adev);
@@ -395,6 +394,11 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = {
        .do_reset               = aldebaran_mode2_reset,
 };

+static struct amdgpu_reset_handler
+       *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+               &aldebaran_mode2_handler,
+       };
+
 int aldebaran_reset_init(struct amdgpu_device *adev)  {
        struct amdgpu_reset_control *reset_ctl; @@ -408,10 +412,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
        reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
        reset_ctl->get_reset_handler = aldebaran_get_reset_handler;

-       INIT_LIST_HEAD(&reset_ctl->reset_handlers);
        INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
        /* Only mode2 is handled through reset control now */
-       amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+       reset_ctl->reset_handlers = &aldebaran_rst_handlers;

        adev->reset_cntl = reset_ctl;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 5fed06ffcc6b..02d874799c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,14 +26,6 @@
 #include "sienna_cichlid.h"
 #include "smu_v13_0_10.h"

-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
-                            struct amdgpu_reset_handler *handler)
-{
-       /* TODO: Check if handler exists? */
-       list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
-       return 0;
-}
-
 int amdgpu_reset_init(struct amdgpu_device *adev)  {
        int ret = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f4a501ff87d9..471d789b33a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,6 +26,8 @@

 #include "amdgpu.h"

+#define AMDGPU_RESET_MAX_HANDLERS 5
+
 enum AMDGPU_RESET_FLAGS {

        AMDGPU_NEED_FULL_RESET = 0,
@@ -44,7 +46,6 @@ struct amdgpu_reset_context {

 struct amdgpu_reset_handler {
        enum amd_reset_method reset_method;
-       struct list_head handler_list;
        int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
                           struct amdgpu_reset_context *context);
        int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl, @@ -63,7 +64,8 @@ struct amdgpu_reset_control {
        void *handle;
        struct work_struct reset_work;
        struct mutex reset_lock;
-       struct list_head reset_handlers;
+       struct amdgpu_reset_handler *(
+               *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS];
        atomic_t in_reset;
        enum amd_reset_method active_reset;
        struct amdgpu_reset_handler *(*get_reset_handler)( @@ -97,8 +99,10 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,  int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
                               struct amdgpu_reset_context *reset_context);

-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
-                            struct amdgpu_reset_handler *handler);
+int amdgpu_reset_prepare_env(struct amdgpu_device *adev,
+                            struct amdgpu_reset_context *reset_context); int
+amdgpu_reset_restore_env(struct amdgpu_device *adev,
+                            struct amdgpu_reset_context *reset_context);

 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
                                                             char *wq_name);
@@ -126,4 +130,8 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);

 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);

+#define for_each_handler(i, handler, reset_ctl)                  \
+       for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) &&           \
+                   (handler = (*reset_ctl->reset_handlers)[i]); \
+            ++i)
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
index 8b8086d5c864..07ded70f4df9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -48,18 +48,17 @@ sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
                            struct amdgpu_reset_context *reset_context)  {
        struct amdgpu_reset_handler *handler;
+       int i;

        if (reset_context->method != AMD_RESET_METHOD_NONE) {
-               list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == reset_context->method)
                                return handler;
                }
        }

        if (sienna_cichlid_is_mode2_default(reset_ctl)) {
-               list_for_each_entry (handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == AMD_RESET_METHOD_MODE2)
                                return handler;
                }
@@ -120,9 +119,9 @@ static void sienna_cichlid_async_reset(struct work_struct *work)
        struct amdgpu_reset_control *reset_ctl =
                container_of(work, struct amdgpu_reset_control, reset_work);
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       int i;

-       list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                            handler_list) {
+       for_each_handler(i, handler, reset_ctl) {
                if (handler->reset_method == reset_ctl->active_reset) {
                        dev_dbg(adev->dev, "Resetting device\n");
                        handler->do_reset(adev);
@@ -281,6 +280,11 @@ static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
        .do_reset               = sienna_cichlid_mode2_reset,
 };

+static struct amdgpu_reset_handler
+       *sienna_cichlid_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+               &sienna_cichlid_mode2_handler,
+       };
+
 int sienna_cichlid_reset_init(struct amdgpu_device *adev)  {
        struct amdgpu_reset_control *reset_ctl; @@ -294,11 +298,9 @@ int sienna_cichlid_reset_init(struct amdgpu_device *adev)
        reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
        reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;

-       INIT_LIST_HEAD(&reset_ctl->reset_handlers);
        INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
        /* Only mode2 is handled through reset control now */
-       amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);
-
+       reset_ctl->reset_handlers = &sienna_cichlid_rst_handlers;
        adev->reset_cntl = reset_ctl;

        return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
index ae29620b1ea4..04c797d54511 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -44,10 +44,10 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,  {
        struct amdgpu_reset_handler *handler;
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       int i;

        if (reset_context->method != AMD_RESET_METHOD_NONE) {
-               list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == reset_context->method)
                                return handler;
                }
@@ -55,8 +55,7 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,

        if (smu_v13_0_10_is_mode2_default(reset_ctl) &&
                amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) {
-               list_for_each_entry (handler, &reset_ctl->reset_handlers,
-                                    handler_list) {
+               for_each_handler(i, handler, reset_ctl) {
                        if (handler->reset_method == AMD_RESET_METHOD_MODE2)
                                return handler;
                }
@@ -119,9 +118,9 @@ static void smu_v13_0_10_async_reset(struct work_struct *work)
        struct amdgpu_reset_control *reset_ctl =
                container_of(work, struct amdgpu_reset_control, reset_work);
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       int i;

-       list_for_each_entry(handler, &reset_ctl->reset_handlers,
-                            handler_list) {
+       for_each_handler(i, handler, reset_ctl) {
                if (handler->reset_method == reset_ctl->active_reset) {
                        dev_dbg(adev->dev, "Resetting device\n");
                        handler->do_reset(adev);
@@ -272,6 +271,11 @@ static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = {
        .do_reset               = smu_v13_0_10_mode2_reset,
 };

+static struct amdgpu_reset_handler
+       *smu_v13_0_10_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+               &smu_v13_0_10_mode2_handler,
+       };
+
 int smu_v13_0_10_reset_init(struct amdgpu_device *adev)  {
        struct amdgpu_reset_control *reset_ctl; @@ -285,10 +289,9 @@ int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
        reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
        reset_ctl->get_reset_handler = smu_v13_0_10_get_reset_handler;

-       INIT_LIST_HEAD(&reset_ctl->reset_handlers);
        INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
        /* Only mode2 is handled through reset control now */
-       amdgpu_reset_add_handler(reset_ctl, &smu_v13_0_10_mode2_handler);
+       reset_ctl->reset_handlers = &smu_v13_0_10_rst_handlers;

        adev->reset_cntl = reset_ctl;

--
2.25.1



More information about the amd-gfx mailing list