[PATCH v2 4/8] drm/amdgpu: Add reset control to amdgpu_device

Lazar, Lijo Lijo.Lazar at amd.com
Mon Mar 22 05:57:38 UTC 2021


[AMD Public Use]

v1: Add a generic amdgpu_reset_control to handle different types of resets. It
may be added at device, hive or IP level. Each reset control has a list
of handlers associated with it, each handling a particular type of reset. The
reset control is responsible for choosing the right handler for a given
reset context.

Handler objects may implement a set of functions on how to handle a
particular type of reset.

prepare_env = Prepare environment/software context (not used currently).
prepare_hwcontext = Prepare hardware context for the reset.
perform_reset = Perform the type of reset.
restore_hwcontext = Restore the hw context after reset.
restore_env = Restore the environment after reset (not used currently).

The reset context carries the parameters of a reset request. For now, it is
based on the parameters used by the current set of resets.

v2: Fix coding style

Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile       |  5 ++
drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 82 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 85 +++++++++++++++++++++++
4 files changed, 175 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 741b68874e53..a0a5fd1788b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -179,9 +179,14 @@ amdgpu-y += \
               smuio_v11_0_6.o \
               smuio_v13_0.o
+# add reset control block
+amdgpu-y += amdgpu_reset.o
+
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
+
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 963ecfd84347..1fba89cced91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -271,6 +271,7 @@ struct amdgpu_bo_va_mapping;
struct amdgpu_atif;
struct kfd_vm_fault_info;
struct amdgpu_hive_info;
+struct amdgpu_reset_control;
 enum amdgpu_cp_irq {
               AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -589,6 +590,7 @@ struct amdgpu_allowed_register_entry {
};
 enum amd_reset_method {
+             AMD_RESET_METHOD_NONE = -1,
               AMD_RESET_METHOD_LEGACY = 0,
               AMD_RESET_METHOD_MODE0,
               AMD_RESET_METHOD_MODE1,
@@ -1077,6 +1079,7 @@ struct amdgpu_device {
                bool                            in_pci_err_recovery;
               struct pci_saved_state          *pci_state;
+             struct amdgpu_reset_control     *reset_cntl;
};
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
new file mode 100644
index 000000000000..5bc94b8320ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_reset.h"
+#include "aldebaran.h"
+
+/**
+ * amdgpu_reset_add_handler - register a handler with a reset control
+ * @reset_ctl: reset control whose handler list is extended
+ * @handler: handler to append
+ *
+ * Appends @handler to the tail of @reset_ctl->reset_handlers. No check is
+ * made for a handler that is already on the list.
+ *
+ * Return: always 0.
+ */
+int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
+			     struct amdgpu_reset_handler *handler)
+{
+	struct list_head *handlers = &reset_ctl->reset_handlers;
+
+	/* TODO: Check if handler exists? */
+	list_add_tail(&handler->handler_list, handlers);
+
+	return 0;
+}
+
+/**
+ * amdgpu_reset_init - initialize reset handling for a device
+ * @adev: amdgpu device (currently unused)
+ *
+ * Placeholder: performs no work yet.
+ *
+ * Return: always 0.
+ */
+int amdgpu_reset_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
+
+/**
+ * amdgpu_reset_fini - tear down reset handling for a device
+ * @adev: amdgpu device (currently unused)
+ *
+ * Placeholder: performs no work yet.
+ *
+ * Return: always 0.
+ */
+int amdgpu_reset_fini(struct amdgpu_device *adev)
+{
+	return 0;
+}
+
+/**
+ * amdgpu_reset_prepare_hwcontext - prepare hardware context for a reset
+ * @adev: amdgpu device being reset
+ * @reset_context: parameters describing the reset request
+ *
+ * Asks the device's reset control for the handler matching @reset_context
+ * and invokes that handler's prepare_hwcontext callback.
+ *
+ * Return: -ENOSYS if the device has no reset control, the control has no
+ * get_reset_handler callback, or no handler is returned; otherwise the
+ * value returned by the handler's prepare_hwcontext callback.
+ */
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+				   struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_reset_control *ctl = adev->reset_cntl;
+	struct amdgpu_reset_handler *handler;
+
+	if (!ctl || !ctl->get_reset_handler)
+		return -ENOSYS;
+
+	handler = ctl->get_reset_handler(ctl, reset_context);
+	if (!handler)
+		return -ENOSYS;
+
+	return handler->prepare_hwcontext(ctl, reset_context);
+}
+
+/**
+ * amdgpu_reset_perform_reset - perform a reset and restore hardware context
+ * @adev: amdgpu device being reset
+ * @reset_context: parameters describing the reset request
+ *
+ * Asks the device's reset control for the handler matching @reset_context,
+ * invokes its perform_reset callback and, if that succeeds, its
+ * restore_hwcontext callback.
+ *
+ * Return: -ENOSYS if the device has no reset control, the control has no
+ * get_reset_handler callback, or no handler is returned; a non-zero
+ * perform_reset result if the reset itself fails; otherwise the value
+ * returned by the handler's restore_hwcontext callback.
+ */
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+			       struct amdgpu_reset_context *reset_context)
+{
+	int ret;
+	struct amdgpu_reset_handler *reset_handler = NULL;
+
+	/*
+	 * Check the callback as well as the control, as
+	 * amdgpu_reset_prepare_hwcontext() does; a control without
+	 * get_reset_handler must not be dereferenced.
+	 */
+	if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
+		reset_handler = adev->reset_cntl->get_reset_handler(
+			adev->reset_cntl, reset_context);
+	if (!reset_handler)
+		return -ENOSYS;
+
+	ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
+	if (ret)
+		return ret;
+
+	/* Hardware context is restored only after a successful reset. */
+	return reset_handler->restore_hwcontext(adev->reset_cntl,
+						reset_context);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
new file mode 100644
index 000000000000..dc84d871fe72
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RESET_H__
+#define __AMDGPU_RESET_H__
+
+#include "amdgpu.h"
+
+/*
+ * Flags for a reset request.
+ * NOTE(review): presumably bit positions within amdgpu_reset_context.flags;
+ * confirm against the users of that field.
+ */
+enum AMDGPU_RESET_FLAGS {
+	AMDGPU_NEED_FULL_RESET = 0,
+	AMDGPU_SKIP_HW_RESET = 1,
+};
+
+/**
+ * struct amdgpu_reset_context - parameters describing one reset request
+ * @method: reset method for this request
+ * @reset_req_dev: presumably the device that requested the reset - confirm
+ * @job: presumably the job associated with the reset, if any - confirm
+ * @hive: presumably the hive the reset applies to, if any - confirm
+ * @flags: request flags; see enum AMDGPU_RESET_FLAGS
+ *
+ * Passed to the reset control to select a handler and then to every
+ * handler callback.
+ */
+struct amdgpu_reset_context {
+	enum amd_reset_method	method;
+	struct amdgpu_device	*reset_req_dev;
+	struct amdgpu_job	*job;
+	struct amdgpu_hive_info	*hive;
+	unsigned long		flags;
+};
+
+/**
+ * struct amdgpu_reset_handler - callbacks implementing one type of reset
+ * @reset_method: reset method tag for this handler (presumably what the
+ *                control's get_reset_handler matches on - confirm)
+ * @handler_list: node linking the handler into
+ *                amdgpu_reset_control.reset_handlers
+ * @prepare_env: prepare environment/software context (not used currently)
+ * @prepare_hwcontext: prepare hardware context for the reset
+ * @perform_reset: perform the reset
+ * @restore_hwcontext: restore the hardware context after reset
+ * @restore_env: restore the environment after reset (not used currently)
+ * @do_reset: NOTE(review): not called from amdgpu_reset.c; purpose to be
+ *            confirmed against handler implementations
+ */
+struct amdgpu_reset_handler {
+	enum amd_reset_method reset_method;
+	struct list_head handler_list;
+
+	int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
+			   struct amdgpu_reset_context *context);
+	int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+				 struct amdgpu_reset_context *context);
+	int (*perform_reset)(struct amdgpu_reset_control *reset_ctl,
+			     struct amdgpu_reset_context *context);
+	int (*restore_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+				 struct amdgpu_reset_context *context);
+	int (*restore_env)(struct amdgpu_reset_control *reset_ctl,
+			   struct amdgpu_reset_context *context);
+
+	int (*do_reset)(struct amdgpu_device *adev);
+};
+
+/**
+ * struct amdgpu_reset_control - owner of a set of reset handlers
+ * @handle: opaque pointer; presumably the object (device, hive or IP) this
+ *          control is attached to - confirm
+ * @reset_work: work item; presumably runs @async_reset - confirm
+ * @reset_lock: mutex; what it protects is not shown in this patch
+ * @reset_handlers: list of handlers, appended to by
+ *                  amdgpu_reset_add_handler()
+ * @in_reset: atomic counter/flag; presumably marks a reset in progress -
+ *            confirm
+ * @active_reset: presumably the reset method currently in progress - confirm
+ * @get_reset_handler: returns the handler to use for a given reset context,
+ *                     or NULL if none applies
+ * @async_reset: work function; presumably for asynchronous resets - confirm
+ */
+struct amdgpu_reset_control {
+	void *handle;
+	struct work_struct reset_work;
+	struct mutex reset_lock;
+	struct list_head reset_handlers;
+	atomic_t in_reset;
+	enum amd_reset_method active_reset;
+
+	struct amdgpu_reset_handler *(*get_reset_handler)(
+		struct amdgpu_reset_control *reset_ctl,
+		struct amdgpu_reset_context *context);
+	void (*async_reset)(struct work_struct *work);
+};
+
+/* Per-device setup and teardown (currently placeholders returning 0). */
+int amdgpu_reset_init(struct amdgpu_device *adev);
+int amdgpu_reset_fini(struct amdgpu_device *adev);
+
+/* Dispatch to the handler chosen by the device's reset control. */
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+				   struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+			       struct amdgpu_reset_context *reset_context);
+
+/* Append a handler to a reset control's handler list. */
+int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
+			     struct amdgpu_reset_handler *handler);
+
+#endif
--
2.17.1

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20210322/9498a6f0/attachment-0001.htm>


More information about the amd-gfx mailing list