[PATCH 1/5] drm/amdgpu: Add work pool to reset domain
Lijo Lazar
lijo.lazar at amd.com
Fri Aug 11 06:02:30 UTC 2023
Add a work pool to the reset domain. The work pool is used to schedule
any task in the reset domain. When a reset of the domain succeeds and
this is indicated by a flag in the reset context, all queued work items
are drained; their work handlers won't be executed.
Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
---
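Note for reviewers: a minimal usage sketch, not part of the patch; the
function names below are hypothetical. A handler queued through
amdgpu_reset_schedule_work() runs on the domain's workqueue, and setting
AMDGPU_RESET_CANCEL_ALL in its reset context after a successful reset
makes the domain cancel and drain all remaining queued work:

	/* Hypothetical handler; runs on the reset domain's workqueue. */
	static void example_reset_handler(struct amdgpu_reset_context *reset_context)
	{
		struct amdgpu_device *adev = reset_context->reset_req_dev;

		/* ... perform the actual reset of adev here ... */

		/* On success, request cancel/drain of remaining queued work */
		reset_context->flags |= 1U << AMDGPU_RESET_CANCEL_ALL;
	}

	/* Hypothetical caller. */
	static int example_trigger_reset(struct amdgpu_device *adev)
	{
		struct amdgpu_reset_context reset_context = { 0 };

		reset_context.reset_req_dev = adev;
		/* AMDGPU_RESET_SCHEDULE_NOW would make this call synchronous */

		/* Fails with -EBUSY when all AMDGPU_MAX_RESET_WORK slots are taken */
		return amdgpu_reset_schedule_work(adev, &reset_context,
						  example_reset_handler);
	}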
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 100 ++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 22 +++++
2 files changed, 121 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 02d874799c16..713362a60c9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -117,6 +117,51 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
kvfree(reset_domain);
}
+static void amdgpu_reset_domain_cancel_all_work(struct work_struct *work)
+{
+ struct amdgpu_reset_domain *reset_domain =
+ container_of(work, struct amdgpu_reset_domain, clear);
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i)
+ if (atomic_cmpxchg(&reset_domain->work[i].in_use, 1, 0))
+ cancel_work(&reset_domain->work[i].work);
+
+ drain_workqueue(reset_domain->wq);
+ reset_domain->drain = false;
+}
+
+static void amdgpu_reset_work_handler(struct work_struct *work)
+{
+ struct amdgpu_reset_work *reset_work =
+ container_of(work, struct amdgpu_reset_work, work);
+
+ /* Don't do anything if reset domain is in drain mode */
+ if (reset_work->domain->drain)
+ return;
+
+ reset_work->handler(&reset_work->context);
+ if (reset_work->context.flags & (1U << AMDGPU_RESET_CANCEL_ALL)) {
+ reset_work->domain->drain = true;
+ schedule_work(&reset_work->domain->clear);
+ }
+
+ atomic_set(&reset_work->in_use, 0);
+}
+
+static void
+amdgpu_reset_init_work_pool(struct amdgpu_reset_domain *reset_domain)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i) {
+ INIT_WORK(&reset_domain->work[i].work,
+ amdgpu_reset_work_handler);
+ atomic_set(&reset_domain->work[i].in_use, 0);
+ reset_domain->work[i].domain = reset_domain;
+ }
+}
+
struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
char *wq_name)
{
@@ -139,6 +184,8 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
}
+ INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work);
+ amdgpu_reset_init_work_pool(reset_domain);
atomic_set(&reset_domain->in_gpu_reset, 0);
atomic_set(&reset_domain->reset_res, 0);
init_rwsem(&reset_domain->sem);
@@ -152,12 +199,63 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
down_write(&reset_domain->sem);
}
-
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
{
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
+static int
+amdgpu_reset_domain_get_work(struct amdgpu_reset_domain *reset_domain,
+ struct amdgpu_reset_work **reset_work)
+{
+ int i;
+
+ if (!reset_work)
+ return -EINVAL;
+
+ *reset_work = NULL;
+ for (i = 0; i < AMDGPU_MAX_RESET_WORK; ++i) {
+ if (!atomic_cmpxchg(&reset_domain->work[i].in_use, 0, 1)) {
+ *reset_work = &reset_domain->work[i];
+ return 0;
+ }
+ }
+ /* All work slots are occupied */
+ return -EBUSY;
+}
+
+static void amdgpu_reset_init_work(struct amdgpu_reset_work *reset_work,
+ struct amdgpu_reset_context *reset_context,
+ amdgpu_reset_work_func_t reset_work_handler)
+{
+ memcpy(&reset_work->context, reset_context, sizeof(*reset_context));
+ reset_work->handler = reset_work_handler;
+}
+
+int amdgpu_reset_schedule_work(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context,
+ amdgpu_reset_work_func_t reset_work_handler)
+{
+ struct amdgpu_reset_work *reset_work;
+ int ret;
+
+ if (!reset_context || !reset_context->reset_req_dev ||
+ !reset_work_handler)
+ return -EINVAL;
+
+ ret = amdgpu_reset_domain_get_work(adev->reset_domain, &reset_work);
+ if (ret)
+ return ret;
+
+ amdgpu_reset_init_work(reset_work, reset_context, reset_work_handler);
+
+ queue_work(adev->reset_domain->wq, &reset_work->work);
+
+ if (reset_context->flags & (1U << AMDGPU_RESET_SCHEDULE_NOW))
+ flush_work(&reset_work->work);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 471d789b33a5..d1393050d3ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -27,12 +27,16 @@
#include "amdgpu.h"
#define AMDGPU_RESET_MAX_HANDLERS 5
+#define AMDGPU_MAX_RESET_WORK 5
enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
+ AMDGPU_RESET_XCP = 3,
+ AMDGPU_RESET_SCHEDULE_NOW = 4,
+ AMDGPU_RESET_CANCEL_ALL = 5,
};
struct amdgpu_reset_context {
@@ -80,13 +84,28 @@ enum amdgpu_reset_domain_type {
XGMI_HIVE
};
+typedef void (*amdgpu_reset_work_func_t)(
+ struct amdgpu_reset_context *reset_context);
+
+struct amdgpu_reset_work {
+ struct work_struct work;
+ struct amdgpu_reset_context context;
+ struct amdgpu_reset_domain *domain;
+ atomic_t in_use;
+
+ amdgpu_reset_work_func_t handler;
+};
+
struct amdgpu_reset_domain {
struct kref refcount;
struct workqueue_struct *wq;
enum amdgpu_reset_domain_type type;
+ struct amdgpu_reset_work work[AMDGPU_MAX_RESET_WORK];
+ struct work_struct clear;
struct rw_semaphore sem;
atomic_t in_gpu_reset;
atomic_t reset_res;
+ bool drain;
};
@@ -129,6 +148,9 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+int amdgpu_reset_schedule_work(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context,
+ amdgpu_reset_work_func_t handler);
#define for_each_handler(i, handler, reset_ctl) \
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
--
2.25.1