[PATCH 2/5] drm/amdgpu: Move to reset_schedule_work
Lijo Lazar
lijo.lazar at amd.com
Fri Aug 11 06:02:31 UTC 2023
Move the recovery handlers over to the reset_schedule_work interface. Make
use of the work pool in the reset domain and delete the individual
per-client work items.
Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
---
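Note for reviewers: amdgpu_reset_schedule_work() itself is introduced
earlier in this series, so its definition is not visible in this patch.
The sketch below is only an illustration of the contract the call sites
here appear to rely on -- copy the caller's on-stack reset context into a
work item drawn from the reset domain's pool, remember the handler, and
queue it, returning nonzero on success like the amdgpu_reset_domain_schedule()
helper it replaces. The names amdgpu_reset_work_item,
amdgpu_reset_get_work_item() and amdgpu_reset_put_work_item() are
assumptions; only struct amdgpu_reset_context, struct amdgpu_reset_domain
and reset_domain->wq exist in the current code.

	/* Illustrative sketch only, not the real implementation. */
	struct amdgpu_reset_work_item {
		struct work_struct base;
		/* private copy; outlives the caller's stack frame */
		struct amdgpu_reset_context context;
		void (*handler)(struct amdgpu_reset_context *reset_context);
	};

	static void amdgpu_reset_work_fn(struct work_struct *work)
	{
		struct amdgpu_reset_work_item *item =
			container_of(work, struct amdgpu_reset_work_item, base);

		item->handler(&item->context);
		amdgpu_reset_put_work_item(item); /* assumed: return to pool */
	}

	/* Assumed to return nonzero when queued, like queue_work(). */
	bool amdgpu_reset_schedule_work(struct amdgpu_device *adev,
					struct amdgpu_reset_context *reset_context,
					void (*handler)(struct amdgpu_reset_context *))
	{
		struct amdgpu_reset_work_item *item;

		/* assumed: take a free item from the reset domain's pool */
		item = amdgpu_reset_get_work_item(adev->reset_domain);
		if (!item)
			return false;

		item->context = *reset_context;
		item->handler = handler;
		INIT_WORK(&item->base, amdgpu_reset_work_fn);

		return queue_work(adev->reset_domain->wq, &item->base);
	}

Under that assumed contract, the converted handlers no longer need
container_of() plumbing or per-client INIT_WORK(), and the on-stack
reset_context in the interrupt paths (see the xgpu_*_mailbox_rcv_irq()
hunks) is safe because the helper takes its own copy before the caller
returns. The debugfs path also sets AMDGPU_RESET_SCHEDULE_NOW, presumably
added earlier in the series, in place of its old flush_work() call.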
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 +++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 -----
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 40 ++++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 71 +++++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 -
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 38 ++++++------
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 44 ++++++--------
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 33 +++++-----
10 files changed, 118 insertions(+), 159 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2e3c7c15cb8e..4186d8342a15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1096,8 +1096,6 @@ struct amdgpu_device {
bool scpm_enabled;
uint32_t scpm_status;
- struct work_struct reset_work;
-
bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 629ca1ad75a8..e4c5e8f68843 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -120,21 +120,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
-
-static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+static void amdgpu_amdkfd_reset_work(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- kfd.reset_work);
-
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ amdgpu_device_gpu_recover(reset_context->reset_req_dev, NULL,
+ reset_context);
}
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
@@ -200,7 +189,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
- INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@@ -268,9 +256,17 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
- if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->kfd.reset_work);
+ struct amdgpu_reset_context reset_context;
+
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ memset(&reset_context, 0, sizeof(reset_context));
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_reset_schedule_work(adev, &reset_context,
+ amdgpu_amdkfd_reset_work);
+ }
}
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b34418e3e006..c36501f9ae0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -102,7 +102,6 @@ struct amdgpu_kfd_dev {
int64_t vram_used[MAX_XCP];
uint64_t vram_used_aligned[MAX_XCP];
bool init_complete;
- struct work_struct reset_work;
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
struct dev_pagemap pgmap;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9061d79cd387..3e56ccb742bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5152,21 +5152,6 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
-#if defined(CONFIG_DEBUG_FS)
- if (!amdgpu_sriov_vf(adev))
- cancel_work(&adev->reset_work);
-#endif
-
- if (adev->kfd.dev)
- cancel_work(&adev->kfd.reset_work);
-
- if (amdgpu_sriov_vf(adev))
- cancel_work(&adev->virt.flr_work);
-
- if (con && adev->ras_enabled)
- cancel_work(&con->recovery_work);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c694b41f6461..40786b135f4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -899,6 +899,14 @@ static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
return 0;
}
+static void
+amdgpu_debugfs_reset_work(struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = reset_context->reset_req_dev;
+
+ amdgpu_device_gpu_recover(adev, NULL, reset_context);
+}
+
/*
* amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
*
@@ -908,6 +916,7 @@ static int gpu_recover_get(void *data, u64 *val)
{
struct amdgpu_device *adev = (struct amdgpu_device *)data;
struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_reset_context reset_context;
int r;
r = pm_runtime_get_sync(dev->dev);
@@ -916,8 +925,14 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
- if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
- flush_work(&adev->reset_work);
+ memset(&reset_context, 0, sizeof(reset_context));
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_RESET_SCHEDULE_NOW, &reset_context.flags);
+
+ amdgpu_reset_schedule_work(adev, &reset_context,
+ amdgpu_debugfs_reset_work);
*val = atomic_read(&adev->reset_domain->reset_res);
@@ -931,22 +946,6 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
"%lld\n");
-static void amdgpu_debugfs_reset_work(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- reset_work);
-
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-}
-
#endif
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
@@ -958,12 +957,9 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
&amdgpu_debugfs_fence_info_fops);
- if (!amdgpu_sriov_vf(adev)) {
-
- INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
+ if (!amdgpu_sriov_vf(adev))
debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
- }
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7689395e44fd..9e8e904434f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2011,12 +2011,11 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
return ret;
}
-static void amdgpu_ras_do_recovery(struct work_struct *work)
+static void amdgpu_ras_do_recovery(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_ras *ras =
- container_of(work, struct amdgpu_ras, recovery_work);
+ struct amdgpu_device *adev = reset_context->reset_req_dev;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
struct amdgpu_device *remote_adev = NULL;
- struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
if (!ras->disable_ras_err_cnt_harvest) {
@@ -2040,37 +2039,9 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
amdgpu_put_xgmi_hive(hive);
}
- if (amdgpu_device_should_recover_gpu(ras->adev)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
-
- /* Perform full reset in fatal error mode */
- if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- else {
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
- ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
- reset_context.method = AMD_RESET_METHOD_MODE2;
- }
-
- /* Fatal error occurs in poison mode, mode1 reset is used to
- * recover gpu.
- */
- if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
- ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- psp_fatal_error_recovery_quirk(&adev->psp);
- }
- }
+ if (amdgpu_device_should_recover_gpu(ras->adev))
+ amdgpu_device_gpu_recover(ras->adev, NULL, reset_context);
- amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
- }
atomic_set(&ras->in_recovery, 0);
}
@@ -2313,7 +2284,6 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
}
mutex_init(&con->recovery_lock);
- INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0;
@@ -3160,9 +3130,38 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) {
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ } else {
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ psp_fatal_error_recovery_quirk(&adev->psp);
+ }
+ }
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
- amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ amdgpu_reset_schedule_work(ras->adev, &reset_context,
+ amdgpu_ras_do_recovery);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..87e0a8b918df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -237,7 +237,6 @@ struct amdgpu_virt {
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
- struct work_struct flr_work;
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 63725b2ebc03..53fdf6e70ad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -249,10 +249,9 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_device *adev = reset_context->reset_req_dev;
int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
@@ -281,18 +280,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
up_write(&adev->reset_domain->sem);
/* Trigger recovery for world switch failure if no TDR */
- if (amdgpu_device_should_recover_gpu(adev)
- && (!amdgpu_device_has_job_running(adev) ||
- adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
- }
+ if (amdgpu_device_should_recover_gpu(adev) &&
+ (!amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
+ amdgpu_device_gpu_recover(adev, NULL, reset_context);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -314,14 +305,21 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+ struct amdgpu_reset_context reset_context;
switch (event) {
case IDH_FLR_NOTIFICATION:
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
- WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work),
- "Failed to queue work! at %s",
- __func__);
+ WARN_ONCE(!amdgpu_reset_schedule_work(
+ adev, &reset_context,
+ xgpu_ai_mailbox_flr_work),
+ "Failed to queue work! at %s", __func__);
break;
case IDH_QUERY_ALIVE:
xgpu_ai_mailbox_send_ack(adev);
@@ -388,8 +386,6 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
return r;
}
- INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 6a68ee946f1c..171fe3e84ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -271,10 +271,9 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_device *adev = reset_context->reset_req_dev;
int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
@@ -303,21 +302,13 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
up_write(&adev->reset_domain->sem);
/* Trigger recovery for world switch failure if no TDR */
- if (amdgpu_device_should_recover_gpu(adev)
- && (!amdgpu_device_has_job_running(adev) ||
- adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
- }
+ if (amdgpu_device_should_recover_gpu(adev) &&
+ (!amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
+ amdgpu_device_gpu_recover(adev, NULL, reset_context);
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -342,14 +333,21 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+ struct amdgpu_reset_context reset_context;
switch (event) {
case IDH_FLR_NOTIFICATION:
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
- WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work),
- "Failed to queue work! at %s",
- __func__);
+ WARN_ONCE(!amdgpu_reset_schedule_work(
+ adev, &reset_context,
+ xgpu_nv_mailbox_flr_work),
+ "Failed to queue work! at %s", __func__);
break;
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
* it byfar since that polling thread will handle it,
@@ -413,8 +411,6 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
return r;
}
- INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 59f53c743362..a39805bc69c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -510,10 +510,9 @@ static int xgpu_vi_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
+static void xgpu_vi_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_device *adev = reset_context->reset_req_dev;
/* wait until RCV_MSG become 3 */
if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
@@ -522,16 +521,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
- if (amdgpu_device_should_recover_gpu(adev)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
- }
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_device_gpu_recover(adev, NULL, reset_context);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -553,18 +544,24 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int r;
+ struct amdgpu_reset_context reset_context;
/* trigger gpu-reset by hypervisor only if TDR disabled */
if (!amdgpu_gpu_recovery) {
/* see what event we get */
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
/* only handle FLR_NOTIFY now */
if (!r && !amdgpu_in_reset(adev))
- WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work),
- "Failed to queue work! at %s",
- __func__);
+ WARN_ONCE(!amdgpu_reset_schedule_work(
+ adev, &reset_context,
+ xgpu_vi_mailbox_flr_work),
+ "Failed to queue work! at %s", __func__);
}
return 0;
@@ -618,8 +615,6 @@ int xgpu_vi_mailbox_get_irq(struct amdgpu_device *adev)
return r;
}
- INIT_WORK(&adev->virt.flr_work, xgpu_vi_mailbox_flr_work);
-
return 0;
}
--
2.25.1