[PATCH 1/7 V2] drm/amd/amdgpu: Simplify SDMA reset mechanism by removing dynamic callbacks
jesse.zhang at amd.com
jesse.zhang at amd.com
Wed Apr 2 03:02:09 UTC 2025
From: "Jesse.zhang at amd.com" <jesse.zhang at amd.com>
Since KFD no longer registers its own callbacks for SDMA resets, and only KGD uses the reset mechanism,
we can simplify the SDMA reset flow by directly calling the ring's `stop_queue` and `start_queue` functions.
This patch removes the dynamic callback mechanism and prepares for its eventual deprecation.
1. **Remove Dynamic Callbacks**:
- The `pre_reset` and `post_reset` callback invocations in `amdgpu_sdma_reset_engine` are removed.
- Instead, the ring's `stop_queue` and `start_queue` functions are called directly during the reset process.
2. **Prepare for Deprecation of Dynamic Mechanism**:
- By removing the callback invocations, this patch prepares the codebase for the eventual removal of the dynamic callback registration mechanism.
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 34 +++---------------------
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 ++
3 files changed, 8 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 615c3d5c5a8d..1b66be2b49dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -237,6 +237,8 @@ struct amdgpu_ring_funcs {
void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
int (*reset)(struct amdgpu_ring *ring, unsigned int vmid);
+ int (*stop_queue)(struct amdgpu_device *adev, uint32_t instance_id);
+ int (*start_queue)(struct amdgpu_device *adev, uint32_t instance_id);
void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
bool (*is_guilty)(struct amdgpu_ring *ring);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 0a9893fee828..7d862c887a1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -558,16 +558,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
* @adev: Pointer to the AMDGPU device
* @instance_id: ID of the SDMA engine instance to reset
*
- * This function performs the following steps:
- * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state.
- * 2. Resets the specified SDMA engine instance.
- * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state.
- *
* Returns: 0 on success, or a negative error code on failure.
*/
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
{
- struct sdma_on_reset_funcs *funcs;
int ret = 0;
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
@@ -589,18 +583,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
page_sched_stopped = true;
}
- /* Invoke all registered pre_reset callbacks */
- list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
- if (funcs->pre_reset) {
- ret = funcs->pre_reset(adev, instance_id);
- if (ret) {
- dev_err(adev->dev,
- "beforeReset callback failed for instance %u: %d\n",
- instance_id, ret);
- goto exit;
- }
- }
- }
+ if (gfx_ring->funcs->stop_queue)
+ gfx_ring->funcs->stop_queue(adev, instance_id);
/* Perform the SDMA reset for the specified instance */
ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id);
@@ -609,18 +593,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
goto exit;
}
- /* Invoke all registered post_reset callbacks */
- list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
- if (funcs->post_reset) {
- ret = funcs->post_reset(adev, instance_id);
- if (ret) {
- dev_err(adev->dev,
- "afterReset callback failed for instance %u: %d\n",
- instance_id, ret);
- goto exit;
- }
- }
- }
+ if (gfx_ring->funcs->start_queue)
+ gfx_ring->funcs->start_queue(adev, instance_id);
exit:
/* Restart the scheduler's work queue for the GFX and page rings
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 688a720bbbbd..df82a97a5388 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2143,6 +2143,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
.reset = sdma_v4_4_2_reset_queue,
+ .stop_queue = sdma_v4_4_2_stop_queue,
+ .start_queue = sdma_v4_4_2_restore_queue,
.is_guilty = sdma_v4_4_2_ring_is_guilty,
};
--
2.25.1
More information about the amd-gfx
mailing list