[PATCH 2/2] drm/amdgpu: Add SDMA queue start/stop functions and integrate with ring funcs
Jesse.zhang@amd.com
jesse.zhang at amd.com
Tue Mar 11 08:33:00 UTC 2025
From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
This patch introduces two new functions, `amdgpu_sdma_stop_queue` and
`amdgpu_sdma_start_queue`, to handle the stopping and starting of SDMA queues
during engine reset operations. The changes include:
1. **New Functions**:
- `amdgpu_sdma_stop_queue`: Stops the SDMA queues and the scheduler's work queue
for the GFX and page rings.
- `amdgpu_sdma_start_queue`: Starts the SDMA queues and restarts the scheduler's
work queue for the GFX and page rings.
2. **Integration with Ring Functions**:
- The `stop_queue` and `start_queue` callbacks are added to the `amdgpu_ring_funcs`
structure and implemented for SDMA v4.4.2.
Suggested-by: Jonathan Kim <jonathan.kim at amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 92 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +
4 files changed, 97 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d55c8b7fdb59..ff9aacbdf046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -351,6 +351,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
0xffffffffffffffff : ring->buf_mask;
/* Initialize cached_rptr to 0 */
ring->cached_rptr = 0;
+ atomic_set(&ring->stop_refcount, 0);
/* Allocate ring buffer */
if (ring->is_mes_queue) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1c52ff92ea26..7a984dbb48c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -312,6 +312,8 @@ struct amdgpu_ring {
unsigned int entry_index;
/* store the cached rptr to restore after reset */
uint64_t cached_rptr;
+ /* Reference counter for stop requests */
+ atomic_t stop_refcount;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 39669f8788a7..7cd6dcd6e7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -30,6 +30,7 @@
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+/* Serializes stop/start of SDMA queues across all instances; file-local. */
+static DEFINE_MUTEX(sdma_queue_mutex);
/*
* GPU SDMA IP block helpers function.
@@ -504,6 +505,97 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_sdma_stop_queue - Stop the SDMA queues of one engine instance
+ * @adev: amdgpu device pointer
+ * @instance_id: SDMA engine instance to stop
+ *
+ * Quiesces the GFX ring (and the page ring when the ASIC has one) of the
+ * given SDMA instance: stops the scheduler work queues so no new jobs are
+ * submitted, then invokes the ring-specific stop_queue callback.
+ *
+ * Stop requests are reference counted. Only the first stopper performs
+ * the actual stop; later callers get -EBUSY but still hold a reference
+ * and must balance it with amdgpu_sdma_start_queue().
+ *
+ * Returns 0 on success, -EBUSY if the queues were already stopped, or
+ * the error from down_read_killable().
+ */
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Take a stop reference; only the first stopper does the real work. */
+	atomic_inc(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_inc(&page_ring->stop_refcount);
+
+	if (atomic_read(&gfx_ring->stop_refcount) != 1 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 1)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	/* Stop the scheduler work queues while they are running so no new
+	 * jobs reach the rings. Note: the check is positive here to match
+	 * the symmetric amdgpu_ring_sched_ready() test used on the restart
+	 * path in amdgpu_sdma_start_queue(); the previous negated check
+	 * would only stop queues whose scheduler was not ready.
+	 */
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_stop(&gfx_ring->sched);
+
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_stop(&page_ring->sched);
+
+	/* Hardware-specific queue stop (e.g. SDMA v4.4.2 implementation). */
+	if (gfx_ring->funcs && gfx_ring->funcs->stop_queue)
+		gfx_ring->funcs->stop_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->stop_queue)
+		page_ring->funcs->stop_queue(adev, instance_id);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
+/**
+ * amdgpu_sdma_start_queue - Restart the SDMA queues of one engine instance
+ * @adev: amdgpu device pointer
+ * @instance_id: SDMA engine instance to restart
+ *
+ * Counterpart of amdgpu_sdma_stop_queue(): drops one stop reference and,
+ * when the last reference is released, re-enables the queues via the
+ * ring-specific start_queue callback and restarts the scheduler work
+ * queues so job submission resumes.
+ *
+ * Returns 0 on success, -EBUSY when other stop references remain (the
+ * queues stay stopped), or the error from down_read_killable().
+ */
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Drop our stop reference; only the last holder restarts the queues. */
+	atomic_dec(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_dec(&page_ring->stop_refcount);
+
+	if (atomic_read(&gfx_ring->stop_refcount) != 0 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 0)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	/* Hardware-specific queue start (e.g. SDMA v4.4.2 implementation). */
+	if (gfx_ring->funcs && gfx_ring->funcs->start_queue)
+		gfx_ring->funcs->start_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->start_queue)
+		page_ring->funcs->start_queue(adev, instance_id);
+
+	/* Restart the scheduler's work queue for the GFX and page rings.
+	 * Only touch the page ring when the ASIC actually has a page queue;
+	 * otherwise page_ring is uninitialized (the stop path and the
+	 * callback invocations above already apply this guard).
+	 */
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_start(&gfx_ring->sched);
+
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_start(&page_ring->sched);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
/**
* amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
* @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 965169320065..a91791fa3ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -170,6 +170,8 @@ struct amdgpu_buffer_funcs {
void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id);
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id);
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
--
2.25.1
More information about the amd-gfx
mailing list