[PATCH 1/2] drm/amdgpu: fix using the reserved VMID with gang submit
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Jun 11 11:43:57 UTC 2024
We need to ensure that even when using a reserved VMID that the gang
members can still run in parallel.
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 21 ++++++++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 36 ++++++++++++++++------
3 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1f71c7b98d77..e28fc07c7dbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1450,6 +1450,7 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
u32 reg);
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d549de26f931..94a6c0b1ae8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6532,6 +6532,22 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+/**
+ * amdgpu_device_get_gang - return a reference to the current gang
+ * @adev: amdgpu_device pointer
+ *
+ * Returns: A new reference to the current gang leader.
+ */
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
+{
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+ return fence;
+}
+
/**
* amdgpu_device_switch_gang - switch to a new gang
* @adev: amdgpu_device pointer
@@ -6548,10 +6564,7 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
do {
dma_fence_put(old);
- rcu_read_lock();
- old = dma_fence_get_rcu_safe(&adev->gang_submit);
- rcu_read_unlock();
-
+ old = amdgpu_device_get_gang(adev);
if (old == gang)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 3d7fcdeaf8cf..b5b9d4f40f53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -290,18 +290,36 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
!dma_fence_is_signaled((*id)->last_flush))) {
struct dma_fence *tmp;
- /* Don't use per engine and per process VMID at the same time */
- if (adev->vm_manager.concurrent_flush)
- ring = NULL;
-
- /* to prevent one context starved by another context */
- (*id)->pd_gpu_addr = 0;
- tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
- if (tmp) {
+ /* Wait for the gang to be assembled before using a
+ * reserved VMID or otherwise the gang could deadlock.
+ */
+ tmp = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
*id = NULL;
- *fence = dma_fence_get(tmp);
+ *fence = tmp;
return 0;
}
+ dma_fence_put(tmp);
+
+ /* Make sure the id is owned by the gang before proceeding */
+ if (!job->gang_submit ||
+ (*id)->owner != vm->immediate.fence_context) {
+
+ /* Don't use per engine and per process VMID at the
+ * same time
+ */
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
+ /* to prevent one context starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
+ *id = NULL;
+ *fence = dma_fence_get(tmp);
+ return 0;
+ }
+ }
needs_flush = true;
}
--
2.34.1
More information about the amd-gfx
mailing list