[PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap

Fri Aug 8 06:29:10 UTC 2025

This change validates the userq to see whether can be
unmapped prior to the userq VA GEM unmap. The solution
is based on the following idea:
1) Find out the GEM unmap VA belonds to which userq,
2) Check the userq fence fence whether is signaled,
3) If the userq attached fences signal failed, then
   mark it as illegal VA opt and give a warning message
   for this illegal userspace request.

Suggested-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Prike Liang <Prike.Liang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  10 ++
 3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 771f57d09060..314d482849c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		}
 	}
 
-
 	args->out.queue_id = qid;
 
 unlock:
@@ -1214,3 +1213,109 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
 	mutex_unlock(&adev->userq_mutex);
 	return ret;
 }
+
+/**
+ * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem unmap va
+ * @queue: destinated userq for finding out from unmap va
+ * @va: the GEM unmap virtual address already aligned in mapping range
+ * Find out the corresponding userq by comparing
+ * the GEM unmap VA with userq VAs.
+ */
+static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct amdgpu_usermode_queue *queue,
+                                                       uint64_t va)
+{
+	va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
+
+	switch (queue->queue_type) {
+	case AMDGPU_HW_IP_GFX:
+		if (queue->queue_va == va ||
+		    queue->wptr_va  == va ||
+		    queue->rptr_va  == va ||
+		    queue->shadow_va == va ||
+		    queue->csa_va  == va)
+			return true;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		if (queue->queue_va == va ||
+		    queue->wptr_va == va ||
+		    queue->rptr_va  == va ||
+		    queue->eop_va  == va)
+			return true;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		if (queue->queue_va == va ||
+		    queue->wptr_va == va ||
+		    queue->rptr_va == va ||
+		    queue->csa_va == va)
+			return true;
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+				uint64_t va)
+{
+	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_userq_mgr *uqm, *tmp;
+	int queue_id;
+	int ret;
+
+	if (!ip_mask)
+		return 0;
+
+	/**
+	 * validate the unmap va sequence:
+	 * 1) Find out the GEM unmap VA belonds to which userq,
+	 * 2) Check the userq fence whether is signaled,
+	 * 3) If the userq attached fences signal failed, then
+	 *    mark as invalid va opt and give a warning message
+	 *    for this illegal userspace request.
+	 */
+
+	if (mutex_trylock(&adev->userq_mutex)) {
+		list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+
+			if (!mutex_trylock(&uqm->userq_mutex))
+				continue;
+
+			idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+
+				if (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, va)) {
+					dev_dbg(uqm->adev->dev, "va: 0x%llx not belond to queue id: %d\n",
+						va, queue_id);
+					continue;
+				}
+
+				if (queue->last_fence && !dma_fence_is_signaled(queue->last_fence)) {
+					drm_file_err(uqm->file, "an illegal VA unmap for the userq\n");
+					queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+					ret = -ETIMEDOUT;
+					goto err;
+				}
+				/*
+				 * At here still can't suspend the userq since here just one kind of
+				 * VA unmapped, and some other VAs of userq may still be mapped. After
+				 * this point this VA mapping will be deteled and the VA will be unmapped
+				 * so will not resume the userq when its VA unmapped.
+				 */
+			}
+			mutex_unlock(&uqm->userq_mutex);
+		}
+	} else {
+			/* Maybe we need a try lock again before return*/
+			return -EBUSY;
+	}
+
+	mutex_unlock(&adev->userq_mutex);
+	return 0;
+err:
+	mutex_unlock(&uqm->userq_mutex);
+	mutex_unlock(&adev->userq_mutex);
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index cf35b6140a3d..27ab8a6a7be6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
 int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
 int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
 			struct amdgpu_usermode_queue *queue);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+				uint64_t va);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f042372d9f2e..533954c0d234 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	bool valid = true;
+	int r;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 
@@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 			return -ENOENT;
 	}
 
+	/* It's unlikely to happen that the mapping userq hasn't been idled
+	 * during user requests GEM unmap IOCTL except for forcing the unmap
+	 * from user space.
+	 */
+
+	r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
+	if (unlikely(r && r != -EBUSY))
+		dev_warn(adev->dev, "Here should be an improper unmap request from user space\n");
+
 	list_del(&mapping->list);
 	amdgpu_vm_it_remove(mapping, &vm->va);
 	mapping->bo_va = NULL;
-- 
2.34.1