[PATCH v2 06/07] drm/amdgpu: add option params to enforce process isolation between graphics and compute
Christian König
christian.koenig at amd.com
Wed Jun 7 12:07:42 UTC 2023
Am 07.06.23 um 12:57 schrieb Chong Li:
> Enforce process isolation between graphics and compute by using the same reserved VMID.
>
> v2: remove params "struct amdgpu_vm *vm" from
> amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved.
>
> Signed-off-by: Chong Li <chongli2 at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 6 ++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++-----
> 5 files changed, 36 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ce196badf42d..ef098a7287d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -215,6 +215,7 @@ extern int amdgpu_force_asic_type;
> extern int amdgpu_smartshift_bias;
> extern int amdgpu_use_xgmi_p2p;
> extern int amdgpu_mtype_local;
> +extern bool enforce_isolation;
> #ifdef CONFIG_HSA_AMD
> extern int sched_policy;
> extern bool debug_evictions;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 3d91e123f9bd..fdb6fb8229ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
> uint amdgpu_sdma_phase_quantum = 32;
> char *amdgpu_disable_cu;
> char *amdgpu_virtual_display;
> -
> +bool enforce_isolation;
> /*
> * OverDrive(bit 14) disabled by default
> * GFX DCS(bit 19) disabled by default
> @@ -973,6 +973,14 @@ MODULE_PARM_DESC(
> 4 = AMDGPU_CPX_PARTITION_MODE)");
> module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
>
> +
> +/**
> + * DOC: enforce_isolation (bool)
> + * Enforce process isolation between graphics and compute by using the same reserved VMID.
> + */
> +module_param(enforce_isolation, bool, 0444);
> +MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
> +
> /* These devices are not supported by amdgpu.
> * They are supported by the mach64, r128, radeon drivers
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index c991ca0b7a1c..ff1ea99292fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
> if (r || !idle)
> goto error;
>
> - if (vm->reserved_vmid[vmhub]) {
> + if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
> r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
> if (r || !id)
> goto error;
> @@ -460,14 +460,11 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
> }
>
> int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> unsigned vmhub)
> {
> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>
> mutex_lock(&id_mgr->lock);
> - if (vm->reserved_vmid[vmhub])
> - goto unlock;
>
> ++id_mgr->reserved_use_count;
> if (!id_mgr->reserved) {
> @@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> list_del_init(&id->list);
> id_mgr->reserved = id;
> }
> - vm->reserved_vmid[vmhub] = true;
>
> -unlock:
> mutex_unlock(&id_mgr->lock);
> return 0;
> }
>
> void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> unsigned vmhub)
> {
> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>
> mutex_lock(&id_mgr->lock);
> - if (vm->reserved_vmid[vmhub] &&
> - !--id_mgr->reserved_use_count) {
> + if (!--id_mgr->reserved_use_count) {
> /* give the reserved ID back to normal round robin */
> list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
> id_mgr->reserved = NULL;
> }
> - vm->reserved_vmid[vmhub] = false;
> +
> mutex_unlock(&id_mgr->lock);
> }
>
> @@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
> list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
> }
> }
> + /* alloc a default reserved vmid to enforce isolation */
> + if (enforce_isolation)
> + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
> +
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index d1cc09b45da4..68add23dc87c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
> bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
> struct amdgpu_vmid *id);
> int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - unsigned vmhub);
> + unsigned vmhub);
> void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - unsigned vmhub);
> + unsigned vmhub);
> int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
> struct amdgpu_job *job, struct dma_fence **fence);
> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea3d0be152fc..73900ab545c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2358,8 +2358,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> }
>
> dma_fence_put(vm->last_update);
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
> - amdgpu_vmid_free_reserved(adev, vm, i);
> +
> + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> + if (vm->reserved_vmid[i]) {
> + amdgpu_vmid_free_reserved(adev, i);
> + vm->reserved_vmid[i] = false;
> + }
> + }
> +
> }
>
> /**
> @@ -2447,13 +2453,19 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> switch (args->in.op) {
> case AMDGPU_VM_OP_RESERVE_VMID:
> /* We only have requirement to reserve vmid from gfxhub */
> - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
> - AMDGPU_GFXHUB(0));
> + if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> + r = amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
> + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
> + }
> +
> if (r)
> return r;
> break;
> case AMDGPU_VM_OP_UNRESERVE_VMID:
> - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0));
> + if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
> + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
> + }
> break;
> default:
> return -EINVAL;
More information about the amd-gfx mailing list