[PATCH v2 06/07] drm/amdgpu: add option params to enforce process isolation between graphics and compute

Christian König christian.koenig at amd.com
Wed Jun 7 12:07:42 UTC 2023


Am 07.06.23 um 12:57 schrieb Chong Li:
> Enforce process isolation between graphics and compute by using the same reserved VMID.
>
> v2: remove the "struct amdgpu_vm *vm" parameter from
>      amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved.
>
> Signed-off-by: Chong Li <chongli2 at amd.com>

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  6 ++----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 22 +++++++++++++++++-----
>   5 files changed, 36 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ce196badf42d..ef098a7287d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -215,6 +215,7 @@ extern int amdgpu_force_asic_type;
>   extern int amdgpu_smartshift_bias;
>   extern int amdgpu_use_xgmi_p2p;
>   extern int amdgpu_mtype_local;
> +extern bool enforce_isolation;
>   #ifdef CONFIG_HSA_AMD
>   extern int sched_policy;
>   extern bool debug_evictions;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 3d91e123f9bd..fdb6fb8229ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
>   uint amdgpu_sdma_phase_quantum = 32;
>   char *amdgpu_disable_cu;
>   char *amdgpu_virtual_display;
> -
> +bool enforce_isolation;
>   /*
>    * OverDrive(bit 14) disabled by default
>    * GFX DCS(bit 19) disabled by default
> @@ -973,6 +973,14 @@ MODULE_PARM_DESC(
>   						4 = AMDGPU_CPX_PARTITION_MODE)");
>   module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
>   
> +
> +/**
> + * DOC: enforce_isolation (bool)
> + * enforce process isolation between graphics and compute via using the same reserved vmid.
> + */
> +module_param(enforce_isolation, bool, 0444);
> +MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
> +
>   /* These devices are not supported by amdgpu.
>    * They are supported by the mach64, r128, radeon drivers
>    */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index c991ca0b7a1c..ff1ea99292fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   	if (r || !idle)
>   		goto error;
>   
> -	if (vm->reserved_vmid[vmhub]) {
> +	if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
>   		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
>   		if (r || !id)
>   			goto error;
> @@ -460,14 +460,11 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   }
>   
>   int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> -			       struct amdgpu_vm *vm,
>   			       unsigned vmhub)
>   {
>   	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>   
>   	mutex_lock(&id_mgr->lock);
> -	if (vm->reserved_vmid[vmhub])
> -		goto unlock;
>   
>   	++id_mgr->reserved_use_count;
>   	if (!id_mgr->reserved) {
> @@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
>   		list_del_init(&id->list);
>   		id_mgr->reserved = id;
>   	}
> -	vm->reserved_vmid[vmhub] = true;
>   
> -unlock:
>   	mutex_unlock(&id_mgr->lock);
>   	return 0;
>   }
>   
>   void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> -			       struct amdgpu_vm *vm,
>   			       unsigned vmhub)
>   {
>   	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>   
>   	mutex_lock(&id_mgr->lock);
> -	if (vm->reserved_vmid[vmhub] &&
> -	    !--id_mgr->reserved_use_count) {
> +	if (!--id_mgr->reserved_use_count) {
>   		/* give the reserved ID back to normal round robin */
>   		list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
>   		id_mgr->reserved = NULL;
>   	}
> -	vm->reserved_vmid[vmhub] = false;
> +
>   	mutex_unlock(&id_mgr->lock);
>   }
>   
> @@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
>   			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
>   		}
>   	}
> +	/* alloc a default reserved vmid to enforce isolation */
> +	if (enforce_isolation)
> +		amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
> +
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index d1cc09b45da4..68add23dc87c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>   bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>   			       struct amdgpu_vmid *id);
>   int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> -			       struct amdgpu_vm *vm,
> -			       unsigned vmhub);
> +					unsigned vmhub);
>   void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> -			       struct amdgpu_vm *vm,
> -			       unsigned vmhub);
> +					unsigned vmhub);
>   int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   		     struct amdgpu_job *job, struct dma_fence **fence);
>   void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea3d0be152fc..73900ab545c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2358,8 +2358,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	}
>   
>   	dma_fence_put(vm->last_update);
> -	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
> -		amdgpu_vmid_free_reserved(adev, vm, i);
> +
> +	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> +		if (vm->reserved_vmid[i]) {
> +			amdgpu_vmid_free_reserved(adev, i);
> +			vm->reserved_vmid[i] = false;
> +		}
> +	}
> +
>   }
>   
>   /**
> @@ -2447,13 +2453,19 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	switch (args->in.op) {
>   	case AMDGPU_VM_OP_RESERVE_VMID:
>   		/* We only have requirement to reserve vmid from gfxhub */
> -		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
> -					       AMDGPU_GFXHUB(0));
> +		if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> +			r = amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
> +			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
> +		}
> +
>   		if (r)
>   			return r;
>   		break;
>   	case AMDGPU_VM_OP_UNRESERVE_VMID:
> -		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0));
> +		if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> +			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
> +			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
> +		}
>   		break;
>   	default:
>   		return -EINVAL;



More information about the amd-gfx mailing list