[PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs

zhoucm1 david1.zhou at amd.com
Fri Aug 25 10:32:07 UTC 2017



On 2017年08月25日 17:38, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> Add the IOCTL interface so that applications can allocate per VM BOs.
>
> Still WIP since not all corner cases are tested yet, but this reduces average
> CS overhead for 10K BOs from 21ms down to 48us.
Wow, cheers, eventually you get per vm bo to same reservation with 
PD/pts, indeed save a lot of bo list.

overall looks good, I will take a detailed check for this tomorrow.

Regards,
David Zhou
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>   5 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b1e817c..21cab36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>    */
>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj);
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj);
>   
>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   			    struct drm_device *dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> index 0e907ea..7256f83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>   				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>   				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
> -				       true, &gobj);
> +				       true, NULL, &gobj);
>   	if (ret) {
>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..b8e8d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>   }
>   
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj)
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj)
>   {
> -	struct amdgpu_bo *robj;
> +	struct amdgpu_bo *bo;
>   	int r;
>   
>   	*obj = NULL;
> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   
>   retry:
>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
> -			     flags, NULL, NULL, 0, &robj);
> +			     flags, NULL, resv, 0, &bo);
>   	if (r) {
>   		if (r != -ERESTARTSYS) {
>   			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   		}
>   		return r;
>   	}
> -	*obj = &robj->gem_base;
> +	*obj = &bo->gem_base;
>   
>   	return 0;
>   }
> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   
>   	struct amdgpu_bo_list_entry vm_pd;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
>   	struct amdgpu_bo_va *bo_va;
>   	int r;
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   
>   	tv.bo = &bo->tbo;
>   	tv.shared = true;
> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   
>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>   	if (r) {
>   		dev_err(adev->dev, "leaking bo va because "
>   			"we fail to reserve bo (%d)\n", r);
> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *filp)
>   {
>   	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_vm *vm = &fpriv->vm;
>   	union drm_amdgpu_gem_create *args = data;
>   	uint64_t flags = args->in.domain_flags;
>   	uint64_t size = args->in.bo_size;
> +	struct reservation_object *resv = NULL;
>   	struct drm_gem_object *gobj;
>   	uint32_t handle;
>   	int r;
> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
> +		      AMDGPU_GEM_CREATE_LOCAL))
>   		return -EINVAL;
>   
>   	/* reject invalid gem domains */
> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	}
>   	size = roundup(size, PAGE_SIZE);
>   
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +		if (r)
> +			return r;
> +
> +		resv = vm->root.base.bo->tbo.resv;
> +	}
> +
>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>   				     (u32)(0xffffffff & args->in.domains),
> -				     flags, false, &gobj);
> +				     flags, false, resv, &gobj);
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		if (!r) {
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
> +		}
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>   	if (r)
>   		return r;
>   
> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	/* create a gem object to contain this object in */
> -	r = amdgpu_gem_object_create(adev, args->size, 0,
> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
> -				     0, &gobj);
> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
> +				     0, 0, NULL, &gobj);
>   	if (r)
>   		return r;
>   
> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	struct amdgpu_bo_list_entry vm_pd;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	uint64_t va_flags;
>   	int r = 0;
>   
> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>   		gobj = drm_gem_object_lookup(filp, args->handle);
> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   
>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>   	if (r)
>   		goto error_unref;
>   
> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *filp)
>   {
> +	struct amdgpu_device *adev = dev->dev_private;
>   	struct drm_amdgpu_gem_op *args = data;
>   	struct drm_gem_object *gobj;
>   	struct amdgpu_bo *robj;
> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>   			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>   
> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
> +			amdgpu_vm_bo_invalidate(adev, robj, true);
> +
>   		amdgpu_bo_unreserve(robj);
>   		break;
>   	default:
> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>   				     AMDGPU_GEM_DOMAIN_VRAM,
>   				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> -				     ttm_bo_type_device,
> -				     &gobj);
> +				     false, NULL, &gobj);
>   	if (r)
>   		return -ENOMEM;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..f407499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   {
>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>   
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>   		return ERR_PTR(-EPERM);
>   
>   	return drm_gem_prime_export(dev, gobj, flags);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d0ee739..05241a6 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -89,6 +89,8 @@ extern "C" {
>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>   /* Flag that allocating the BO should use linear VRAM */
>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
> +/* Flag that BO is local in the VM */
> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>   
>   struct drm_amdgpu_gem_create_in  {
>   	/** the requested memory size */



More information about the amd-gfx mailing list