[PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v2

Christian König deathsimple at vodafone.de
Wed Aug 30 06:09:52 UTC 2017


Am 29.08.2017 um 19:20 schrieb Deucher, Alexander:
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>> Of Christian König
>> Sent: Tuesday, August 29, 2017 1:08 PM
>> To: amd-gfx at lists.freedesktop.org
>> Subject: [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v2
>>
>> From: Christian König <christian.koenig at amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces average
>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> v2: add some extra checks, remove the WIP tag
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 63
>> ++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>   include/uapi/drm/amdgpu_drm.h             |  2 +
>>   5 files changed, 55 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>    */
>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>> long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj);
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj);
>>
>>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>   			    struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>> amdgpu_fbdev *rfbdev,
>>
>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>
>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>
>> AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> -				       true, &gobj);
>> +				       true, NULL, &gobj);
>>   	if (ret) {
>>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>   		return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index e32a2b5..a835304 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct
>> drm_gem_object *gobj)
>>   }
>>
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>> long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj)
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj)
>>   {
>> -	struct amdgpu_bo *robj;
>> +	struct amdgpu_bo *bo;
>>   	int r;
>>
>>   	*obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>> *adev, unsigned long size,
>>
>>   retry:
>>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>> -			     flags, NULL, NULL, 0, &robj);
>> +			     flags, NULL, resv, 0, &bo);
>>   	if (r) {
>>   		if (r != -ERESTARTSYS) {
>>   			if (initial_domain ==
>> AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>> *adev, unsigned long size,
>>   		}
>>   		return r;
>>   	}
>> -	*obj = &robj->gem_base;
>> +	*obj = &bo->gem_base;
>>
>>   	return 0;
>>   }
>> @@ -119,6 +120,10 @@ int amdgpu_gem_object_open(struct
>> drm_gem_object *obj,
>>   	if (mm && mm != current->mm)
>>   		return -EPERM;
>>
>> +	if (abo->flags & AMDGPU_GEM_CREATE_LOCAL &&
>> +	    abo->tbo.resv != vm->root.base.bo->tbo.resv)
>> +		return -EPERM;
>> +
>>   	r = amdgpu_bo_reserve(abo, false);
>>   	if (r)
>>   		return r;
>> @@ -142,13 +147,14 @@ void amdgpu_gem_object_close(struct
>> drm_gem_object *obj,
>>   	struct amdgpu_vm *vm = &fpriv->vm;
>>
>>   	struct amdgpu_bo_list_entry vm_pd;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>>   	struct amdgpu_bo_va *bo_va;
>>   	int r;
>>
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>
>>   	tv.bo = &bo->tbo;
>>   	tv.shared = true;
>> @@ -156,7 +162,7 @@ void amdgpu_gem_object_close(struct
>> drm_gem_object *obj,
>>
>>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>   	if (r) {
>>   		dev_err(adev->dev, "leaking bo va because "
>>   			"we fail to reserve bo (%d)\n", r);
>> @@ -191,9 +197,12 @@ int amdgpu_gem_create_ioctl(struct drm_device
>> *dev, void *data,
>>   			    struct drm_file *filp)
>>   {
>>   	struct amdgpu_device *adev = dev->dev_private;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	struct amdgpu_vm *vm = &fpriv->vm;
>>   	union drm_amdgpu_gem_create *args = data;
>>   	uint64_t flags = args->in.domain_flags;
>>   	uint64_t size = args->in.bo_size;
>> +	struct reservation_object *resv = NULL;
>>   	struct drm_gem_object *gobj;
>>   	uint32_t handle;
>>   	int r;
>> @@ -202,7 +211,8 @@ int amdgpu_gem_create_ioctl(struct drm_device
>> *dev, void *data,
>>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> +		      AMDGPU_GEM_CREATE_LOCAL))
>>   		return -EINVAL;
>>
>>   	/* reject invalid gem domains */
>> @@ -229,9 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device
>> *dev, void *data,
>>   	}
>>   	size = roundup(size, PAGE_SIZE);
>>
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +		if (r)
>> +			return r;
>> +
>> +		resv = vm->root.base.bo->tbo.resv;
>> +	}
>> +
>>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>   				     (u32)(0xffffffff & args->in.domains),
>> -				     flags, false, &gobj);
>> +				     flags, false, resv, &gobj);
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		if (!r) {
>> +			struct amdgpu_bo *abo =
>> gem_to_amdgpu_bo(gobj);
>> +
>> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> +		}
>> +		amdgpu_bo_unreserve(vm->root.base.bo);
>> +	}
>>   	if (r)
>>   		return r;
>>
>> @@ -273,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>> *dev, void *data,
>>   	}
>>
>>   	/* create a gem object to contain this object in */
>> -	r = amdgpu_gem_object_create(adev, args->size, 0,
>> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
>> -				     0, &gobj);
>> +	r = amdgpu_gem_object_create(adev, args->size, 0,
>> AMDGPU_GEM_DOMAIN_CPU,
>> +				     0, 0, NULL, &gobj);
>>   	if (r)
>>   		return r;
>>
>> @@ -527,7 +552,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>>   	struct amdgpu_bo_list_entry vm_pd;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	uint64_t va_flags;
>>   	int r = 0;
>>
>> @@ -563,6 +588,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>>   	}
>>
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>   		gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -579,7 +605,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>>
>>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>   	if (r)
>>   		goto error_unref;
>>
>> @@ -645,6 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   			struct drm_file *filp)
>>   {
>> +	struct amdgpu_device *adev = dev->dev_private;
>>   	struct drm_amdgpu_gem_op *args = data;
>>   	struct drm_gem_object *gobj;
>>   	struct amdgpu_bo *robj;
>> @@ -692,6 +719,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev,
>> void *data,
>>   		if (robj->allowed_domains ==
>> AMDGPU_GEM_DOMAIN_VRAM)
>>   			robj->allowed_domains |=
>> AMDGPU_GEM_DOMAIN_GTT;
>>
>> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> +			amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>>   		amdgpu_bo_unreserve(robj);
>>   		break;
>>   	default:
>> @@ -721,8 +751,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>> *file_priv,
>>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>>   				     AMDGPU_GEM_DOMAIN_VRAM,
>>
>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> -				     ttm_bo_type_device,
>> -				     &gobj);
>> +				     false, NULL, &gobj);
>>   	if (r)
>>   		return -ENOMEM;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>> drm_device *dev,
>>   {
>>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>
>> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>   		return ERR_PTR(-EPERM);
>>
>>   	return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>>   /* Flag that allocating the BO should use linear VRAM */
>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
> I'm not crazy about the name LOCAL.  Maybe something like ALWAYS_VALID?

Works for me as well. Dave any other opinion?

If everybody is ok with ALWAYS_VALID I'm going to use that one.

Christian.

>
> Alex
>
>>   struct drm_amdgpu_gem_create_in  {
>>   	/** the requested memory size */
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx




More information about the amd-gfx mailing list