[PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
Christian König
deathsimple at vodafone.de
Fri Aug 25 19:19:21 UTC 2017
Am 25.08.2017 um 18:22 schrieb Marek Olšák:
> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
> <deathsimple at vodafone.de> wrote:
>> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>>
>>>
>>> On 2017年08月25日 17:38, Christian König wrote:
>>>> From: Christian König <christian.koenig at amd.com>
>>>>
>>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>>
>>>> Still WIP since not all corner cases are tested yet, but this reduces
>>>> average
>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>> Wow, cheers, eventually you get per vm bo to same reservation with PD/pts,
>>> indeed save a lot of bo list.
>>
>> Don't cheer to loud yet, that is a completely constructed test case.
>>
>> So far I wasn't able to archive any improvements with any real game on this
>> with Mesa.
>>
>> BTW: Marek can you take a look with some CPU bound tests? I can provide a
>> kernel branch if necessary.
> Do you have a branch that works on Raven? This patch series doesn't,
> and I didn't investigate why.
I will come up with one on Monday.
Have a nice weekend guys,
Christian.
>
> Marek
>
>> Regards,
>> Christian.
>>
>>
>>> overall looks good, I will take a detailed check for this tomorrow.
>>>
>>> Regards,
>>> David Zhou
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++--
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +-
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 59
>>>> ++++++++++++++++++++++---------
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +-
>>>> include/uapi/drm/amdgpu_drm.h | 2 ++
>>>> 5 files changed, 51 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index b1e817c..21cab36 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>> */
>>>> void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>>> size,
>>>> - int alignment, u32 initial_domain,
>>>> - u64 flags, bool kernel,
>>>> - struct drm_gem_object **obj);
>>>> + int alignment, u32 initial_domain,
>>>> + u64 flags, bool kernel,
>>>> + struct reservation_object *resv,
>>>> + struct drm_gem_object **obj);
>>>> int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>> struct drm_device *dev,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> index 0e907ea..7256f83 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>> amdgpu_fbdev *rfbdev,
>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>> AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>> - true, &gobj);
>>>> + true, NULL, &gobj);
>>>> if (ret) {
>>>> pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>> return -ENOMEM;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index d028806..b8e8d67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>>> *gobj)
>>>> }
>>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>>> long size,
>>>> - int alignment, u32 initial_domain,
>>>> - u64 flags, bool kernel,
>>>> - struct drm_gem_object **obj)
>>>> + int alignment, u32 initial_domain,
>>>> + u64 flags, bool kernel,
>>>> + struct reservation_object *resv,
>>>> + struct drm_gem_object **obj)
>>>> {
>>>> - struct amdgpu_bo *robj;
>>>> + struct amdgpu_bo *bo;
>>>> int r;
>>>> *obj = NULL;
>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>> retry:
>>>> r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>>> - flags, NULL, NULL, 0, &robj);
>>>> + flags, NULL, resv, 0, &bo);
>>>> if (r) {
>>>> if (r != -ERESTARTSYS) {
>>>> if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>> }
>>>> return r;
>>>> }
>>>> - *obj = &robj->gem_base;
>>>> + *obj = &bo->gem_base;
>>>> return 0;
>>>> }
>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>> struct amdgpu_vm *vm = &fpriv->vm;
>>>> struct amdgpu_bo_list_entry vm_pd;
>>>> - struct list_head list;
>>>> + struct list_head list, duplicates;
>>>> struct ttm_validate_buffer tv;
>>>> struct ww_acquire_ctx ticket;
>>>> struct amdgpu_bo_va *bo_va;
>>>> int r;
>>>> INIT_LIST_HEAD(&list);
>>>> + INIT_LIST_HEAD(&duplicates);
>>>> tv.bo = &bo->tbo;
>>>> tv.shared = true;
>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>> amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>> - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>> if (r) {
>>>> dev_err(adev->dev, "leaking bo va because "
>>>> "we fail to reserve bo (%d)\n", r);
>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> struct drm_file *filp)
>>>> {
>>>> struct amdgpu_device *adev = dev->dev_private;
>>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> + struct amdgpu_vm *vm = &fpriv->vm;
>>>> union drm_amdgpu_gem_create *args = data;
>>>> uint64_t flags = args->in.domain_flags;
>>>> uint64_t size = args->in.bo_size;
>>>> + struct reservation_object *resv = NULL;
>>>> struct drm_gem_object *gobj;
>>>> uint32_t handle;
>>>> int r;
>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>> - AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>> + AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>> + AMDGPU_GEM_CREATE_LOCAL))
>>>> return -EINVAL;
>>>> /* reject invalid gem domains */
>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> }
>>>> size = roundup(size, PAGE_SIZE);
>>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> + r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>> + if (r)
>>>> + return r;
>>>> +
>>>> + resv = vm->root.base.bo->tbo.resv;
>>>> + }
>>>> +
>>>> r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>> (u32)(0xffffffff & args->in.domains),
>>>> - flags, false, &gobj);
>>>> + flags, false, resv, &gobj);
>>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> + if (!r) {
>>>> + struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>> +
>>>> + abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>> + }
>>>> + amdgpu_bo_unreserve(vm->root.base.bo);
>>>> + }
>>>> if (r)
>>>> return r;
>>>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>>> *dev, void *data,
>>>> }
>>>> /* create a gem object to contain this object in */
>>>> - r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> - AMDGPU_GEM_DOMAIN_CPU, 0,
>>>> - 0, &gobj);
>>>> + r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>> + 0, 0, NULL, &gobj);
>>>> if (r)
>>>> return r;
>>>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> struct amdgpu_bo_list_entry vm_pd;
>>>> struct ttm_validate_buffer tv;
>>>> struct ww_acquire_ctx ticket;
>>>> - struct list_head list;
>>>> + struct list_head list, duplicates;
>>>> uint64_t va_flags;
>>>> int r = 0;
>>>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> }
>>>> INIT_LIST_HEAD(&list);
>>>> + INIT_LIST_HEAD(&duplicates);
>>>> if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>> !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>> gobj = drm_gem_object_lookup(filp, args->handle);
>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>>> *data,
>>>> amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>> - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>> + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>> if (r)
>>>> goto error_unref;
>>>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>> int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>> struct drm_file *filp)
>>>> {
>>>> + struct amdgpu_device *adev = dev->dev_private;
>>>> struct drm_amdgpu_gem_op *args = data;
>>>> struct drm_gem_object *gobj;
>>>> struct amdgpu_bo *robj;
>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>>> *data,
>>>> if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>> robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>> + if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>> + amdgpu_vm_bo_invalidate(adev, robj, true);
>>>> +
>>>> amdgpu_bo_unreserve(robj);
>>>> break;
>>>> default:
>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>> *file_priv,
>>>> r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> AMDGPU_GEM_DOMAIN_VRAM,
>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>> - ttm_bo_type_device,
>>>> - &gobj);
>>>> + false, NULL, &gobj);
>>>> if (r)
>>>> return -ENOMEM;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> index 5b3f928..f407499 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>> drm_device *dev,
>>>> {
>>>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>> - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>> + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>> + bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>> return ERR_PTR(-EPERM);
>>>> return drm_gem_prime_export(dev, gobj, flags);
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>> b/include/uapi/drm/amdgpu_drm.h
>>>> index d0ee739..05241a6 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>> #define AMDGPU_GEM_CREATE_SHADOW (1 << 4)
>>>> /* Flag that allocating the BO should use linear VRAM */
>>>> #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
>>>> +/* Flag that BO is local in the VM */
>>>> +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
>>>> struct drm_amdgpu_gem_create_in {
>>>> /** the requested memory size */
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list