[PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs

Marek Olšák maraeo at gmail.com
Fri Aug 25 16:22:12 UTC 2017


On Fri, Aug 25, 2017 at 3:00 PM, Christian König
<deathsimple at vodafone.de> wrote:
> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>
>>
>>
>> On 2017年08月25日 17:38, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig at amd.com>
>>>
>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>
>>> Still WIP since not all corner cases are tested yet, but this reduces
>>> average
>>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Wow, cheers, eventually you get per vm bo to same reservation with PD/pts,
>> indeed save a lot of bo list.
>
>
> Don't cheer to loud yet, that is a completely constructed test case.
>
> So far I wasn't able to archive any improvements with any real game on this
> with Mesa.
>
> BTW: Marek can you take a look with some CPU bound tests? I can provide a
> kernel branch if necessary.

Do you have a branch that works on Raven? This patch series doesn't,
and I didn't investigate why.

Marek

>
> Regards,
> Christian.
>
>
>> overall looks good, I will take a detailed check for this tomorrow.
>>
>> Regards,
>> David Zhou
>>>
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b1e817c..21cab36 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>    */
>>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>> size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj);
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj);
>>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>                   struct drm_device *dev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> index 0e907ea..7256f83 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>> amdgpu_fbdev *rfbdev,
>>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>> -                       true, &gobj);
>>> +                       true, NULL, &gobj);
>>>       if (ret) {
>>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>           return -ENOMEM;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d028806..b8e8d67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>> *gobj)
>>>   }
>>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>> long size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj)
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj)
>>>   {
>>> -    struct amdgpu_bo *robj;
>>> +    struct amdgpu_bo *bo;
>>>       int r;
>>>         *obj = NULL;
>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>     retry:
>>>       r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>> -                 flags, NULL, NULL, 0, &robj);
>>> +                 flags, NULL, resv, 0, &bo);
>>>       if (r) {
>>>           if (r != -ERESTARTSYS) {
>>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>           }
>>>           return r;
>>>       }
>>> -    *obj = &robj->gem_base;
>>> +    *obj = &bo->gem_base;
>>>         return 0;
>>>   }
>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct amdgpu_vm *vm = &fpriv->vm;
>>>         struct amdgpu_bo_list_entry vm_pd;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>>       struct amdgpu_bo_va *bo_va;
>>>       int r;
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>         tv.bo = &bo->tbo;
>>>       tv.shared = true;
>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>       if (r) {
>>>           dev_err(adev->dev, "leaking bo va because "
>>>               "we fail to reserve bo (%d)\n", r);
>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>                   struct drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>       union drm_amdgpu_gem_create *args = data;
>>>       uint64_t flags = args->in.domain_flags;
>>>       uint64_t size = args->in.bo_size;
>>> +    struct reservation_object *resv = NULL;
>>>       struct drm_gem_object *gobj;
>>>       uint32_t handle;
>>>       int r;
>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>           return -EINVAL;
>>>         /* reject invalid gem domains */
>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>       size = roundup(size, PAGE_SIZE);
>>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +        resv = vm->root.base.bo->tbo.resv;
>>> +    }
>>> +
>>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>                        (u32)(0xffffffff & args->in.domains),
>>> -                     flags, false, &gobj);
>>> +                     flags, false, resv, &gobj);
>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        if (!r) {
>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>> +
>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>> +        }
>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>> +    }
>>>       if (r)
>>>           return r;
>>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>> *dev, void *data,
>>>       }
>>>         /* create a gem object to contain this object in */
>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>> -                     0, &gobj);
>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> AMDGPU_GEM_DOMAIN_CPU,
>>> +                     0, 0, NULL, &gobj);
>>>       if (r)
>>>           return r;
>>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       struct amdgpu_bo_list_entry vm_pd;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       uint64_t va_flags;
>>>       int r = 0;
>>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>           gobj = drm_gem_object_lookup(filp, args->handle);
>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>> *data,
>>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>       if (r)
>>>           goto error_unref;
>>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>               struct drm_file *filp)
>>>   {
>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>       struct drm_amdgpu_gem_op *args = data;
>>>       struct drm_gem_object *gobj;
>>>       struct amdgpu_bo *robj;
>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>> *data,
>>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>> +
>>>           amdgpu_bo_unreserve(robj);
>>>           break;
>>>       default:
>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>> *file_priv,
>>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>> -                     ttm_bo_type_device,
>>> -                     &gobj);
>>> +                     false, NULL, &gobj);
>>>       if (r)
>>>           return -ENOMEM;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index 5b3f928..f407499 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>> drm_device *dev,
>>>   {
>>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>           return ERR_PTR(-EPERM);
>>>         return drm_gem_prime_export(dev, gobj, flags);
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index d0ee739..05241a6 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -89,6 +89,8 @@ extern "C" {
>>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>   /* Flag that allocating the BO should use linear VRAM */
>>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>> +/* Flag that BO is local in the VM */
>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>     struct drm_amdgpu_gem_create_in  {
>>>       /** the requested memory size */
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list