[PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
Christian König
deathsimple at vodafone.de
Sat Aug 26 13:20:10 UTC 2017
Am 25.08.2017 um 23:31 schrieb Felix Kuehling:
> That's clever. I was scratching my head where the BOs were getting
> validated, just by sharing the VM reservation object. Until I carefully
> read your previous commit.
Yeah, didn't had time to properly comment on your last mail.
> In general, I find the VM code extremely frustrating and confusing to
> review. Too many lists of different things, and it's really hard to keep
> track of which list track what type of object, in which situation.
>
> For example, take struct amdgpu_vm:
>
> /* BOs who needs a validation */
> struct list_head evicted;
>
> /* BOs moved, but not yet updated in the PT */
> struct list_head moved;
>
> /* BO mappings freed, but not yet updated in the PT */
> struct list_head freed;
>
> Three lists of BOs (according to the comments). But evicted and moved
> are lists of amdgpu_vm_bo_base, freed is a list of amdgpu_bo_va_mapping.
> moved and freed are used for tracking BOs mapped in the VM. I think
> moved may also track page table BOs, but I'm not sure. evicted is used
> only for tracking page table BOs.
>
> In patch #7 you add relocated to the mix. Now it gets really funny.
> What's the difference between relocated and evicted and moved?
Essentially nothing. I actually tried to merge them, but then realized
that this would probably remove the ability to only update the
directories and so most likely your KFD usage of that.
> It seems
> PT BOs can be on any of these lists. I think evicted means the BO needs
> to be validated. moved or relocated means it's been validated but its
> mappings must be updated. For PT BOs and mapped BOs that means different
> things, so it makes sense to have different lists for them. But I think
> PT BOs can also end up on the moved list when amdgpu_vm_bo_invalidate is
> called for a page table BO (through amdgpu_bo_move_notify). So I'm still
> confused.
amdgpu_vm_bo_invalidate() has logic to always put PDs and PTs on the
relocated list and everything else on the moved list. So PDs/PTs should
never end up on the moved list.
> I think this could be clarified with more descriptive names for the
> lists. If PT BOs and mapped BOs must be tracked separately that should
> be clear from the names.
That is a good idea, but in the long term I want to merge reloacted and
moved list and then decide while walking the list what to do, but that
is the next step I think.
Regards,
Christian.
>
> </rant>
>
> Regards,
> Felix
>
>
> On 2017-08-25 05:38 AM, Christian König wrote:
>> From: Christian König <christian.koenig at amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces average
>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 59 ++++++++++++++++++++++---------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +-
>> include/uapi/drm/amdgpu_drm.h | 2 ++
>> 5 files changed, 51 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>> */
>> void amdgpu_gem_force_release(struct amdgpu_device *adev);
>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> - int alignment, u32 initial_domain,
>> - u64 flags, bool kernel,
>> - struct drm_gem_object **obj);
>> + int alignment, u32 initial_domain,
>> + u64 flags, bool kernel,
>> + struct reservation_object *resv,
>> + struct drm_gem_object **obj);
>>
>> int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>> struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>> AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> - true, &gobj);
>> + true, NULL, &gobj);
>> if (ret) {
>> pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>> return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d028806..b8e8d67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>> }
>>
>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> - int alignment, u32 initial_domain,
>> - u64 flags, bool kernel,
>> - struct drm_gem_object **obj)
>> + int alignment, u32 initial_domain,
>> + u64 flags, bool kernel,
>> + struct reservation_object *resv,
>> + struct drm_gem_object **obj)
>> {
>> - struct amdgpu_bo *robj;
>> + struct amdgpu_bo *bo;
>> int r;
>>
>> *obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>>
>> retry:
>> r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>> - flags, NULL, NULL, 0, &robj);
>> + flags, NULL, resv, 0, &bo);
>> if (r) {
>> if (r != -ERESTARTSYS) {
>> if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> }
>> return r;
>> }
>> - *obj = &robj->gem_base;
>> + *obj = &bo->gem_base;
>>
>> return 0;
>> }
>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>> struct amdgpu_vm *vm = &fpriv->vm;
>>
>> struct amdgpu_bo_list_entry vm_pd;
>> - struct list_head list;
>> + struct list_head list, duplicates;
>> struct ttm_validate_buffer tv;
>> struct ww_acquire_ctx ticket;
>> struct amdgpu_bo_va *bo_va;
>> int r;
>>
>> INIT_LIST_HEAD(&list);
>> + INIT_LIST_HEAD(&duplicates);
>>
>> tv.bo = &bo->tbo;
>> tv.shared = true;
>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>
>> amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>
>> - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>> if (r) {
>> dev_err(adev->dev, "leaking bo va because "
>> "we fail to reserve bo (%d)\n", r);
>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp)
>> {
>> struct amdgpu_device *adev = dev->dev_private;
>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> + struct amdgpu_vm *vm = &fpriv->vm;
>> union drm_amdgpu_gem_create *args = data;
>> uint64_t flags = args->in.domain_flags;
>> uint64_t size = args->in.bo_size;
>> + struct reservation_object *resv = NULL;
>> struct drm_gem_object *gobj;
>> uint32_t handle;
>> int r;
>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> - AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> + AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> + AMDGPU_GEM_CREATE_LOCAL))
>> return -EINVAL;
>>
>> /* reject invalid gem domains */
>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>> }
>> size = roundup(size, PAGE_SIZE);
>>
>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> + r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> + if (r)
>> + return r;
>> +
>> + resv = vm->root.base.bo->tbo.resv;
>> + }
>> +
>> r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>> (u32)(0xffffffff & args->in.domains),
>> - flags, false, &gobj);
>> + flags, false, resv, &gobj);
>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> + if (!r) {
>> + struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>> +
>> + abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> + }
>> + amdgpu_bo_unreserve(vm->root.base.bo);
>> + }
>> if (r)
>> return r;
>>
>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>> }
>>
>> /* create a gem object to contain this object in */
>> - r = amdgpu_gem_object_create(adev, args->size, 0,
>> - AMDGPU_GEM_DOMAIN_CPU, 0,
>> - 0, &gobj);
>> + r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
>> + 0, 0, NULL, &gobj);
>> if (r)
>> return r;
>>
>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>> struct amdgpu_bo_list_entry vm_pd;
>> struct ttm_validate_buffer tv;
>> struct ww_acquire_ctx ticket;
>> - struct list_head list;
>> + struct list_head list, duplicates;
>> uint64_t va_flags;
>> int r = 0;
>>
>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>> }
>>
>> INIT_LIST_HEAD(&list);
>> + INIT_LIST_HEAD(&duplicates);
>> if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>> !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>> gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>
>> amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>
>> - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>> if (r)
>> goto error_unref;
>>
>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>> int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp)
>> {
>> + struct amdgpu_device *adev = dev->dev_private;
>> struct drm_amdgpu_gem_op *args = data;
>> struct drm_gem_object *gobj;
>> struct amdgpu_bo *robj;
>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>> if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>> robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>
>> + if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> + amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>> amdgpu_bo_unreserve(robj);
>> break;
>> default:
>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>> r = amdgpu_gem_object_create(adev, args->size, 0,
>> AMDGPU_GEM_DOMAIN_VRAM,
>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> - ttm_bo_type_device,
>> - &gobj);
>> + false, NULL, &gobj);
>> if (r)
>> return -ENOMEM;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>> {
>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>
>> - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> + bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>> return ERR_PTR(-EPERM);
>>
>> return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>> #define AMDGPU_GEM_CREATE_SHADOW (1 << 4)
>> /* Flag that allocating the BO should use linear VRAM */
>> #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
>>
>> struct drm_amdgpu_gem_create_in {
>> /** the requested memory size */
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list