[PATCH 2/2] drm/amdgpu: use a single linked list for amdgpu_vm_bo_base
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Sep 13 11:44:09 UTC 2018
Am 13.09.2018 um 00:58 schrieb Felix Kuehling:
> Is the small reduction in memory footprint (8 bytes per page table on a
> 64-bit kernel) really worth the trouble of open-coding a single-linked
> list implementation?
Well the key point is it is now a power of two again. So we don't waste
28KB per page table (on 4 levels) any more because it is rounded up to
the next order size :)
> I guess this change makes a bigger difference for
> 2-level page tables than it does for 4-level, because the amdgpu_vm_pt
> array is allocated at the page directory level and includes page tables
> that don't even exist yet and may never exist. The amount of memory you
> save is the same as the size of the page directory.
>
> I wonder if the overhead could be reduced more effectively by allocating
> struct amdgpu_vm_pt with the page table, rather than with the page
> directory. Then the amdgpu_vm_pt.entries array would be an array of
> pointers instead. It could be an array[0] at the end of the structure
> since the number of entries is know then the page directory is
> allocated. The BO could also be embedded in the amdgpu_vm_pt structure
> so it doesn't need to be a separate allocation from the amdgpu_vm_pt.
Yeah, thought about that as well. But the change looked to invasive on
first glance.
> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
Thanks,
Christian.
>
> Regards,
> Felix
>
>
> On 2018-09-12 04:55 AM, Christian König wrote:
>> Instead of the double linked list. Gets the size of amdgpu_vm_pt down to
>> 64 bytes again.
>>
>> We could even reduce it down to 32 bytes, but that would require some
>> rather extreme hacks.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 ++--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 38 ++++++++++++++++++++----------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
>> 4 files changed, 29 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index de990bdcdd6c..e6909252aefa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -448,7 +448,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>> return -ENOMEM;
>> drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
>> INIT_LIST_HEAD(&bo->shadow_list);
>> - INIT_LIST_HEAD(&bo->va);
>> + bo->vm_bo = NULL;
>> bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
>> bp->domain;
>> bo->allowed_domains = bo->preferred_domains;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 907fdf46d895..64337ff2ad63 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -89,8 +89,8 @@ struct amdgpu_bo {
>> void *metadata;
>> u32 metadata_size;
>> unsigned prime_shared_count;
>> - /* list of all virtual address to which this bo is associated to */
>> - struct list_head va;
>> + /* per VM structure for page tables and with virtual addresses */
>> + struct amdgpu_vm_bo_base *vm_bo;
>> /* Constant after initialization */
>> struct drm_gem_object gem_base;
>> struct amdgpu_bo *parent;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index cb6a5114128e..fb6b16273c54 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -309,12 +309,13 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>> {
>> base->vm = vm;
>> base->bo = bo;
>> - INIT_LIST_HEAD(&base->bo_list);
>> + base->next = NULL;
>> INIT_LIST_HEAD(&base->vm_status);
>>
>> if (!bo)
>> return;
>> - list_add_tail(&base->bo_list, &bo->va);
>> + base->next = bo->vm_bo;
>> + bo->vm_bo = base;
>>
>> if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
>> return;
>> @@ -352,7 +353,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
>> if (!parent)
>> return NULL;
>>
>> - return list_first_entry(&parent->va, struct amdgpu_vm_pt, base.bo_list);
>> + return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
>> }
>>
>> /**
>> @@ -954,7 +955,7 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
>> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
>>
>> if (entry->base.bo) {
>> - list_del(&entry->base.bo_list);
>> + entry->base.bo->vm_bo = NULL;
>> list_del(&entry->base.vm_status);
>> amdgpu_bo_unref(&entry->base.bo->shadow);
>> amdgpu_bo_unref(&entry->base.bo);
>> @@ -1162,12 +1163,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
>> struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>> struct amdgpu_bo *bo)
>> {
>> - struct amdgpu_bo_va *bo_va;
>> + struct amdgpu_vm_bo_base *base;
>>
>> - list_for_each_entry(bo_va, &bo->va, base.bo_list) {
>> - if (bo_va->base.vm == vm) {
>> - return bo_va;
>> - }
>> + for (base = bo->vm_bo; base; base = base->next) {
>> + if (base->vm != vm)
>> + continue;
>> +
>> + return container_of(base, struct amdgpu_bo_va, base);
>> }
>> return NULL;
>> }
>> @@ -2728,11 +2730,21 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>> struct amdgpu_bo_va_mapping *mapping, *next;
>> struct amdgpu_bo *bo = bo_va->base.bo;
>> struct amdgpu_vm *vm = bo_va->base.vm;
>> + struct amdgpu_vm_bo_base **base;
>>
>> - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> - vm->bulk_moveable = false;
>> + if (bo) {
>> + if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> + vm->bulk_moveable = false;
>>
>> - list_del(&bo_va->base.bo_list);
>> + for (base = &bo_va->base.bo->vm_bo; *base;
>> + base = &(*base)->next) {
>> + if (*base != &bo_va->base)
>> + continue;
>> +
>> + *base = bo_va->base.next;
>> + break;
>> + }
>> + }
>>
>> spin_lock(&vm->invalidated_lock);
>> list_del(&bo_va->base.vm_status);
>> @@ -2774,7 +2786,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>> if (bo->parent && bo->parent->shadow == bo)
>> bo = bo->parent;
>>
>> - list_for_each_entry(bo_base, &bo->va, bo_list) {
>> + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
>> struct amdgpu_vm *vm = bo_base->vm;
>>
>> if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index e275ee7c1bc1..8966e40767eb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -128,7 +128,7 @@ struct amdgpu_vm_bo_base {
>> struct amdgpu_bo *bo;
>>
>> /* protected by bo being reserved */
>> - struct list_head bo_list;
>> + struct amdgpu_vm_bo_base *next;
>>
>> /* protected by spinlock */
>> struct list_head vm_status;
More information about the amd-gfx
mailing list