[PATCH 2/2] drm/amdgpu: use a single linked list for amdgpu_vm_bo_base

Thu Sep 13 11:44:09 UTC 2018

Am 13.09.2018 um 00:58 schrieb Felix Kuehling:
> Is the small reduction in memory footprint (8 bytes per page table on a
> 64-bit kernel) really worth the trouble of open-coding a single-linked
> list implementation?

Well the key point is it is now a power of two again. So we don't waste 
28KB per page table (on 4 levels) any more because it is rounded up to 
the next order size :)

> I guess this change makes a bigger difference for
> 2-level page tables than it does for 4-level, because the amdgpu_vm_pt
> array is allocated at the page directory level and includes page tables
> that don't even exist yet and may never exist. The amount of memory you
> save is the same as the size of the page directory.
>
> I wonder if the overhead could be reduced more effectively by allocating
> struct amdgpu_vm_pt with the page table, rather than with the page
> directory. Then the amdgpu_vm_pt.entries array would be an array of
> pointers instead. It could be an array[0] at the end of the structure
> since the number of entries is know then the page directory is
> allocated. The BO could also be embedded in the amdgpu_vm_pt structure
> so it doesn't need to be a separate allocation from the amdgpu_vm_pt.

Yeah, thought about that as well. But the change looked to invasive on 
first glance.

> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>

Thanks,
Christian.

>
> Regards,
>    Felix
>
>
> On 2018-09-12 04:55 AM, Christian König wrote:
>> Instead of the double linked list. Gets the size of amdgpu_vm_pt down to
>> 64 bytes again.
>>
>> We could even reduce it down to 32 bytes, but that would require some
>> rather extreme hacks.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 38 ++++++++++++++++++++----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
>>   4 files changed, 29 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index de990bdcdd6c..e6909252aefa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -448,7 +448,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>   		return -ENOMEM;
>>   	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
>>   	INIT_LIST_HEAD(&bo->shadow_list);
>> -	INIT_LIST_HEAD(&bo->va);
>> +	bo->vm_bo = NULL;
>>   	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
>>   		bp->domain;
>>   	bo->allowed_domains = bo->preferred_domains;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 907fdf46d895..64337ff2ad63 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -89,8 +89,8 @@ struct amdgpu_bo {
>>   	void				*metadata;
>>   	u32				metadata_size;
>>   	unsigned			prime_shared_count;
>> -	/* list of all virtual address to which this bo is associated to */
>> -	struct list_head		va;
>> +	/* per VM structure for page tables and with virtual addresses */
>> +	struct amdgpu_vm_bo_base	*vm_bo;
>>   	/* Constant after initialization */
>>   	struct drm_gem_object		gem_base;
>>   	struct amdgpu_bo		*parent;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index cb6a5114128e..fb6b16273c54 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -309,12 +309,13 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>   {
>>   	base->vm = vm;
>>   	base->bo = bo;
>> -	INIT_LIST_HEAD(&base->bo_list);
>> +	base->next = NULL;
>>   	INIT_LIST_HEAD(&base->vm_status);
>>   
>>   	if (!bo)
>>   		return;
>> -	list_add_tail(&base->bo_list, &bo->va);
>> +	base->next = bo->vm_bo;
>> +	bo->vm_bo = base;
>>   
>>   	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
>>   		return;
>> @@ -352,7 +353,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
>>   	if (!parent)
>>   		return NULL;
>>   
>> -	return list_first_entry(&parent->va, struct amdgpu_vm_pt, base.bo_list);
>> +	return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
>>   }
>>   
>>   /**
>> @@ -954,7 +955,7 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
>>   	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
>>   
>>   		if (entry->base.bo) {
>> -			list_del(&entry->base.bo_list);
>> +			entry->base.bo->vm_bo = NULL;
>>   			list_del(&entry->base.vm_status);
>>   			amdgpu_bo_unref(&entry->base.bo->shadow);
>>   			amdgpu_bo_unref(&entry->base.bo);
>> @@ -1162,12 +1163,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>   				       struct amdgpu_bo *bo)
>>   {
>> -	struct amdgpu_bo_va *bo_va;
>> +	struct amdgpu_vm_bo_base *base;
>>   
>> -	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
>> -		if (bo_va->base.vm == vm) {
>> -			return bo_va;
>> -		}
>> +	for (base = bo->vm_bo; base; base = base->next) {
>> +		if (base->vm != vm)
>> +			continue;
>> +
>> +		return container_of(base, struct amdgpu_bo_va, base);
>>   	}
>>   	return NULL;
>>   }
>> @@ -2728,11 +2730,21 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>>   	struct amdgpu_bo_va_mapping *mapping, *next;
>>   	struct amdgpu_bo *bo = bo_va->base.bo;
>>   	struct amdgpu_vm *vm = bo_va->base.vm;
>> +	struct amdgpu_vm_bo_base **base;
>>   
>> -	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> -		vm->bulk_moveable = false;
>> +	if (bo) {
>> +		if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> +			vm->bulk_moveable = false;
>>   
>> -	list_del(&bo_va->base.bo_list);
>> +		for (base = &bo_va->base.bo->vm_bo; *base;
>> +		     base = &(*base)->next) {
>> +			if (*base != &bo_va->base)
>> +				continue;
>> +
>> +			*base = bo_va->base.next;
>> +			break;
>> +		}
>> +	}
>>   
>>   	spin_lock(&vm->invalidated_lock);
>>   	list_del(&bo_va->base.vm_status);
>> @@ -2774,7 +2786,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   	if (bo->parent && bo->parent->shadow == bo)
>>   		bo = bo->parent;
>>   
>> -	list_for_each_entry(bo_base, &bo->va, bo_list) {
>> +	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
>>   		struct amdgpu_vm *vm = bo_base->vm;
>>   
>>   		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index e275ee7c1bc1..8966e40767eb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -128,7 +128,7 @@ struct amdgpu_vm_bo_base {
>>   	struct amdgpu_bo		*bo;
>>   
>>   	/* protected by bo being reserved */
>> -	struct list_head		bo_list;
>> +	struct amdgpu_vm_bo_base	*next;
>>   
>>   	/* protected by spinlock */
>>   	struct list_head		vm_status;