[PATCH 01/11] drm/amdgpu: try allocating VRAM as power of two
Felix Kuehling
felix.kuehling at amd.com
Tue Sep 11 00:08:43 UTC 2018
This looks good. But it complicates something I've been looking at:
remembering which process each drm_mm_node last belonged to, so that
nodes don't need to be cleared the next time they are allocated by the
same process. Having most nodes be the same size (vram_page_split
pages) would make this very easy and efficient for the most common
cases (large allocations without any exotic address limitations or
alignment requirements).
Does anything else in this patch series depend on this optimization?
Regards,
Felix
On 2018-09-09 02:03 PM, Christian König wrote:
> Try to allocate VRAM in power of two sizes and only fall back to vram
> split sizes if that fails.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 52 +++++++++++++++++++++-------
> 1 file changed, 40 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 9cfa8a9ada92..3f9d5d00c9b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -124,6 +124,28 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
> return usage;
> }
>
> +/**
> + * amdgpu_vram_mgr_virt_start - update virtual start address
> + *
> + * @mem: ttm_mem_reg to update
> + * @node: just allocated node
> + *
> + * Calculate a virtual BO start address to easily check if everything is CPU
> + * accessible.
> + */
> +static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
> + struct drm_mm_node *node)
> +{
> + unsigned long start;
> +
> + start = node->start + node->size;
> + if (start > mem->num_pages)
> + start -= mem->num_pages;
> + else
> + start = 0;
> + mem->start = max(mem->start, start);
> +}
> +
> /**
> * amdgpu_vram_mgr_new - allocate new ranges
> *
> @@ -176,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
> pages_left = mem->num_pages;
>
> spin_lock(&mgr->lock);
> - for (i = 0; i < num_nodes; ++i) {
> + for (i = 0; pages_left >= pages_per_node; ++i) {
> + unsigned long pages = rounddown_pow_of_two(pages_left);
> +
> + r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
> + pages_per_node, 0,
> + place->fpfn, lpfn,
> + mode);
> + if (unlikely(r))
> + break;
> +
> + usage += nodes[i].size << PAGE_SHIFT;
> + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
> + amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
> + pages_left -= pages;
> + }
> +
> + for (; pages_left; ++i) {
> unsigned long pages = min(pages_left, pages_per_node);
> uint32_t alignment = mem->page_alignment;
> - unsigned long start;
>
> if (pages == pages_per_node)
> alignment = pages_per_node;
> @@ -193,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
>
> usage += nodes[i].size << PAGE_SHIFT;
> vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
> -
> - /* Calculate a virtual BO start address to easily check if
> - * everything is CPU accessible.
> - */
> - start = nodes[i].start + nodes[i].size;
> - if (start > mem->num_pages)
> - start -= mem->num_pages;
> - else
> - start = 0;
> - mem->start = max(mem->start, start);
> + amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
> pages_left -= pages;
> }
> spin_unlock(&mgr->lock);