[PATCH 1/2] drm/etnaviv: Preallocate STLB according to CPU PAGE_SIZE
Lucas Stach
l.stach at pengutronix.de
Tue Dec 3 18:04:18 UTC 2024
On Friday, 08.11.2024 at 22:36 +0800, Sui Jingfeng wrote:
> The dma_direct_alloc() function allocates at least one page, whose size is
> the CPU PAGE_SIZE, while etnaviv_iommuv2_ensure_stlb() only asks for 4KiB.
> The memory beyond the first 4KiB is wasted on systems with a larger page
> size. For example, on 16KiB CPU page size systems we waste the remaining
> 12KiB, and on 64KiB CPU page size systems we waste the remaining 60KiB.
>
> Since addresses within one page are always contiguous, the remaining memory
> can be used to store adjacent slave TLBs. Then, the next time a neighbouring
> STLB is needed, we don't have to ask the system for another page, saving
> both memory and allocation overhead.
>
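To put a rough number on the waste described above: assuming
MMUv2_MAX_STLB_ENTRIES is 1024, a fully populated MMU on a 64KiB page size
system would allocate 1024 * 64KiB = 64MiB of DMA memory for what is really
only 1024 * 4KiB = 4MiB of second-level page tables, so the worst-case
overhead is around 60MiB. Packing multiple STLBs into each page, as done
below, brings that back down to the 4MiB of actual table data.
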
While this isn't adding a lot of code to etnaviv, I wonder if this
couldn't be handled by using a dma_pool for the pagetable allocations.
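
Something along the lines of the following rough, untested sketch (the
stlb_pool member and the init helper are made up for illustration; also note
that dma_pool memory comes from dma_alloc_coherent(), i.e. it is coherent
rather than write-combined like the current dma_alloc_wc() allocations, which
would need checking):

	/*
	 * Untested sketch: back the 4KiB STLB allocations with a dma_pool
	 * instead of slicing up whole CPU pages by hand. The stlb_pool field
	 * and the init helper below are illustrative only.
	 */
	#include <linux/dmapool.h>

	static int etnaviv_iommu_stlb_pool_init(struct etnaviv_iommu_global *global)
	{
		/* 4KiB chunks, 4KiB aligned, no boundary restriction */
		global->stlb_pool = dma_pool_create("etnaviv-stlb", global->dev,
						    SZ_4K, SZ_4K, 0);

		return global->stlb_pool ? 0 : -ENOMEM;
	}

	static int
	etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
				    int stlb)
	{
		struct etnaviv_iommu_global *global = v2_context->base.global;
		dma_addr_t daddr;
		void *vaddr;

		if (v2_context->stlb_cpu[stlb])
			return 0;

		/* the pool packs several 4KiB chunks into each backing page */
		vaddr = dma_pool_alloc(global->stlb_pool, GFP_KERNEL, &daddr);
		if (!vaddr)
			return -ENOMEM;

		memset32(vaddr, MMUv2_PTE_EXCEPTION, SZ_4K / sizeof(u32));

		v2_context->stlb_cpu[stlb] = vaddr;
		v2_context->stlb_dma[stlb] = daddr;
		v2_context->mtlb_cpu[stlb] = daddr | MMUv2_PTE_PRESENT;

		return 0;
	}

Freeing would then be a dma_pool_free() per populated STLB in
etnaviv_iommuv2_free() and a dma_pool_destroy() once the global state goes
away, which keeps the sub-page packing logic out of etnaviv entirely.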
Regards,
Lucas
> Signed-off-by: Sui Jingfeng <sui.jingfeng at linux.dev>
> ---
> drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 64 +++++++++++++++++++---
> 1 file changed, 56 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> index d664ae29ae20..fa6eed1ae1be 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> @@ -44,19 +44,66 @@ to_v2_context(struct etnaviv_iommu_context *context)
> return container_of(context, struct etnaviv_iommuv2_context, base);
> }
>
> +static int etnaviv_iommuv2_stlb_free(struct etnaviv_iommuv2_context *context)
> +{
> + struct device *dev = context->base.global->dev;
> + unsigned int i;
> +
> + for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; ++i) {
> + u32 *vaddr = context->stlb_cpu[i];
> +
> + if (!vaddr)
> + continue;
> +
> + context->stlb_cpu[i] = NULL;
> +
> + if (i % (PAGE_SIZE / SZ_4K))
> + continue;
> +
> + dma_free_wc(dev, PAGE_SIZE, vaddr, context->stlb_dma[i]);
> + }
> +
> + return 0;
> +}
> +
> +static int
> +etnaviv_iommuv2_ensure_stlb_new(struct etnaviv_iommuv2_context *context,
> + unsigned int stlb)
> +{
> + struct device *dev = context->base.global->dev;
> + void *vaddr;
> + dma_addr_t daddr;
> + unsigned int i;
> +
> + if (context->stlb_cpu[stlb])
> + return 0;
> +
> + vaddr = dma_alloc_wc(dev, PAGE_SIZE, &daddr, GFP_KERNEL);
> + if (!vaddr)
> + return -ENOMEM;
> +
> + memset32(vaddr, MMUv2_PTE_EXCEPTION, PAGE_SIZE / sizeof(u32));
> +
> + stlb &= ~(PAGE_SIZE / SZ_4K - 1);
> +
> + for (i = 0; i < PAGE_SIZE / SZ_4K; ++i) {
> + context->stlb_cpu[stlb + i] = vaddr;
> + context->stlb_dma[stlb + i] = daddr;
> + context->mtlb_cpu[stlb + i] = daddr | MMUv2_PTE_PRESENT;
> + vaddr += SZ_4K;
> + daddr += SZ_4K;
> + }
> +
> + return 0;
> +}
> +
> static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
> {
> struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> - int i;
>
> drm_mm_takedown(&context->mm);
>
> - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
> - if (v2_context->stlb_cpu[i])
> - dma_free_wc(context->global->dev, SZ_4K,
> - v2_context->stlb_cpu[i],
> - v2_context->stlb_dma[i]);
> - }
> + etnaviv_iommuv2_stlb_free(v2_context);
>
> dma_free_wc(context->global->dev, SZ_4K, v2_context->mtlb_cpu,
> v2_context->mtlb_dma);
> @@ -65,6 +112,7 @@ static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
>
> vfree(v2_context);
> }
> +
> static int
> etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
> int stlb)
> @@ -109,7 +157,7 @@ static int etnaviv_iommuv2_map(struct etnaviv_iommu_context *context,
> mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
> stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;
>
> - ret = etnaviv_iommuv2_ensure_stlb(v2_context, mtlb_entry);
> + ret = etnaviv_iommuv2_ensure_stlb_new(v2_context, mtlb_entry);
> if (ret)
> return ret;
>