[Intel-gfx] [PATCH v3] drm/i915/gtt: Avoid calling kcalloc in a loop when allocating temp bitmaps

Michel Thierry michel.thierry at intel.com
Wed Sep 2 06:40:03 PDT 2015


On 9/1/2015 10:06 AM, Michał Winiarski wrote:
> On each call to gen8_alloc_va_range_3lvl we're allocating temporary
> bitmaps needed for error handling. Unfortunately, when we increase
> address space size (48b ppgtt) we do additional (512 - 4) calls to
> kcalloc, increasing latency between exec and actual start of execution
> on the GPU. Let's just do a single kcalloc, we can also drop the size
> from free_gen8_temp_bitmaps since it's no longer used.
>
> v2: Use GFP_TEMPORARY to make the allocations reclaimable.
> v3: Drop the 2D array, just allocate a single block.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>

Unless Chris thinks otherwise, I see Michał already addressed his comments.

Reviewed-by: Michel Thierry <michel.thierry at intel.com>

> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 45 +++++++++++++------------------------
>   1 file changed, 16 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 4a76807..f810762 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1126,13 +1126,8 @@ unwind_out:
>   }
>
>   static void
> -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
> -		       uint32_t pdpes)
> +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
>   {
> -	int i;
> -
> -	for (i = 0; i < pdpes; i++)
> -		kfree(new_pts[i]);
>   	kfree(new_pts);
>   	kfree(new_pds);
>   }
> @@ -1142,29 +1137,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
>    */
>   static
>   int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
> -					 unsigned long ***new_pts,
> +					 unsigned long **new_pts,
>   					 uint32_t pdpes)
>   {
> -	int i;
>   	unsigned long *pds;
> -	unsigned long **pts;
> +	unsigned long *pts;
>
> -	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL);
> +	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
>   	if (!pds)
>   		return -ENOMEM;
>
> -	pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL);
> -	if (!pts) {
> -		kfree(pds);
> -		return -ENOMEM;
> -	}
> -
> -	for (i = 0; i < pdpes; i++) {
> -		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
> -				 sizeof(unsigned long), GFP_KERNEL);
> -		if (!pts[i])
> -			goto err_out;
> -	}
> +	pts = kcalloc(pdpes * BITS_TO_LONGS(I915_PDES),
> +			sizeof(unsigned long), GFP_TEMPORARY);
> +	if (!pts)
> +		goto err_out;
>
>   	*new_pds = pds;
>   	*new_pts = pts;
> @@ -1172,7 +1158,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
>   	return 0;
>
>   err_out:
> -	free_gen8_temp_bitmaps(pds, pts, pdpes);
> +	free_gen8_temp_bitmaps(pds, pts);
>   	return -ENOMEM;
>   }
>
> @@ -1193,7 +1179,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>   {
>   	struct i915_hw_ppgtt *ppgtt =
>   		container_of(vm, struct i915_hw_ppgtt, base);
> -	unsigned long *new_page_dirs, **new_page_tables;
> +	unsigned long *new_page_dirs, *new_page_tables;
>   	struct drm_device *dev = vm->dev;
>   	struct i915_page_directory *pd;
>   	const uint64_t orig_start = start;
> @@ -1220,14 +1206,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>   	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
>   						new_page_dirs);
>   	if (ret) {
> -		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> +		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>   		return ret;
>   	}
>
>   	/* For every page directory referenced, allocate page tables */
>   	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
>   		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
> -						new_page_tables[pdpe]);
> +						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
>   		if (ret)
>   			goto err_out;
>   	}
> @@ -1278,20 +1264,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>   		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
>   	}
>
> -	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> +	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>   	mark_tlbs_dirty(ppgtt);
>   	return 0;
>
>   err_out:
>   	while (pdpe--) {
> -		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
> +		for_each_set_bit(temp, new_page_tables + pdpe *
> +				BITS_TO_LONGS(I915_PDES), I915_PDES)
>   			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
>   	}
>
>   	for_each_set_bit(pdpe, new_page_dirs, pdpes)
>   		free_pd(dev, pdp->page_directory[pdpe]);
>
> -	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> +	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>   	mark_tlbs_dirty(ppgtt);
>   	return ret;
>   }
>


More information about the Intel-gfx mailing list