[Intel-gfx] [PATCH v3] drm/i915/gtt: Avoid calling kcalloc in a loop when allocating temp bitmaps
Michel Thierry
michel.thierry at intel.com
Wed Sep 2 06:40:03 PDT 2015
On 9/1/2015 10:06 AM, Michał Winiarski wrote:
> On each call to gen8_alloc_va_range_3lvl we're allocating temporary
> bitmaps needed for error handling. Unfortunately, when we increase
> address space size (48b ppgtt) we do additional (512 - 4) calls to
> kcalloc, increasing latency between exec and actual start of execution
> on the GPU. Let's just do a single kcalloc, we can also drop the size
> from free_gen8_temp_bitmaps since it's no longer used.
>
> v2: Use GFP_TEMPORARY to make the allocations reclaimable.
> v3: Drop the 2D array, just allocate a single block.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
Unless Chris thinks otherwise, I see Michał already addressed his comments.
Reviewed-by: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 45 +++++++++++++------------------------
> 1 file changed, 16 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 4a76807..f810762 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1126,13 +1126,8 @@ unwind_out:
> }
>
> static void
> -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
> - uint32_t pdpes)
> +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
> {
> - int i;
> -
> - for (i = 0; i < pdpes; i++)
> - kfree(new_pts[i]);
> kfree(new_pts);
> kfree(new_pds);
> }
> @@ -1142,29 +1137,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
> */
> static
> int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
> - unsigned long ***new_pts,
> + unsigned long **new_pts,
> uint32_t pdpes)
> {
> - int i;
> unsigned long *pds;
> - unsigned long **pts;
> + unsigned long *pts;
>
> - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL);
> + pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
> if (!pds)
> return -ENOMEM;
>
> - pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL);
> - if (!pts) {
> - kfree(pds);
> - return -ENOMEM;
> - }
> -
> - for (i = 0; i < pdpes; i++) {
> - pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
> - sizeof(unsigned long), GFP_KERNEL);
> - if (!pts[i])
> - goto err_out;
> - }
> + pts = kcalloc(pdpes * BITS_TO_LONGS(I915_PDES),
> + sizeof(unsigned long), GFP_TEMPORARY);
> + if (!pts)
> + goto err_out;
>
> *new_pds = pds;
> *new_pts = pts;
> @@ -1172,7 +1158,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
> return 0;
>
> err_out:
> - free_gen8_temp_bitmaps(pds, pts, pdpes);
> + free_gen8_temp_bitmaps(pds, pts);
> return -ENOMEM;
> }
>
> @@ -1193,7 +1179,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
> {
> struct i915_hw_ppgtt *ppgtt =
> container_of(vm, struct i915_hw_ppgtt, base);
> - unsigned long *new_page_dirs, **new_page_tables;
> + unsigned long *new_page_dirs, *new_page_tables;
> struct drm_device *dev = vm->dev;
> struct i915_page_directory *pd;
> const uint64_t orig_start = start;
> @@ -1220,14 +1206,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
> ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
> new_page_dirs);
> if (ret) {
> - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
> return ret;
> }
>
> /* For every page directory referenced, allocate page tables */
> gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
> ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
> - new_page_tables[pdpe]);
> + new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
> if (ret)
> goto err_out;
> }
> @@ -1278,20 +1264,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
> gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
> }
>
> - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
> mark_tlbs_dirty(ppgtt);
> return 0;
>
> err_out:
> while (pdpe--) {
> - for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
> + for_each_set_bit(temp, new_page_tables + pdpe *
> + BITS_TO_LONGS(I915_PDES), I915_PDES)
> free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
> }
>
> for_each_set_bit(pdpe, new_page_dirs, pdpes)
> free_pd(dev, pdp->page_directory[pdpe]);
>
> - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
> + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
> mark_tlbs_dirty(ppgtt);
> return ret;
> }
>
More information about the Intel-gfx
mailing list