[Intel-gfx] [PATCH 18/23] drm/i915: Keep all partially allocated HWSP on a freelist

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jan 18 12:25:40 UTC 2019


On 17/01/2019 14:34, Chris Wilson wrote:
> Keep track of partially allocated pages for use in allocating future
> timeline HWSP. This is still without migration, so it is possible for
> the system to end up with each timeline in its own page, but we ensure
> that no new allocation would needless allocate a fresh page!
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h      |  3 +-
>   drivers/gpu/drm/i915/i915_timeline.c | 81 +++++++++++++++++-----------
>   2 files changed, 50 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d59228dabb6e..0bebef428f1e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1981,8 +1981,7 @@ struct drm_i915_private {
>   
>   			/* Pack multiple timelines' seqnos into the same page */
>   			spinlock_t hwsp_lock;
> -			struct i915_vma *hwsp;
> -			u64 bitmap;
> +			struct list_head hwsp;

hwsp_list to use our established convention? Actually, hwsp_free_list 
would be even more self-documenting.

>   		} timelines;
>   
>   		struct list_head active_rings;
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index e939a9e1a4ab..64bb1ce24318 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -9,74 +9,94 @@
>   #include "i915_timeline.h"
>   #include "i915_syncmap.h"
>   
> +struct i915_timeline_hwsp {
> +	struct list_head link;
> +	struct i915_vma *vma;
> +	u64 bitmap;
> +};
> +
>   static int hwsp_alloc(struct i915_timeline *timeline)
>   {
> -#define NBITS BITS_PER_TYPE(typeof(gt->bitmap))
>   	struct drm_i915_private *i915 = timeline->i915;
>   	struct i915_gt_timelines *gt = &i915->gt.timelines;
> -	struct i915_vma *vma;
> +	struct i915_timeline_hwsp *hwsp;
>   	int offset;
>   
>   	spin_lock(&gt->hwsp_lock);
>   
> -restart:
> -	offset = find_first_bit((unsigned long *)&gt->bitmap, NBITS);
> -	if (offset == NBITS && gt->hwsp) {
> -		i915_vma_put(gt->hwsp);
> -		gt->hwsp = NULL;
> -	}
> -
> -	vma = gt->hwsp;
> -	if (!vma) {
> +	hwsp = list_first_entry_or_null(&gt->hwsp, typeof(*hwsp), link);
> +	if (!hwsp) {
>   		struct drm_i915_gem_object *bo;
> +		struct i915_vma *vma;
>   
>   		spin_unlock(&gt->hwsp_lock);
>   
> -		BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> +		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
> +		if (!hwsp)
> +			return -ENOMEM;
> +
> +		BUILD_BUG_ON(BITS_PER_TYPE(hwsp->bitmap) * CACHELINE_BYTES > PAGE_SIZE);
>   		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> -		if (IS_ERR(bo))
> +		if (IS_ERR(bo)) {
> +			kfree(hwsp);
>   			return PTR_ERR(bo);
> +		}
>   
>   		i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>   
>   		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
>   		if (IS_ERR(vma)) {
>   			i915_gem_object_put(bo);
> +			kfree(hwsp);
>   			return PTR_ERR(vma);
>   		}
>   
> -		spin_lock(&gt->hwsp_lock);
> -		if (gt->hwsp) {
> -			i915_gem_object_put(bo);
> -			goto restart;
> -		}
> +		vma->private = hwsp;
> +		hwsp->vma = vma;
> +		hwsp->bitmap = ~0ull;
>   
> -		gt->hwsp = vma;
> -		gt->bitmap = ~0ull;
> -		offset = 0;
> +		spin_lock(&gt->hwsp_lock);
> +		list_add(&hwsp->link, &gt->hwsp);
>   	}
>   
> -	gt->bitmap &= ~BIT_ULL(offset);
> +	GEM_BUG_ON(!hwsp->bitmap);
> +	offset = __ffs64(hwsp->bitmap);

I never can remember from which side is first. With find_first_bit I 
didn't have this problem. :)

> +	hwsp->bitmap &= ~BIT_ULL(offset);
> +	if (!hwsp->bitmap)
> +		list_del(&hwsp->link);
>   
>   	spin_unlock(&gt->hwsp_lock);
>   
> -	timeline->hwsp_ggtt = i915_vma_get(vma);
> +	timeline->hwsp_ggtt = i915_vma_get(hwsp->vma);
>   	timeline->hwsp_offset = offset * CACHELINE_BYTES;
>   
> +	GEM_BUG_ON(timeline->hwsp_ggtt->private != hwsp);
> +
>   	return 0;
> -#undef NBITS
>   }
>   
>   static void hwsp_free(struct i915_timeline *timeline)
>   {
>   	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
> +	struct i915_timeline_hwsp *hwsp;
>   
> -	if (timeline->hwsp_ggtt != gt->hwsp)
> +	hwsp = timeline->hwsp_ggtt->private;
> +	if (!hwsp)
>   		return;

Hm is there a path to this return now?

>   
>   	spin_lock(&gt->hwsp_lock);
> -	if (timeline->hwsp_ggtt == gt->hwsp)
> -		gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> +
> +	if (!hwsp->bitmap)
> +		list_add_tail(&hwsp->link, &gt->hwsp);

I got so deep into the code that I forgot to remind to add some more 
comments. :) This seems like a good place - oh well.. who would come up 
with good comments anyway.

> +
> +	hwsp->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> +
> +	if (hwsp->bitmap == ~0ull) {
> +		i915_vma_put(hwsp->vma);
> +		list_del(&hwsp->link);
> +		kfree(hwsp);
> +	}
> +
>   	spin_unlock(&gt->hwsp_lock);
>   }
>   
> @@ -148,6 +168,7 @@ void i915_timelines_init(struct drm_i915_private *i915)
>   	INIT_LIST_HEAD(&gt->list);
>   
>   	spin_lock_init(&gt->hwsp_lock);
> +	INIT_LIST_HEAD(&gt->hwsp);
>   
>   	/* via i915_gem_wait_for_idle() */
>   	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
> @@ -264,13 +285,9 @@ void __i915_timeline_free(struct kref *kref)
>   void i915_timelines_fini(struct drm_i915_private *i915)
>   {
>   	struct i915_gt_timelines *gt = &i915->gt.timelines;
> -	struct i915_vma *vma;
>   
>   	GEM_BUG_ON(!list_empty(&gt->list));
> -
> -	vma = fetch_and_zero(&i915->gt.timelines.hwsp);
> -	if (vma)
> -		i915_vma_put(vma);
> +	GEM_BUG_ON(!list_empty(&gt->hwsp));
>   
>   	mutex_destroy(&gt->mutex);
>   }
> 

The only thing missing is a selftest which will exercise some "chunky" 
timeline allocations and frees. Just to exercise this logic a bit.. 
something like maybe (in very crude pseudo-code):

	blocks = { 1, 64 / 2, 64 - 1, 64, 64 + 1, 64 * 3 / 2 };

	for_each_block {
		for 0..block
			tl_emit_rq(block);
		sync
		if (flags & LINEAR)
			for 0..block
				tl_free
		else if (flags & RANDOM_FREE)
			...
	}

It's a very terse and sloppy sketch but I think you'll know what I am 
trying to say.

Regards,

Tvrtko


More information about the Intel-gfx mailing list