[Intel-gfx] [PATCH 17/23] drm/i915: Share per-timeline HWSP using a slab suballocator
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jan 18 12:08:40 UTC 2019
On 17/01/2019 14:34, Chris Wilson wrote:
> If we restrict ourselves to only using a cacheline for each timeline's
> HWSP (we could go smaller, but want to avoid needless polluting
> cachelines on different engines between different contexts), then we can
> suballocate a single 4k page into 64 different timeline HWSP. By
> treating each fresh allocation as a slab of 64 entries, we can keep it
> around for the next 64 allocation attempts until we need to refresh the
> slab cache.
>
> John Harrison noted the issue of fragmentation leading to the same worst
> case performance of one page per timeline as before, which can be
> mitigated by adopting a freelist.
Add "in a later patch."?
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 5 ++
> drivers/gpu/drm/i915/i915_timeline.c | 80 ++++++++++++++++++++++++----
> 2 files changed, 74 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3913900600b7..d59228dabb6e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1978,6 +1978,11 @@ struct drm_i915_private {
> struct i915_gt_timelines {
> struct mutex mutex; /* protects list, tainted by GPU */
> struct list_head list;
> +
> + /* Pack multiple timelines' seqnos into the same page */
> + spinlock_t hwsp_lock;
> + struct i915_vma *hwsp;
> + u64 bitmap;
freemap?
> } timelines;
>
> struct list_head active_rings;
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 380f4d25fb89..e939a9e1a4ab 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -11,26 +11,73 @@
>
> static int hwsp_alloc(struct i915_timeline *timeline)
> {
> +#define NBITS BITS_PER_TYPE(typeof(gt->bitmap))
> struct drm_i915_private *i915 = timeline->i915;
> - struct drm_i915_gem_object *bo;
> + struct i915_gt_timelines *gt = &i915->gt.timelines;
> struct i915_vma *vma;
> + int offset;
> +
> + spin_lock(>->hwsp_lock);
> +
> +restart:
> + offset = find_first_bit((unsigned long *)>->bitmap, NBITS);
> + if (offset == NBITS && gt->hwsp) {
> + i915_vma_put(gt->hwsp);
> + gt->hwsp = NULL;
> + }
> +
> + vma = gt->hwsp;
> + if (!vma) {
> + struct drm_i915_gem_object *bo;
> +
> + spin_unlock(>->hwsp_lock);
>
> - bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> - if (IS_ERR(bo))
> - return PTR_ERR(bo);
> + BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> + bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(bo))
> + return PTR_ERR(bo);
>
> - i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> + i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>
> - vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> - if (IS_ERR(vma)) {
> - i915_gem_object_put(bo);
> - return PTR_ERR(vma);
> + vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> + if (IS_ERR(vma)) {
> + i915_gem_object_put(bo);
> + return PTR_ERR(vma);
> + }
> +
> + spin_lock(>->hwsp_lock);
> + if (gt->hwsp) {
> + i915_gem_object_put(bo);
> + goto restart;
> + }
> +
> + gt->hwsp = vma;
> + gt->bitmap = ~0ull;
> + offset = 0;
> }
>
> - timeline->hwsp_ggtt = vma;
> - timeline->hwsp_offset = 0;
> + gt->bitmap &= ~BIT_ULL(offset);
> +
> + spin_unlock(>->hwsp_lock);
> +
> + timeline->hwsp_ggtt = i915_vma_get(vma);
> + timeline->hwsp_offset = offset * CACHELINE_BYTES;
>
> return 0;
> +#undef NBITS
> +}
> +
> +static void hwsp_free(struct i915_timeline *timeline)
> +{
> + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
> +
> + if (timeline->hwsp_ggtt != gt->hwsp)
> + return;
> +
> + spin_lock(>->hwsp_lock);
> + if (timeline->hwsp_ggtt == gt->hwsp)
> + gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> + spin_unlock(>->hwsp_lock);
> }
There is some asymmetry in hwsp_alloc dealing with both backing store
and bitmap management, while hwsp_free only deals with bitmap
management. I'll have a look in the next patch if it all goes away.
>
> int i915_timeline_init(struct drm_i915_private *i915,
> @@ -65,6 +112,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
>
> vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> if (IS_ERR(vaddr)) {
> + hwsp_free(timeline);
> i915_vma_put(timeline->hwsp_ggtt);
> return PTR_ERR(vaddr);
> }
> @@ -99,6 +147,8 @@ void i915_timelines_init(struct drm_i915_private *i915)
> mutex_init(>->mutex);
> INIT_LIST_HEAD(>->list);
>
> + spin_lock_init(>->hwsp_lock);
> +
> /* via i915_gem_wait_for_idle() */
> i915_gem_shrinker_taints_mutex(i915, >->mutex);
> }
> @@ -144,6 +194,9 @@ void i915_timeline_fini(struct i915_timeline *timeline)
> list_del(&timeline->link);
> mutex_unlock(>->mutex);
>
> + i915_syncmap_free(&timeline->sync);
> + hwsp_free(timeline);
> +
> i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
> i915_vma_put(timeline->hwsp_ggtt);
> }
> @@ -211,9 +264,14 @@ void __i915_timeline_free(struct kref *kref)
> void i915_timelines_fini(struct drm_i915_private *i915)
> {
> struct i915_gt_timelines *gt = &i915->gt.timelines;
> + struct i915_vma *vma;
>
> GEM_BUG_ON(!list_empty(>->list));
>
> + vma = fetch_and_zero(&i915->gt.timelines.hwsp);
> + if (vma)
> + i915_vma_put(vma);
> +
> mutex_destroy(>->mutex);
> }
>
>
Looks okay to me. But best wait for John to give it a check over as well.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list