[Intel-gfx] [PATCH 18/23] drm/i915: Keep all partially allocated HWSP on a freelist
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jan 18 12:25:40 UTC 2019
On 17/01/2019 14:34, Chris Wilson wrote:
> Keep track of partially allocated pages for use in allocating future
> timeline HWSP. This is still without migration, so it is possible for
> the system to end up with each timeline in its own page, but we ensure
> that no new allocation would needless allocate a fresh page!
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 +-
> drivers/gpu/drm/i915/i915_timeline.c | 81 +++++++++++++++++-----------
> 2 files changed, 50 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d59228dabb6e..0bebef428f1e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1981,8 +1981,7 @@ struct drm_i915_private {
>
> /* Pack multiple timelines' seqnos into the same page */
> spinlock_t hwsp_lock;
> - struct i915_vma *hwsp;
> - u64 bitmap;
> + struct list_head hwsp;
hwsp_list to use our established convention? Actually, hwsp_free_list
would be even more self-documenting.
> } timelines;
>
> struct list_head active_rings;
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index e939a9e1a4ab..64bb1ce24318 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -9,74 +9,94 @@
> #include "i915_timeline.h"
> #include "i915_syncmap.h"
>
> +struct i915_timeline_hwsp {
> + struct list_head link;
> + struct i915_vma *vma;
> + u64 bitmap;
> +};
> +
> static int hwsp_alloc(struct i915_timeline *timeline)
> {
> -#define NBITS BITS_PER_TYPE(typeof(gt->bitmap))
> struct drm_i915_private *i915 = timeline->i915;
> struct i915_gt_timelines *gt = &i915->gt.timelines;
> - struct i915_vma *vma;
> + struct i915_timeline_hwsp *hwsp;
> int offset;
>
> spin_lock(>->hwsp_lock);
>
> -restart:
> - offset = find_first_bit((unsigned long *)>->bitmap, NBITS);
> - if (offset == NBITS && gt->hwsp) {
> - i915_vma_put(gt->hwsp);
> - gt->hwsp = NULL;
> - }
> -
> - vma = gt->hwsp;
> - if (!vma) {
> + hwsp = list_first_entry_or_null(>->hwsp, typeof(*hwsp), link);
> + if (!hwsp) {
> struct drm_i915_gem_object *bo;
> + struct i915_vma *vma;
>
> spin_unlock(>->hwsp_lock);
>
> - BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
> + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
> + if (!hwsp)
> + return -ENOMEM;
> +
> + BUILD_BUG_ON(BITS_PER_TYPE(hwsp->bitmap) * CACHELINE_BYTES > PAGE_SIZE);
> bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
> - if (IS_ERR(bo))
> + if (IS_ERR(bo)) {
> + kfree(hwsp);
> return PTR_ERR(bo);
> + }
>
> i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>
> vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
> if (IS_ERR(vma)) {
> i915_gem_object_put(bo);
> + kfree(hwsp);
> return PTR_ERR(vma);
> }
>
> - spin_lock(>->hwsp_lock);
> - if (gt->hwsp) {
> - i915_gem_object_put(bo);
> - goto restart;
> - }
> + vma->private = hwsp;
> + hwsp->vma = vma;
> + hwsp->bitmap = ~0ull;
>
> - gt->hwsp = vma;
> - gt->bitmap = ~0ull;
> - offset = 0;
> + spin_lock(>->hwsp_lock);
> + list_add(&hwsp->link, >->hwsp);
> }
>
> - gt->bitmap &= ~BIT_ULL(offset);
> + GEM_BUG_ON(!hwsp->bitmap);
> + offset = __ffs64(hwsp->bitmap);
I never can remember from which side is first. With find_first_bit I
didn't have this problem. :)
> + hwsp->bitmap &= ~BIT_ULL(offset);
> + if (!hwsp->bitmap)
> + list_del(&hwsp->link);
>
> spin_unlock(>->hwsp_lock);
>
> - timeline->hwsp_ggtt = i915_vma_get(vma);
> + timeline->hwsp_ggtt = i915_vma_get(hwsp->vma);
> timeline->hwsp_offset = offset * CACHELINE_BYTES;
>
> + GEM_BUG_ON(timeline->hwsp_ggtt->private != hwsp);
> +
> return 0;
> -#undef NBITS
> }
>
> static void hwsp_free(struct i915_timeline *timeline)
> {
> struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
> + struct i915_timeline_hwsp *hwsp;
>
> - if (timeline->hwsp_ggtt != gt->hwsp)
> + hwsp = timeline->hwsp_ggtt->private;
> + if (!hwsp)
> return;
Hm is there a path to this return now?
>
> spin_lock(>->hwsp_lock);
> - if (timeline->hwsp_ggtt == gt->hwsp)
> - gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> +
> + if (!hwsp->bitmap)
> + list_add_tail(&hwsp->link, >->hwsp);
I got so deep into the code that I forgot to remind to add some more
comments. :) This seems like a good place - oh well.. who would come up
with good comments anyway.
> +
> + hwsp->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> +
> + if (hwsp->bitmap == ~0ull) {
> + i915_vma_put(hwsp->vma);
> + list_del(&hwsp->link);
> + kfree(hwsp);
> + }
> +
> spin_unlock(>->hwsp_lock);
> }
>
> @@ -148,6 +168,7 @@ void i915_timelines_init(struct drm_i915_private *i915)
> INIT_LIST_HEAD(>->list);
>
> spin_lock_init(>->hwsp_lock);
> + INIT_LIST_HEAD(>->hwsp);
>
> /* via i915_gem_wait_for_idle() */
> i915_gem_shrinker_taints_mutex(i915, >->mutex);
> @@ -264,13 +285,9 @@ void __i915_timeline_free(struct kref *kref)
> void i915_timelines_fini(struct drm_i915_private *i915)
> {
> struct i915_gt_timelines *gt = &i915->gt.timelines;
> - struct i915_vma *vma;
>
> GEM_BUG_ON(!list_empty(>->list));
> -
> - vma = fetch_and_zero(&i915->gt.timelines.hwsp);
> - if (vma)
> - i915_vma_put(vma);
> + GEM_BUG_ON(!list_empty(>->hwsp));
>
> mutex_destroy(>->mutex);
> }
>
The only thing missing is a selftest which will exercise some "chunky"
timeline allocations and frees. Just to exercise this logic a bit..
something like maybe (in very crude pseudo-code):
blocks = { 1, 64 / 2, 64 - 1, 64, 64 + 1, 64 * 3 / 2 };
for_each_block {
for 0..block
tl_emit_rq(block);
sync
if (flags & LINEAR)
for 0..block
tl_free
else if (flags & RANDOM_FREE)
...
}
It's a very terse and sloppy sketch but I think you'll know what I am
trying to say.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list