[PATCH 14/20] drm/radeon: multiple ring allocator v2
Jerome Glisse
j.glisse at gmail.com
Mon May 7 08:23:51 PDT 2012
On Mon, May 7, 2012 at 7:42 AM, Christian König <deathsimple at vodafone.de> wrote:
> A startover with a new idea for a multiple ring allocator.
> Should perform as well as a normal ring allocator as long
> as only one ring does something, but falls back to a more
> complex algorithm if more complex things start to happen.
>
> We store the last allocated bo in last, we always try to allocate
> after the last allocated bo. Principle is that in a linear GPU ring
> progression what is after last is the oldest bo we allocated and thus
> the first one that should no longer be in use by the GPU.
>
> If it's not the case we skip over the bo after last to the closest
> done bo if such one exist. If none exist and we are not asked to
> block we report failure to allocate.
>
> If we are asked to block we wait on all the oldest fence of all
> rings. We just wait for any of those fence to complete.
>
> v2: We need to be able to let hole point to the list_head, otherwise
> try free will never free the first allocation of the list. Also
> stop calling radeon_fence_signalled more than necessary.
>
> Signed-off-by: Christian König <deathsimple at vodafone.de>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
This one is NAK; please use my patch. Yes, in my patch we never try to
free anything if there is only one sa_bo in the list. If you really care
about this, it's a one-line change:
http://people.freedesktop.org/~glisse/reset5/0001-drm-radeon-multiple-ring-allocator-v2.patch
Your patch here can enter an infinite loop and never return while
holding the lock. See below.
Cheers,
Jerome
> ---
> drivers/gpu/drm/radeon/radeon.h | 7 +-
> drivers/gpu/drm/radeon/radeon_ring.c | 19 +--
> drivers/gpu/drm/radeon/radeon_sa.c | 292 +++++++++++++++++++++++-----------
> 3 files changed, 210 insertions(+), 108 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 37a7459..cc7f16a 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -385,7 +385,9 @@ struct radeon_bo_list {
> struct radeon_sa_manager {
> spinlock_t lock;
> struct radeon_bo *bo;
> - struct list_head sa_bo;
> + struct list_head *hole;
> + struct list_head flist[RADEON_NUM_RINGS];
> + struct list_head olist;
> unsigned size;
> uint64_t gpu_addr;
> void *cpu_ptr;
> @@ -396,7 +398,8 @@ struct radeon_sa_bo;
>
> /* sub-allocation buffer */
> struct radeon_sa_bo {
> - struct list_head list;
> + struct list_head olist;
> + struct list_head flist;
> struct radeon_sa_manager *manager;
> unsigned soffset;
> unsigned eoffset;
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
> index 1748d93..e074ff5 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
>
> int radeon_ib_pool_init(struct radeon_device *rdev)
> {
> - struct radeon_sa_manager tmp;
> int i, r;
>
> - r = radeon_sa_bo_manager_init(rdev, &tmp,
> - RADEON_IB_POOL_SIZE*64*1024,
> - RADEON_GEM_DOMAIN_GTT);
> - if (r) {
> - return r;
> - }
> -
> radeon_mutex_lock(&rdev->ib_pool.mutex);
> if (rdev->ib_pool.ready) {
> radeon_mutex_unlock(&rdev->ib_pool.mutex);
> - radeon_sa_bo_manager_fini(rdev, &tmp);
> return 0;
> }
>
> - rdev->ib_pool.sa_manager = tmp;
> - INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
> + r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
> + RADEON_IB_POOL_SIZE*64*1024,
> + RADEON_GEM_DOMAIN_GTT);
> + if (r) {
> + radeon_mutex_unlock(&rdev->ib_pool.mutex);
> + return r;
> + }
> +
> for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
> rdev->ib_pool.ibs[i].fence = NULL;
> rdev->ib_pool.ibs[i].idx = i;
> diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
> index 90ee8ad..757a9d4 100644
> --- a/drivers/gpu/drm/radeon/radeon_sa.c
> +++ b/drivers/gpu/drm/radeon/radeon_sa.c
> @@ -27,21 +27,42 @@
> * Authors:
> * Jerome Glisse <glisse at freedesktop.org>
> */
> +/* Algorithm:
> + *
> + * We store the last allocated bo in "hole", we always try to allocate
> + * after the last allocated bo. Principle is that in a linear GPU ring
> + * progression was is after last is the oldest bo we allocated and thus
> + * the first one that should no longer be in use by the GPU.
> + *
> + * If it's not the case we skip over the bo after last to the closest
> + * done bo if such one exist. If none exist and we are not asked to
> + * block we report failure to allocate.
> + *
> + * If we are asked to block we wait on all the oldest fence of all
> + * rings. We just wait for any of those fence to complete.
> + */
> #include "drmP.h"
> #include "drm.h"
> #include "radeon.h"
>
> +static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
> +static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
> +
> int radeon_sa_bo_manager_init(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager,
> unsigned size, u32 domain)
> {
> - int r;
> + int i, r;
>
> spin_lock_init(&sa_manager->lock);
> sa_manager->bo = NULL;
> sa_manager->size = size;
> sa_manager->domain = domain;
> - INIT_LIST_HEAD(&sa_manager->sa_bo);
> + sa_manager->hole = &sa_manager->olist;
> + INIT_LIST_HEAD(&sa_manager->olist);
> + for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> + INIT_LIST_HEAD(&sa_manager->flist[i]);
> + }
>
> r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
> RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
> @@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
> {
> struct radeon_sa_bo *sa_bo, *tmp;
>
> - if (!list_empty(&sa_manager->sa_bo)) {
> - dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
> + if (!list_empty(&sa_manager->olist)) {
> + sa_manager->hole = &sa_manager->olist,
> + radeon_sa_bo_try_free(sa_manager);
> + if (!list_empty(&sa_manager->olist)) {
> + dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
> + }
> }
> - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
> - list_del_init(&sa_bo->list);
> + list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
> + radeon_sa_bo_remove_locked(sa_bo);
> }
> radeon_bo_unref(&sa_manager->bo);
> sa_manager->size = 0;
> @@ -114,111 +139,181 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
> return r;
> }
>
> -/*
> - * Principe is simple, we keep a list of sub allocation in offset
> - * order (first entry has offset == 0, last entry has the highest
> - * offset).
> - *
> - * When allocating new object we first check if there is room at
> - * the end total_size - (last_object_offset + last_object_size) >=
> - * alloc_size. If so we allocate new object there.
> - *
> - * When there is not enough room at the end, we start waiting for
> - * each sub object until we reach object_offset+object_size >=
> - * alloc_size, this object then become the sub object we return.
> - *
> - * Alignment can't be bigger than page size
> - */
> -
> static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
> {
> - list_del(&sa_bo->list);
> + struct radeon_sa_manager *sa_manager = sa_bo->manager;
> + if (sa_manager->hole == &sa_bo->olist) {
> + sa_manager->hole = sa_bo->olist.prev;
> + }
> + list_del_init(&sa_bo->olist);
> + list_del_init(&sa_bo->flist);
> radeon_fence_unref(&sa_bo->fence);
> kfree(sa_bo);
> }
>
> +static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
> +{
> + struct radeon_sa_bo *sa_bo, *tmp;
> +
> + if (sa_manager->hole->next == &sa_manager->olist)
> + return;
> +
> + sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
> + list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
> + if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
> + return;
> + }
> + radeon_sa_bo_remove_locked(sa_bo);
> + }
> +}
> +
> +static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
> +{
> + struct list_head *hole = sa_manager->hole;
> +
> + if (hole != &sa_manager->olist) {
> + return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
> + }
> + return 0;
> +}
> +
> +static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
> +{
> + struct list_head *hole = sa_manager->hole;
> +
> + if (hole->next != &sa_manager->olist) {
> + return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
> + }
> + return sa_manager->size;
> +}
> +
> +static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
> + struct radeon_sa_bo *sa_bo,
> + unsigned size, unsigned align)
> +{
> + unsigned soffset, eoffset, wasted;
> +
> + soffset = radeon_sa_bo_hole_soffset(sa_manager);
> + eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
> + wasted = (align - (soffset % align)) % align;
> +
> + if ((eoffset - soffset) >= (size + wasted)) {
> + soffset += wasted;
> +
> + sa_bo->manager = sa_manager;
> + sa_bo->soffset = soffset;
> + sa_bo->eoffset = soffset + size;
> + list_add(&sa_bo->olist, sa_manager->hole);
> + INIT_LIST_HEAD(&sa_bo->flist);
> + sa_manager->hole = &sa_bo->olist;
> + return true;
> + }
> + return false;
> +}
> +
> +static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
> + struct radeon_fence **fences)
> +{
> + unsigned i, soffset, best, tmp;
> +
> + /* if hole points to the end of the buffer */
> + if (sa_manager->hole->next == &sa_manager->olist) {
> + /* try again with its beginning */
> + sa_manager->hole = &sa_manager->olist;
> + return true;
> + }
> +
> + soffset = radeon_sa_bo_hole_soffset(sa_manager);
> + /* to handle wrap around we add sa_manager->size */
> + best = sa_manager->size * 2;
> + /* go over all fence list and try to find the closest sa_bo
> + * of the current last
> + */
> + for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> + struct radeon_sa_bo *sa_bo;
> +
> + if (list_empty(&sa_manager->flist[i])) {
> + fences[i] = NULL;
> + continue;
> + }
> +
> + sa_bo = list_first_entry(&sa_manager->flist[i],
> + struct radeon_sa_bo, flist);
> +
> + if (!radeon_fence_signaled(sa_bo->fence)) {
> + fences[i] = sa_bo->fence;
> + continue;
> + }
> +
> + tmp = sa_bo->soffset;
> + if (tmp < soffset) {
> + /* wrap around, pretend it's after */
> + tmp += sa_manager->size;
> + }
> + tmp -= soffset;
> + if (tmp < best) {
> + /* this sa bo is the closest one */
> + best = tmp;
> + sa_manager->hole = sa_bo->olist.prev;
> + }
> +
> + /* we knew that this one is signaled,
> + so it's save to remote it */
> + radeon_sa_bo_remove_locked(sa_bo);
> + }
> + return best != sa_manager->size * 2;
> +}
> +
> int radeon_sa_bo_new(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager,
> struct radeon_sa_bo **sa_bo,
> unsigned size, unsigned align, bool block)
> {
> - struct radeon_fence *fence = NULL;
> - struct radeon_sa_bo *tmp, *next;
> - struct list_head *head;
> - unsigned offset = 0, wasted = 0;
> - int r;
> + struct radeon_fence *fences[RADEON_NUM_RINGS];
> + int r = -ENOMEM;
>
> BUG_ON(align > RADEON_GPU_PAGE_SIZE);
> BUG_ON(size > sa_manager->size);
>
> *sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
> -
> -retry:
> + if ((*sa_bo) == NULL) {
> + return -ENOMEM;
> + }
> + (*sa_bo)->manager = sa_manager;
> + (*sa_bo)->fence = NULL;
> + INIT_LIST_HEAD(&(*sa_bo)->olist);
> + INIT_LIST_HEAD(&(*sa_bo)->flist);
>
> spin_lock(&sa_manager->lock);
> + do {
> + /* try to allocate couple time before going to wait */
> + do {
> + radeon_sa_bo_try_free(sa_manager);
>
> - /* no one ? */
> - head = sa_manager->sa_bo.prev;
> - if (list_empty(&sa_manager->sa_bo)) {
> - goto out;
> - }
> -
> - /* look for a hole big enough */
> - offset = 0;
> - list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
> - /* try to free this object */
> - if (tmp->fence) {
> - if (radeon_fence_signaled(tmp->fence)) {
> - radeon_sa_bo_remove_locked(tmp);
> - continue;
> - } else {
> - fence = tmp->fence;
> + if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
> + size, align)) {
> + spin_unlock(&sa_manager->lock);
> + return 0;
> }
> - }
>
> - /* room before this object ? */
> - if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
> - head = tmp->list.prev;
> - goto out;
> - }
> - offset = tmp->eoffset;
> - wasted = offset % align;
> - if (wasted) {
> - wasted = align - wasted;
> - }
> - offset += wasted;
> - }
> - /* room at the end ? */
> - head = sa_manager->sa_bo.prev;
> - tmp = list_entry(head, struct radeon_sa_bo, list);
> - offset = tmp->eoffset;
> - wasted = offset % align;
> - if (wasted) {
> - wasted = align - wasted;
> - }
> - offset += wasted;
> - if ((sa_manager->size - offset) < size) {
> - /* failed to find somethings big enough */
> - spin_unlock(&sa_manager->lock);
> - if (block && fence) {
> - r = radeon_fence_wait(fence, false);
> - if (r)
> - return r;
> -
> - goto retry;
> + /* see if we can skip over some allocations */
> + } while (radeon_sa_bo_next_hole(sa_manager, fences));
Here you can loop infinitely in the case where there is a bunch of holes
in the allocator but none of them is large enough to fulfill the
allocation. radeon_sa_bo_next_hole will keep returning true, looping over
and over all the holes. That's why I restrict my patch to skipping at most
2 holes and then failing the allocation or trying to wait. I believe,
sadly, we need a heuristic, and skipping at most 2 holes sounded like a
good one.
> +
> + if (block) {
> + spin_unlock(&sa_manager->lock);
> + r = radeon_fence_wait_any(rdev, fences, false);
> + spin_lock(&sa_manager->lock);
> + if (r) {
> + goto out_err;
> + }
> }
> - kfree(*sa_bo);
> - *sa_bo = NULL;
> - return -ENOMEM;
> - }
> + } while (block);
>
> -out:
> - (*sa_bo)->manager = sa_manager;
> - (*sa_bo)->soffset = offset;
> - (*sa_bo)->eoffset = offset + size;
> - list_add(&(*sa_bo)->list, head);
> +out_err:
> spin_unlock(&sa_manager->lock);
> - return 0;
> + kfree(*sa_bo);
> + *sa_bo = NULL;
> + return r;
> }
>
> void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
> @@ -226,13 +321,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
> {
> struct radeon_sa_manager *sa_manager;
>
> - if (!sa_bo || !*sa_bo)
> + if (sa_bo == NULL || *sa_bo == NULL) {
> return;
> + }
>
> sa_manager = (*sa_bo)->manager;
> spin_lock(&sa_manager->lock);
> if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
> (*sa_bo)->fence = radeon_fence_ref(fence);
> + list_add_tail(&(*sa_bo)->flist,
> + &sa_manager->flist[fence->ring]);
> } else {
> radeon_sa_bo_remove_locked(*sa_bo);
> }
> @@ -247,15 +345,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
> struct radeon_sa_bo *i;
>
> spin_lock(&sa_manager->lock);
> - list_for_each_entry(i, &sa_manager->sa_bo, list) {
> - seq_printf(m, "[%08x %08x] size %4d (%p)",
> - i->soffset, i->eoffset, i->eoffset - i->soffset, i);
> - if (i->fence) {
> - seq_printf(m, " protected by %Ld (%p) on ring %d\n",
> - i->fence->seq, i->fence, i->fence->ring);
> + list_for_each_entry(i, &sa_manager->olist, olist) {
> + if (&i->olist == sa_manager->hole) {
> + seq_printf(m, ">");
> } else {
> - seq_printf(m, "\n");
> + seq_printf(m, " ");
> + }
> + seq_printf(m, "[0x%08x 0x%08x] size %8d",
> + i->soffset, i->eoffset, i->eoffset - i->soffset);
> + if (i->fence) {
> + seq_printf(m, " protected by 0x%016llx on ring %d",
> + i->fence->seq, i->fence->ring);
> }
> + seq_printf(m, "\n");
> }
> spin_unlock(&sa_manager->lock);
> }
> --
> 1.7.5.4
>
More information about the dri-devel
mailing list