[Intel-gfx] [PATCH 04/11] drm/i915: Generalise GPU activity tracking
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jan 30 12:02:02 UTC 2019
On 30/01/2019 02:18, Chris Wilson wrote:
> We currently track GPU memory usage inside VMA, such that we never
> release memory used by the GPU until after it has finished accessing it.
> However, we may want to track other resources aside from VMA, or we may
> want to split a VMA into multiple independent regions and track each
> separately. For this purpose, generalise our request tracking (akin to
> struct reservation_object) so that we can embed it into other objects.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile | 4 +-
> drivers/gpu/drm/i915/i915_active.c | 226 ++++++++++++++++++
> drivers/gpu/drm/i915/i915_active.h | 66 +++++
> drivers/gpu/drm/i915/i915_active_types.h | 26 ++
> drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +-
> drivers/gpu/drm/i915/i915_vma.c | 173 +++-----------
> drivers/gpu/drm/i915/i915_vma.h | 9 +-
> drivers/gpu/drm/i915/selftests/i915_active.c | 158 ++++++++++++
> .../drm/i915/selftests/i915_live_selftests.h | 3 +-
> 9 files changed, 514 insertions(+), 154 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_active.c
> create mode 100644 drivers/gpu/drm/i915/i915_active.h
> create mode 100644 drivers/gpu/drm/i915/i915_active_types.h
> create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 210d0e8777b6..1787e1299b1b 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # GEM code
> -i915-y += i915_cmd_parser.o \
> +i915-y += \
> + i915_active.o \
> + i915_cmd_parser.o \
> i915_gem_batch_pool.o \
> i915_gem_clflush.o \
> i915_gem_context.o \
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> new file mode 100644
> index 000000000000..e0182e19cb8b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -0,0 +1,226 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "i915_active.h"
> +
> +#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> +
> +struct active_node {
> + struct i915_gem_active base;
> + struct i915_active *ref;
> + struct rb_node node;
> + u64 timeline;
> +};
> +
> +static void
> +__active_retire(struct i915_active *ref)
> +{
You wouldn't consider naming this variable 'active' throughout? 'ref'
reminds me too much of a kref, e.g. fence->refcount, although 'active' has
been used for gem_active so far. Not a blocker though; I'll get used to it.
> + GEM_BUG_ON(!ref->count);
> + if (!--ref->count)
> + ref->retire(ref);
> +}
> +
> +static void
> +node_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __active_retire(container_of(base, struct active_node, base)->ref);
> +}
> +
> +static void
> +last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __active_retire(container_of(base, struct i915_active, last));
> +}
> +
> +static struct i915_gem_active *
> +active_instance(struct i915_active *ref, u64 idx)
> +{
> + struct active_node *node;
> + struct rb_node **p, *parent;
> + struct i915_request *old;
> +
> + /*
> + * We track the most recently used timeline to skip a rbtree search
> + * for the common case, under typical loads we never need the rbtree
> + * at all. We can reuse the last slot if it is empty, that is
> + * after the previous activity has been retired, or if it matches the
> + * current timeline.
> + *
> + * Note that we allow the timeline to be active simultaneously in
> + * the rbtree and the last cache. We do this to avoid having
> + * to search and replace the rbtree element for a new timeline, with
> + * the cost being that we must be aware that the ref may be retired
> + * twice for the same timeline (as the older rbtree element will be
> + * retired before the new request added to last).
> + */
> + old = i915_gem_active_raw(&ref->last, BKL(ref));
> + if (!old || old->fence.context == idx)
> + goto out;
> +
> + /* Move the currently active fence into the rbtree */
> + idx = old->fence.context;
> +
> + parent = NULL;
> + p = &ref->tree.rb_node;
> + while (*p) {
> + parent = *p;
> +
> + node = rb_entry(parent, struct active_node, node);
> + if (node->timeline == idx)
> + goto replace;
> +
> + if (node->timeline < idx)
> + p = &parent->rb_right;
> + else
> + p = &parent->rb_left;
> + }
> +
> + node = kmalloc(sizeof(*node), GFP_KERNEL);
> +
> + /* kmalloc may retire the ref->last (thanks shrinker)! */
> + if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
> + kfree(node);
> + goto out;
> + }
> +
> + if (unlikely(!node))
> + return ERR_PTR(-ENOMEM);
> +
> + init_request_active(&node->base, node_retire);
> + node->ref = ref;
> + node->timeline = idx;
> +
> + rb_link_node(&node->node, parent, p);
> + rb_insert_color(&node->node, &ref->tree);
> +
> +replace:
> + /*
> + * Overwrite the previous active slot in the rbtree with last,
> + * leaving last zeroed. If the previous slot is still active,
> + * we must be careful as we now only expect to receive one retire
> + * callback not two, and so must undo the active counting for the
> + * overwritten slot.
> + */
> + if (i915_gem_active_isset(&node->base)) {
> + /* Retire ourselves from the old rq->active_list */
> + __list_del_entry(&node->base.link);
> + ref->count--;
> + GEM_BUG_ON(!ref->count);
> + }
> + GEM_BUG_ON(list_empty(&ref->last.link));
> + list_replace_init(&ref->last.link, &node->base.link);
> + node->base.request = fetch_and_zero(&ref->last.request);
> +
> +out:
> + return &ref->last;
> +}
> +
> +void i915_active_init(struct drm_i915_private *i915,
> + struct i915_active *ref,
> + void (*retire)(struct i915_active *ref))
> +{
> + ref->i915 = i915;
> + ref->retire = retire;
> + ref->tree = RB_ROOT;
> + init_request_active(&ref->last, last_retire);
> + ref->count = 0;
> +}
> +
> +int i915_active_ref(struct i915_active *ref,
> + u64 timeline,
> + struct i915_request *rq)
> +{
> + struct i915_gem_active *active;
> +
> + active = active_instance(ref, timeline);
> + if (IS_ERR(active))
> + return PTR_ERR(active);
> +
> + if (!i915_gem_active_isset(active))
> + ref->count++;
Could stick a super-paranoid overflow GEM_BUG_ON here.
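Something along these lines, just a sketch (untested, exact limit check up
to you):

        if (!i915_gem_active_isset(active)) {
                GEM_BUG_ON(ref->count == ~0u); /* don't let the count wrap */
                ref->count++;
        }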
> + i915_gem_active_set(active, rq);
> +
> + return 0;
> +}
> +
> +bool i915_active_acquire(struct i915_active *ref)
> +{
> + lockdep_assert_held(BKL(ref));
> + return !ref->count++;
> +}
> +
> +void i915_active_release(struct i915_active *ref)
> +{
> + lockdep_assert_held(BKL(ref));
> + __active_retire(ref);
> +}
> +
> +int i915_active_wait(struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> + int ret;
> +
> + ret = i915_gem_active_retire(&ref->last, BKL(ref));
> + if (ret)
> + return ret;
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + ret = i915_gem_active_retire(&it->base, BKL(ref));
> + if (ret)
> + return ret;
> +
> + GEM_BUG_ON(i915_gem_active_isset(&it->base));
> + kfree(it);
> + }
> + ref->tree = RB_ROOT;
> +
> + return 0;
> +}
> +
> +static int __i915_request_await_active(struct i915_request *rq,
> + struct i915_gem_active *active)
> +{
> + struct i915_request *barrier =
> + i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
> +
> + return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
> +}
> +
> +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> + int ret;
> +
> + ret = __i915_request_await_active(rq, &ref->last);
> + if (ret)
> + return ret;
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + ret = __i915_request_await_active(rq, &it->base);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +void i915_active_fini(struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> +
> + GEM_BUG_ON(i915_gem_active_isset(&ref->last));
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + GEM_BUG_ON(i915_gem_active_isset(&it->base));
> + kfree(it);
> + }
> + ref->tree = RB_ROOT;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_active.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> new file mode 100644
> index 000000000000..c0729a046f98
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -0,0 +1,66 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_H_
> +#define _I915_ACTIVE_H_
> +
> +#include "i915_active_types.h"
> +
> +#include <linux/rbtree.h>
> +
> +#include "i915_request.h"
> +
> +/*
> + * GPU activity tracking
> + *
> + * Each set of commands submitted to the GPU comprises a single request that
> + * signals a fence upon completion. struct i915_request combines the
> + * command submission, scheduling and fence signaling roles. If we want to see
> + * if a particular task is complete, we need to grab the fence (struct
> + * i915_request) for that task and check or wait for it to be signaled. More
> + * often though we want to track the status of a bunch of tasks, for example
> + * to wait for the GPU to finish accessing some memory across a variety of
> + * different command pipelines from different clients. We could choose to
> + * track every single request associated with the task, but knowing that
> + * each request belongs to an ordered timeline (later requests within a
> + * timeline must wait for earlier requests), we need only track the
> + * latest request in each timeline to determine the overall status of the
> + * task.
> + *
> + * struct i915_active provides this tracking across timelines. It builds a
> + * composite shared-fence, and is updated as new work is submitted to the task,
> + * forming a snapshot of the current status. It should be embedded into the
> + * different resources that need to track their associated GPU activity to
> + * provide a callback when that GPU activity has ceased, or otherwise to
> + * provide a serialisation point either for request submission or for CPU
> + * synchronisation.
> + */
> +
> +void i915_active_init(struct drm_i915_private *i915,
> + struct i915_active *ref,
> + void (*retire)(struct i915_active *ref));
> +
> +int i915_active_ref(struct i915_active *ref,
> + u64 timeline,
> + struct i915_request *rq);
> +
> +int i915_active_wait(struct i915_active *ref);
> +
> +int i915_request_await_active(struct i915_request *rq,
> + struct i915_active *ref);
> +
> +bool i915_active_acquire(struct i915_active *ref);
> +void i915_active_release(struct i915_active *ref);
> +
> +static inline bool
> +i915_active_is_idle(const struct i915_active *ref)
> +{
> + return !ref->count;
> +}
> +
> +void i915_active_fini(struct i915_active *ref);
> +
> +#endif /* _I915_ACTIVE_H_ */
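As an aside for anyone following the series: the intended usage mirrors the
i915_vma conversion further down. Roughly, for a hypothetical 'struct foo'
embedding the tracker (not part of this patch, untested sketch):

        struct foo {
                struct i915_active active;
                /* the resource the GPU may be accessing */
        };

        static void foo_retire(struct i915_active *ref)
        {
                struct foo *foo = container_of(ref, struct foo, active);

                /* all tracked requests have signalled; safe to release foo */
        }

        /* at creation */
        i915_active_init(i915, &foo->active, foo_retire);

        /* for each request touching foo, under struct_mutex */
        err = i915_active_ref(&foo->active, rq->fence.context, rq);

        /* CPU-side synchronisation point before tearing down */
        err = i915_active_wait(&foo->active);
        i915_active_fini(&foo->active);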
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> new file mode 100644
> index 000000000000..411e502ed8dd
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -0,0 +1,26 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_TYPES_H_
> +#define _I915_ACTIVE_TYPES_H_
> +
> +#include <linux/rbtree.h>
> +
> +#include "i915_request.h"
> +
> +struct drm_i915_private;
> +
> +struct i915_active {
> + struct drm_i915_private *i915;
> +
> + struct rb_root tree;
> + struct i915_gem_active last;
> + unsigned int count;
> +
> + void (*retire)(struct i915_active *ref);
> +};
> +
> +#endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 49b00996a15e..e625659c03a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> if (!vma)
> return ERR_PTR(-ENOMEM);
>
> + i915_active_init(i915, &vma->active, NULL);
> init_request_active(&vma->last_fence, NULL);
>
> vma->vm = &ggtt->vm;
> vma->ops = &pd_vma_ops;
> vma->private = ppgtt;
>
> - vma->active = RB_ROOT;
> -
> vma->size = size;
> vma->fence_size = size;
> vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d83b8ad5f859..d4772061e642 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,22 +63,23 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>
> #endif
>
> -struct i915_vma_active {
> - struct i915_gem_active base;
> - struct i915_vma *vma;
> - struct rb_node node;
> - u64 timeline;
> -};
> +static void obj_bump_mru(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
>
> -static void
> -__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> + spin_lock(&i915->mm.obj_lock);
> + if (obj->bind_count)
> + list_move_tail(&obj->mm.link, &i915->mm.bound_list);
> + spin_unlock(&i915->mm.obj_lock);
> +
> + obj->mm.dirty = true; /* be paranoid */
> +}
> +
> +static void __i915_vma_retire(struct i915_active *ref)
> {
> + struct i915_vma *vma = container_of(ref, typeof(*vma), active);
> struct drm_i915_gem_object *obj = vma->obj;
>
> - GEM_BUG_ON(!i915_vma_is_active(vma));
> - if (--vma->active_count)
> - return;
> -
> GEM_BUG_ON(!i915_gem_object_is_active(obj));
> if (--obj->active_count)
> return;
> @@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> reservation_object_unlock(obj->resv);
> }
>
> - /* Bump our place on the bound list to keep it roughly in LRU order
> + /*
> + * Bump our place on the bound list to keep it roughly in LRU order
> * so that we don't steal from recently used but inactive objects
> * (unless we are forced to ofc!)
> */
> - spin_lock(&rq->i915->mm.obj_lock);
> - if (obj->bind_count)
> - list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
> - spin_unlock(&rq->i915->mm.obj_lock);
> -
> - obj->mm.dirty = true; /* be paranoid */
> + obj_bump_mru(obj);
>
> if (i915_gem_object_has_active_reference(obj)) {
> i915_gem_object_clear_active_reference(obj);
> @@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> }
> }
>
> -static void
> -i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> - struct i915_vma_active *active =
> - container_of(base, typeof(*active), base);
> -
> - __i915_vma_retire(active->vma, rq);
> -}
> -
> -static void
> -i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> - __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> -}
> -
> static struct i915_vma *
> vma_create(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj,
> if (vma == NULL)
> return ERR_PTR(-ENOMEM);
>
> - vma->active = RB_ROOT;
> -
> - init_request_active(&vma->last_active, i915_vma_last_retire);
> + i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
> init_request_active(&vma->last_fence, NULL);
> +
> vma->vm = vm;
> vma->ops = &vm->vma_ops;
> vma->obj = obj;
> @@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma)
> static void __i915_vma_destroy(struct i915_vma *vma)
> {
> struct drm_i915_private *i915 = vma->vm->i915;
> - struct i915_vma_active *iter, *n;
>
> GEM_BUG_ON(vma->node.allocated);
> GEM_BUG_ON(vma->fence);
> @@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> spin_unlock(&obj->vma.lock);
> }
>
> - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> - GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> - kfree(iter);
> - }
> + i915_active_fini(&vma->active);
>
> kmem_cache_free(i915->vmas, vma);
> }
> @@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma,
> reservation_object_unlock(resv);
> }
>
> -static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> -{
> - struct i915_vma_active *active;
> - struct rb_node **p, *parent;
> - struct i915_request *old;
> -
> - /*
> - * We track the most recently used timeline to skip a rbtree search
> - * for the common case, under typical loads we never need the rbtree
> - * at all. We can reuse the last_active slot if it is empty, that is
> - * after the previous activity has been retired, or if the active
> - * matches the current timeline.
> - *
> - * Note that we allow the timeline to be active simultaneously in
> - * the rbtree and the last_active cache. We do this to avoid having
> - * to search and replace the rbtree element for a new timeline, with
> - * the cost being that we must be aware that the vma may be retired
> - * twice for the same timeline (as the older rbtree element will be
> - * retired before the new request added to last_active).
> - */
> - old = i915_gem_active_raw(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex);
> - if (!old || old->fence.context == idx)
> - goto out;
> -
> - /* Move the currently active fence into the rbtree */
> - idx = old->fence.context;
> -
> - parent = NULL;
> - p = &vma->active.rb_node;
> - while (*p) {
> - parent = *p;
> -
> - active = rb_entry(parent, struct i915_vma_active, node);
> - if (active->timeline == idx)
> - goto replace;
> -
> - if (active->timeline < idx)
> - p = &parent->rb_right;
> - else
> - p = &parent->rb_left;
> - }
> -
> - active = kmalloc(sizeof(*active), GFP_KERNEL);
> -
> - /* kmalloc may retire the vma->last_active request (thanks shrinker)! */
> - if (unlikely(!i915_gem_active_raw(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex))) {
> - kfree(active);
> - goto out;
> - }
> -
> - if (unlikely(!active))
> - return ERR_PTR(-ENOMEM);
> -
> - init_request_active(&active->base, i915_vma_retire);
> - active->vma = vma;
> - active->timeline = idx;
> -
> - rb_link_node(&active->node, parent, p);
> - rb_insert_color(&active->node, &vma->active);
> -
> -replace:
> - /*
> - * Overwrite the previous active slot in the rbtree with last_active,
> - * leaving last_active zeroed. If the previous slot is still active,
> - * we must be careful as we now only expect to receive one retire
> - * callback not two, and so much undo the active counting for the
> - * overwritten slot.
> - */
> - if (i915_gem_active_isset(&active->base)) {
> - /* Retire ourselves from the old rq->active_list */
> - __list_del_entry(&active->base.link);
> - vma->active_count--;
> - GEM_BUG_ON(!vma->active_count);
> - }
> - GEM_BUG_ON(list_empty(&vma->last_active.link));
> - list_replace_init(&vma->last_active.link, &active->base.link);
> - active->base.request = fetch_and_zero(&vma->last_active.request);
> -
> -out:
> - return &vma->last_active;
> -}
> -
> int i915_vma_move_to_active(struct i915_vma *vma,
> struct i915_request *rq,
> unsigned int flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
> - struct i915_gem_active *active;
>
> lockdep_assert_held(&rq->i915->drm.struct_mutex);
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>
> - active = active_instance(vma, rq->fence.context);
> - if (IS_ERR(active))
> - return PTR_ERR(active);
> -
> /*
> * Add a reference if we're newly entering the active list.
> * The order in which we add operations to the retirement queue is
> @@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> * add the active reference first and queue for it to be dropped
> * *last*.
> */
> - if (!i915_gem_active_isset(active) && !vma->active_count++)
> + if (!vma->active.count)
> obj->active_count++;
> - i915_gem_active_set(active, rq);
> +
> + if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
> + if (!vma->active.count)
> + obj->active_count--;
> + return -ENOMEM;
> + }
Optionally you could make i915_active_ref return the old ref count or error.
Then this could become simpler:

        ret = i915_active_ref(..);
        if (unlikely(ret < 0))
                return -ENOMEM;
        else if (ret == 0)
                obj->active_count++;
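On the i915_active_ref side that would be roughly (untested sketch,
returning the count as it was before this ref instead of 0):

        int i915_active_ref(struct i915_active *ref,
                            u64 timeline,
                            struct i915_request *rq)
        {
                struct i915_gem_active *active;
                int prev;

                active = active_instance(ref, timeline);
                if (IS_ERR(active))
                        return PTR_ERR(active);

                prev = ref->count;
                if (!i915_gem_active_isset(active))
                        ref->count++;
                i915_gem_active_set(active, rq);

                return prev;
        }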
> +
> GEM_BUG_ON(!i915_vma_is_active(vma));
> GEM_BUG_ON(!obj->active_count);
>
> @@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> might_sleep();
> if (i915_vma_is_active(vma)) {
> - struct i915_vma_active *active, *n;
> -
> /*
> * When a closed VMA is retired, it is unbound - eek.
> * In order to prevent it from being recursively closed,
> @@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> __i915_vma_pin(vma);
>
> - ret = i915_gem_active_retire(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex);
> + ret = i915_active_wait(&vma->active);
> if (ret)
> goto unpin;
>
> - rbtree_postorder_for_each_entry_safe(active, n,
> - &vma->active, node) {
> - ret = i915_gem_active_retire(&active->base,
> - &vma->vm->i915->drm.struct_mutex);
> - if (ret)
> - goto unpin;
> - }
> -
> ret = i915_gem_active_retire(&vma->last_fence,
> &vma->vm->i915->drm.struct_mutex);
> unpin:
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5793abe509a2..3c03d4569481 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -34,6 +34,7 @@
> #include "i915_gem_fence_reg.h"
> #include "i915_gem_object.h"
>
> +#include "i915_active.h"
> #include "i915_request.h"
>
> enum i915_cache_level;
> @@ -108,9 +109,7 @@ struct i915_vma {
> #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
> #define I915_VMA_GGTT_WRITE BIT(15)
>
> - unsigned int active_count;
> - struct rb_root active;
> - struct i915_gem_active last_active;
> + struct i915_active active;
> struct i915_gem_active last_fence;
>
> /**
> @@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
> void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
> #define I915_VMA_RELEASE_MAP BIT(0)
>
> -static inline bool i915_vma_is_active(struct i915_vma *vma)
> +static inline bool i915_vma_is_active(const struct i915_vma *vma)
> {
> - return vma->active_count;
> + return !i915_active_is_idle(&vma->active);
> }
>
> int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> new file mode 100644
> index 000000000000..7c5c3068565b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -0,0 +1,158 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +
> +#include "igt_flush_test.h"
> +#include "lib_sw_fence.h"
> +
> +struct live_active {
> + struct i915_active base;
> + bool retired;
> +};
> +
> +static void __live_active_retire(struct i915_active *base)
> +{
> + struct live_active *active = container_of(base, typeof(*active), base);
> +
> + active->retired = true;
> +}
> +
> +static int __live_active_setup(struct drm_i915_private *i915,
> + struct live_active *active)
> +{
> + struct intel_engine_cs *engine;
> + struct i915_sw_fence *submit;
> + enum intel_engine_id id;
> + unsigned int count = 0;
> + int err = 0;
> +
> + i915_active_init(i915, &active->base, __live_active_retire);
> + active->retired = false;
> +
> + if (!i915_active_acquire(&active->base)) {
> + pr_err("First i915_active_acquire should report being idle\n");
> + return -EINVAL;
> + }
> +
> + submit = heap_fence_create(GFP_KERNEL);
> +
> + for_each_engine(engine, i915, id) {
> + struct i915_request *rq;
> +
> + rq = i915_request_alloc(engine, i915->kernel_context);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> + submit,
> + GFP_KERNEL);
> + if (err < 0) {
> + pr_err("Failed to allocate submission fence!\n");
> + i915_request_add(rq);
> + break;
> + }
> +
> + err = i915_active_ref(&active->base, rq->fence.context, rq);
> + if (err) {
> + pr_err("Failed to track active ref!\n");
> + i915_request_add(rq);
> + break;
> + }
> +
> + i915_request_add(rq);
> + count++;
> + }
> +
> + i915_active_release(&active->base);
> + if (active->retired) {
> + pr_err("i915_active retired before submission!\n");
> + err = -EINVAL;
> + }
> + if (active->base.count != count) {
> + pr_err("i915_active not tracking all requests, found %d, expected %d\n",
> + active->base.count, count);
> + err = -EINVAL;
> + }
> +
> + i915_sw_fence_commit(submit);
> + heap_fence_put(submit);
> +
> + return err;
> +}
> +
> +static int live_active_wait(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_active active;
> + intel_wakeref_t wakeref;
> + int err;
> +
> + /* Check that we get a callback when requests retire upon waiting */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + err = __live_active_setup(i915, &active);
> +
> + i915_active_wait(&active.base);
> + if (!active.retired) {
> + pr_err("i915_active not retired after waiting!\n");
> + err = -EINVAL;
> + }
> +
> + i915_active_fini(&active.base);
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> +static int live_active_retire(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_active active;
> + intel_wakeref_t wakeref;
> + int err;
> +
> + /* Check that we get a callback when requests are indirectly retired */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + err = __live_active_setup(i915, &active);
> +
> + /* waits for & retires all requests */
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + if (!active.retired) {
> + pr_err("i915_active not retired after flushing!\n");
> + err = -EINVAL;
> + }
> +
> + i915_active_fini(&active.base);
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> +int i915_active_live_selftests(struct drm_i915_private *i915)
> +{
> + static const struct i915_subtest tests[] = {
> + SUBTEST(live_active_wait),
> + SUBTEST(live_active_retire),
> + };
> +
> + if (i915_terminally_wedged(&i915->gpu_error))
> + return 0;
> +
> + return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 76b4f87fc853..6d766925ad04 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -12,8 +12,9 @@
> selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
> selftest(uncore, intel_uncore_live_selftests)
> selftest(workarounds, intel_workarounds_live_selftests)
> -selftest(requests, i915_request_live_selftests)
> selftest(timelines, i915_timeline_live_selftests)
> +selftest(requests, i915_request_live_selftests)
> +selftest(active, i915_active_live_selftests)
> selftest(objects, i915_gem_object_live_selftests)
> selftest(dmabuf, i915_gem_dmabuf_live_selftests)
> selftest(coherency, i915_gem_coherency_live_selftests)
>
Body looks good. I left the selftests for after I get further into the series.
Regards,
Tvrtko