[Intel-gfx] [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Oct 11 09:32:10 UTC 2016
On 07/10/2016 10:46, Chris Wilson wrote:
> A while ago we switched from a contiguous array of pages into an sglist,
> for that was both more convenient for mapping to hardware and avoided
> the requirement for a vmalloc array of pages on every object. However,
> certain GEM API calls (like pwrite, pread as well as performing
> relocations) do desire access to individual struct pages. A quick hack
> was to introduce a cache of the last access such that finding the
> following page was quick - this works so long as the caller desired
> sequential access. Walking backwards, or multiple callers, still hits a
> slow linear search for each page. One solution is to store each
> successful lookup in a radix tree.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 57 ++++--------
> drivers/gpu/drm/i915/i915_gem.c | 149 ++++++++++++++++++++++++++++----
> drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
> 4 files changed, 154 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bad97f1e5265..a96b446d8db4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
>
> struct sg_table *pages;
> int pages_pin_count;
> - struct get_page {
> - struct scatterlist *sg;
> - int last;
> + struct i915_gem_object_page_iter {
> + struct scatterlist *sg_pos;
> + unsigned long sg_idx;
> +
> + struct radix_tree_root radix;
> + struct mutex lock;
> } get_page;
> void *mapping;
>
> @@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct scatterlist *sg)
> return sg->length >> PAGE_SHIFT;
> }
>
> -struct page *
> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
> -
> -static inline dma_addr_t
> -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
> -{
> - if (n < obj->get_page.last) {
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> - }
> -
> - while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
> - obj->get_page.last += __sg_page_count(obj->get_page.sg++);
> - if (unlikely(sg_is_chain(obj->get_page.sg)))
> - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
> - }
> -
> - return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
> -}
> -
> -static inline struct page *
> -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> -{
> - if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
> - return NULL;
> +struct scatterlist *
> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> + unsigned long n, unsigned int *offset);
>
> - if (n < obj->get_page.last) {
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> - }
> +struct page *
> +i915_gem_object_get_page(struct drm_i915_gem_object *obj,
> + unsigned long n);
>
> - while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
> - obj->get_page.last += __sg_page_count(obj->get_page.sg++);
> - if (unlikely(sg_is_chain(obj->get_page.sg)))
> - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
> - }
> +struct page *
> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
> + unsigned long n);
>
> - return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
> -}
> +dma_addr_t
> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
> + unsigned long n);
>
> static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ada837e393a7..af7d51f16658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
> kfree(obj->pages);
> }
>
> +static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
> +{
> + struct radix_tree_iter iter;
> + void **slot;
> +
> + radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
> + radix_tree_delete(&obj->get_page.radix, iter.index);
> +}
> +
> int
> i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> {
> @@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> obj->mapping = NULL;
> }
>
> + __i915_gem_object_reset_page_iter(obj);
> +
> ops->put_pages(obj);
> obj->pages = NULL;
>
> @@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>
> list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> + obj->get_page.sg_pos = obj->pages->sgl;
> + obj->get_page.sg_idx = 0;
>
> return 0;
> }
> @@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>
> obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
> obj->madv = I915_MADV_WILLNEED;
> + INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
> + mutex_init(&obj->get_page.lock);
>
> i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
> }
> @@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
> }
> }
>
> -/* Like i915_gem_object_get_page(), but mark the returned page dirty */
> -struct page *
> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
> -{
> - struct page *page;
> -
> - /* Only default objects have per-page dirty tracking */
> - if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
> - return NULL;
> -
> - page = i915_gem_object_get_page(obj, n);
> - set_page_dirty(page);
> - return page;
> -}
> -
> /* Allocate a new GEM object and fill it with the supplied data */
> struct drm_i915_gem_object *
> i915_gem_object_create_from_data(struct drm_device *dev,
> @@ -4959,3 +4957,120 @@ fail:
> i915_gem_object_put(obj);
> return ERR_PTR(ret);
> }
> +
> +static struct scatterlist *
> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> + unsigned long n, unsigned int *offset)
> +{
> + struct scatterlist *sg = obj->pages->sgl;
> + int idx = 0;
> +
> + while (idx + __sg_page_count(sg) <= n) {
> + idx += __sg_page_count(sg++);
> + if (unlikely(sg_is_chain(sg)))
> + sg = sg_chain_ptr(sg);
> + }
> +
> + *offset = n - idx;
> + return sg;
> +}
> +
> +struct scatterlist *
> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> + unsigned long n,
> + unsigned int *offset)
> +{
> + struct i915_gem_object_page_iter *iter = &obj->get_page;
> + struct scatterlist *sg;
> +
> + GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
> + GEM_BUG_ON(obj->pages_pin_count == 0);
> +
> + if (n < READ_ONCE(iter->sg_idx))
> + goto lookup;
> +
Ok, so on lookup of "n" you build the radix tree for all sg entries from
zero to n. Therefore for a lookup below the current index, there must be
a radix tree entry, correct?
> + mutex_lock(&iter->lock);
> + if (n >= iter->sg_idx &&
> + n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
> + sg = iter->sg_pos;
> + *offset = n - iter->sg_idx;
> + mutex_unlock(&iter->lock);
> + return sg;
> + }
> +
> + while (iter->sg_idx <= n) {
> + unsigned long exception;
> + unsigned int count, i;
> +
> + radix_tree_insert(&iter->radix,
> + iter->sg_idx,
> + iter->sg_pos);
> +
> + exception =
> + RADIX_TREE_EXCEPTIONAL_ENTRY |
> + iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
> + count = __sg_page_count(iter->sg_pos);
> + for (i = 1; i < count; i++)
> + radix_tree_insert(&iter->radix,
> + iter->sg_idx + i,
> + (void *)exception);
> +
> + iter->sg_idx += count;
> + iter->sg_pos = __sg_next(iter->sg_pos);
> + }
> + mutex_unlock(&iter->lock);
Why not return the sg found here directly, instead of falling through to
the radix tree lookup below?
> +
> +lookup:
> + rcu_read_lock();
> + sg = radix_tree_lookup(&iter->radix, n);
> + rcu_read_unlock();
> +
> + if (unlikely(!sg))
> + return __i915_gem_object_get_sg(obj, n, offset);
> +
Considering the first observation above, when can it then happen that
there is no radix tree entry at this point?
In other words, some comments should probably be added to this patch —
there are none, and the logic is not trivial.
> + *offset = 0;
> + if (unlikely(radix_tree_exception(sg))) {
> + unsigned long base =
> + (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
> + sg = radix_tree_lookup(&iter->radix, base);
> + *offset = n - base;
> + }
> + return sg;
> +}
> +
> +struct page *
> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
> +{
> + struct scatterlist *sg;
> + unsigned int offset;
> +
> + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
> +
> + sg = i915_gem_object_get_sg(obj, n, &offset);
> + return nth_page(sg_page(sg), offset);
> +}
> +
> +/* Like i915_gem_object_get_page(), but mark the returned page dirty */
> +struct page *
> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
> + unsigned long n)
> +{
> + struct page *page;
> +
> + page = i915_gem_object_get_page(obj, n);
> + if (!obj->dirty)
> + set_page_dirty(page);
> +
> + return page;
> +}
> +
> +dma_addr_t
> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
> + unsigned long n)
> +{
> + struct scatterlist *sg;
> + unsigned int offset;
> +
> + sg = i915_gem_object_get_sg(obj, n, &offset);
> + return sg_dma_address(sg) + (offset << PAGE_SHIFT);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 59989e8ee5dc..24bad4e60ef0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> if (obj->pages == NULL)
> goto cleanup;
>
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> + obj->get_page.sg_pos = obj->pages->sgl;
> + obj->get_page.sg_idx = 0;
>
> i915_gem_object_pin_pages(obj);
> obj->stolen = stolen;
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 1c891b92ac80..cb95789da76e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> if (ret == 0) {
> list_add_tail(&obj->global_list,
> &to_i915(dev)->mm.unbound_list);
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> + obj->get_page.sg_pos = obj->pages->sgl;
> + obj->get_page.sg_idx = 0;
> pinned = 0;
> }
> }
Regards,
Tvrtko
More information about the Intel-gfx
mailing list