[Intel-gfx] [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
John Harrison
John.C.Harrison at Intel.com
Tue Oct 11 10:15:45 UTC 2016
On 11/10/2016 10:32, Tvrtko Ursulin wrote:
>
> On 07/10/2016 10:46, Chris Wilson wrote:
>> A while ago we switched from a contiguous array of pages into an sglist,
>> for that was both more convenient for mapping to hardware and avoided
>> the requirement for a vmalloc array of pages on every object. However,
>> certain GEM API calls (like pwrite, pread as well as performing
>> relocations) do desire access to individual struct pages. A quick hack
>> was to introduce a cache of the last access such that finding the
>> following page was quick - this works so long as the caller desired
>> sequential access. Walking backwards, or multiple callers, still hits a
>> slow linear search for each page. One solution is to store each
>> successful lookup in a radix tree.
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 57 ++++--------
>> drivers/gpu/drm/i915/i915_gem.c | 149
>> ++++++++++++++++++++++++++++----
>> drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-
>> drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
>> 4 files changed, 154 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index bad97f1e5265..a96b446d8db4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
>> struct sg_table *pages;
>> int pages_pin_count;
>> - struct get_page {
>> - struct scatterlist *sg;
>> - int last;
>> + struct i915_gem_object_page_iter {
>> + struct scatterlist *sg_pos;
>> + unsigned long sg_idx;
>> +
>> + struct radix_tree_root radix;
>> + struct mutex lock;
>> } get_page;
>> void *mapping;
>> @@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct
>> scatterlist *sg)
>> return sg->length >> PAGE_SHIFT;
>> }
>> -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
>> -
>> -static inline dma_addr_t
>> -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
>> -{
>> - if (n < obj->get_page.last) {
>> - obj->get_page.sg = obj->pages->sgl;
>> - obj->get_page.last = 0;
>> - }
>> -
>> - while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <=
>> n) {
>> - obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> - if (unlikely(sg_is_chain(obj->get_page.sg)))
>> - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> - }
>> -
>> - return sg_dma_address(obj->get_page.sg) + ((n -
>> obj->get_page.last) << PAGE_SHIFT);
>> -}
>> -
>> -static inline struct page *
>> -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> - if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
>> - return NULL;
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> + unsigned long n, unsigned int *offset);
>> - if (n < obj->get_page.last) {
>> - obj->get_page.sg = obj->pages->sgl;
>> - obj->get_page.last = 0;
>> - }
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj,
>> + unsigned long n);
>> - while (obj->get_page.last + __sg_page_count(obj->get_page.sg)
>> <= n) {
>> - obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> - if (unlikely(sg_is_chain(obj->get_page.sg)))
>> - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> - }
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> + unsigned long n);
>> - return nth_page(sg_page(obj->get_page.sg), n -
>> obj->get_page.last);
>> -}
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> + unsigned long n);
>> static inline void i915_gem_object_pin_pages(struct
>> drm_i915_gem_object *obj)
>> {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index ada837e393a7..af7d51f16658 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct
>> drm_i915_gem_object *obj)
>> kfree(obj->pages);
>> }
>> +static void __i915_gem_object_reset_page_iter(struct
>> drm_i915_gem_object *obj)
>> +{
>> + struct radix_tree_iter iter;
>> + void **slot;
>> +
>> + radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
>> + radix_tree_delete(&obj->get_page.radix, iter.index);
>> +}
>> +
>> int
>> i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>> {
>> @@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct
>> drm_i915_gem_object *obj)
>> obj->mapping = NULL;
>> }
>> + __i915_gem_object_reset_page_iter(obj);
>> +
>> ops->put_pages(obj);
>> obj->pages = NULL;
>> @@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct
>> drm_i915_gem_object *obj)
>> list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>> - obj->get_page.sg = obj->pages->sgl;
>> - obj->get_page.last = 0;
>> + obj->get_page.sg_pos = obj->pages->sgl;
>> + obj->get_page.sg_idx = 0;
>> return 0;
>> }
>> @@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct
>> drm_i915_gem_object *obj,
>> obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
>> obj->madv = I915_MADV_WILLNEED;
>> + INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
>> + mutex_init(&obj->get_page.lock);
>> i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>> }
>> @@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct
>> drm_i915_gem_object *old,
>> }
>> }
>> -/* Like i915_gem_object_get_page(), but mark the returned page
>> dirty */
>> -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> - struct page *page;
>> -
>> - /* Only default objects have per-page dirty tracking */
>> - if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
>> - return NULL;
>> -
>> - page = i915_gem_object_get_page(obj, n);
>> - set_page_dirty(page);
>> - return page;
>> -}
>> -
>> /* Allocate a new GEM object and fill it with the supplied data */
>> struct drm_i915_gem_object *
>> i915_gem_object_create_from_data(struct drm_device *dev,
>> @@ -4959,3 +4957,120 @@ fail:
>> i915_gem_object_put(obj);
>> return ERR_PTR(ret);
>> }
>> +
>> +static struct scatterlist *
>> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> + unsigned long n, unsigned int *offset)
>> +{
>> + struct scatterlist *sg = obj->pages->sgl;
>> + int idx = 0;
>> +
>> + while (idx + __sg_page_count(sg) <= n) {
>> + idx += __sg_page_count(sg++);
>> + if (unlikely(sg_is_chain(sg)))
>> + sg = sg_chain_ptr(sg);
>> + }
>> +
>> + *offset = n - idx;
>> + return sg;
>> +}
>> +
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> + unsigned long n,
>> + unsigned int *offset)
>> +{
>> + struct i915_gem_object_page_iter *iter = &obj->get_page;
>> + struct scatterlist *sg;
>> +
>> + GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
>> + GEM_BUG_ON(obj->pages_pin_count == 0);
>> +
>> + if (n < READ_ONCE(iter->sg_idx))
>> + goto lookup;
>> +
>
> Ok, so on lookup of "n" you build the radix tree for all sg entries
> from zero to n. Therefore for a lookup below the current index, there
> must be a radix tree entry, correct?
That is my understanding.
>
>> + mutex_lock(&iter->lock);
>> + if (n >= iter->sg_idx &&
>> + n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
>> + sg = iter->sg_pos;
>> + *offset = n - iter->sg_idx;
>> + mutex_unlock(&iter->lock);
>> + return sg;
>> + }
>> +
>> + while (iter->sg_idx <= n) {
>> + unsigned long exception;
>> + unsigned int count, i;
>> +
>> + radix_tree_insert(&iter->radix,
>> + iter->sg_idx,
>> + iter->sg_pos);
>> +
>> + exception =
>> + RADIX_TREE_EXCEPTIONAL_ENTRY |
>> + iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
>> + count = __sg_page_count(iter->sg_pos);
>> + for (i = 1; i < count; i++)
>> + radix_tree_insert(&iter->radix,
>> + iter->sg_idx + i,
>> + (void *)exception);
>> +
>> + iter->sg_idx += count;
>> + iter->sg_pos = __sg_next(iter->sg_pos);
>> + }
>> + mutex_unlock(&iter->lock);
>
> Why not avoid falling through the lookup and return the previous sg
> from here?
I thought I worked through this before and decided there was a good
reason. However, looking again a few days later, I can't obviously see
why. As you note below, this is non-trivial stuff and would definitely
benefit from some explanatory comments.
>
>> +
>> +lookup:
>> + rcu_read_lock();
>> + sg = radix_tree_lookup(&iter->radix, n);
>> + rcu_read_unlock();
>> +
>> + if (unlikely(!sg))
>> + return __i915_gem_object_get_sg(obj, n, offset);
>> +
>
> Considering the first observation from above, when does it then happen
> that there is no radix tree entry at this point?
>
> Or in other words, maybe some comments should be added to this patch -
> there are none and it is not that trivial.
>
>> + *offset = 0;
>> + if (unlikely(radix_tree_exception(sg))) {
>> + unsigned long base =
>> + (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
>> + sg = radix_tree_lookup(&iter->radix, base);
>> + *offset = n - base;
>> + }
>> + return sg;
>> +}
>> +
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned
>> long n)
>> +{
>> + struct scatterlist *sg;
>> + unsigned int offset;
>> +
>> + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
>> +
>> + sg = i915_gem_object_get_sg(obj, n, &offset);
>> + return nth_page(sg_page(sg), offset);
>> +}
>> +
>> +/* Like i915_gem_object_get_page(), but mark the returned page dirty */
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> + unsigned long n)
>> +{
>> + struct page *page;
>> +
>> + page = i915_gem_object_get_page(obj, n);
>> + if (!obj->dirty)
>> + set_page_dirty(page);
>> +
>> + return page;
>> +}
>> +
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> + unsigned long n)
>> +{
>> + struct scatterlist *sg;
>> + unsigned int offset;
>> +
>> + sg = i915_gem_object_get_sg(obj, n, &offset);
>> + return sg_dma_address(sg) + (offset << PAGE_SHIFT);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c
>> b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> index 59989e8ee5dc..24bad4e60ef0 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> @@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device
>> *dev,
>> if (obj->pages == NULL)
>> goto cleanup;
>> - obj->get_page.sg = obj->pages->sgl;
>> - obj->get_page.last = 0;
>> + obj->get_page.sg_pos = obj->pages->sgl;
>> + obj->get_page.sg_idx = 0;
>> i915_gem_object_pin_pages(obj);
>> obj->stolen = stolen;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c
>> b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> index 1c891b92ac80..cb95789da76e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct
>> work_struct *_work)
>> if (ret == 0) {
>> list_add_tail(&obj->global_list,
>> &to_i915(dev)->mm.unbound_list);
>> - obj->get_page.sg = obj->pages->sgl;
>> - obj->get_page.last = 0;
>> + obj->get_page.sg_pos = obj->pages->sgl;
>> + obj->get_page.sg_idx = 0;
>> pinned = 0;
>> }
>> }
>
> Regards,
>
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list