[Intel-gfx] [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage

John Harrison John.C.Harrison at Intel.com
Tue Oct 11 10:15:45 UTC 2016


On 11/10/2016 10:32, Tvrtko Ursulin wrote:
>
> On 07/10/2016 10:46, Chris Wilson wrote:
>> A while ago we switched from a contiguous array of pages into an sglist,
>> for that was both more convenient for mapping to hardware and avoided
>> the requirement for a vmalloc array of pages on every object. However,
>> certain GEM API calls (like pwrite, pread as well as performing
>> relocations) do desire access to individual struct pages. A quick hack
>> was to introduce a cache of the last access such that finding the
>> following page was quick - this works so long as the caller desired
>> sequential access. Walking backwards, or multiple callers, still hits a
>> slow linear search for each page. One solution is to store each
>> successful lookup in a radix tree.
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
>>   drivers/gpu/drm/i915/i915_gem.c         | 149 
>> ++++++++++++++++++++++++++++----
>>   drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
>>   drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
>>   4 files changed, 154 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index bad97f1e5265..a96b446d8db4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
>>         struct sg_table *pages;
>>       int pages_pin_count;
>> -    struct get_page {
>> -        struct scatterlist *sg;
>> -        int last;
>> +    struct i915_gem_object_page_iter {
>> +        struct scatterlist *sg_pos;
>> +        unsigned long sg_idx;
>> +
>> +        struct radix_tree_root radix;
>> +        struct mutex lock;
>>       } get_page;
>>       void *mapping;
>>   @@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct 
>> scatterlist *sg)
>>       return sg->length >> PAGE_SHIFT;
>>   }
>>   -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
>> -
>> -static inline dma_addr_t
>> -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    if (n < obj->get_page.last) {
>> -        obj->get_page.sg = obj->pages->sgl;
>> -        obj->get_page.last = 0;
>> -    }
>> -
>> -    while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= 
>> n) {
>> -        obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> -        if (unlikely(sg_is_chain(obj->get_page.sg)))
>> -            obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> -    }
>> -
>> -    return sg_dma_address(obj->get_page.sg) + ((n - 
>> obj->get_page.last) << PAGE_SHIFT);
>> -}
>> -
>> -static inline struct page *
>> -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
>> -        return NULL;
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +               unsigned long n, unsigned int *offset);
>>   -    if (n < obj->get_page.last) {
>> -        obj->get_page.sg = obj->pages->sgl;
>> -        obj->get_page.last = 0;
>> -    }
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj,
>> +             unsigned long n);
>>   -    while (obj->get_page.last + __sg_page_count(obj->get_page.sg) 
>> <= n) {
>> -        obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> -        if (unlikely(sg_is_chain(obj->get_page.sg)))
>> -            obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> -    }
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> +                   unsigned long n);
>>   -    return nth_page(sg_page(obj->get_page.sg), n - 
>> obj->get_page.last);
>> -}
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> +                unsigned long n);
>>     static inline void i915_gem_object_pin_pages(struct 
>> drm_i915_gem_object *obj)
>>   {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index ada837e393a7..af7d51f16658 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct 
>> drm_i915_gem_object *obj)
>>       kfree(obj->pages);
>>   }
>>   +static void __i915_gem_object_reset_page_iter(struct 
>> drm_i915_gem_object *obj)
>> +{
>> +    struct radix_tree_iter iter;
>> +    void **slot;
>> +
>> +    radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
>> +        radix_tree_delete(&obj->get_page.radix, iter.index);
>> +}
>> +
>>   int
>>   i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>>   {
>> @@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct 
>> drm_i915_gem_object *obj)
>>           obj->mapping = NULL;
>>       }
>>   +    __i915_gem_object_reset_page_iter(obj);
>> +
>>       ops->put_pages(obj);
>>       obj->pages = NULL;
>>   @@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct 
>> drm_i915_gem_object *obj)
>>         list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>>   -    obj->get_page.sg = obj->pages->sgl;
>> -    obj->get_page.last = 0;
>> +    obj->get_page.sg_pos = obj->pages->sgl;
>> +    obj->get_page.sg_idx = 0;
>>         return 0;
>>   }
>> @@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct 
>> drm_i915_gem_object *obj,
>>         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
>>       obj->madv = I915_MADV_WILLNEED;
>> +    INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
>> +    mutex_init(&obj->get_page.lock);
>>         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>>   }
>> @@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct 
>> drm_i915_gem_object *old,
>>       }
>>   }
>>   -/* Like i915_gem_object_get_page(), but mark the returned page 
>> dirty */
>> -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    struct page *page;
>> -
>> -    /* Only default objects have per-page dirty tracking */
>> -    if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
>> -        return NULL;
>> -
>> -    page = i915_gem_object_get_page(obj, n);
>> -    set_page_dirty(page);
>> -    return page;
>> -}
>> -
>>   /* Allocate a new GEM object and fill it with the supplied data */
>>   struct drm_i915_gem_object *
>>   i915_gem_object_create_from_data(struct drm_device *dev,
>> @@ -4959,3 +4957,120 @@ fail:
>>       i915_gem_object_put(obj);
>>       return ERR_PTR(ret);
>>   }
>> +
>> +static struct scatterlist *
>> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +             unsigned long n, unsigned int *offset)
>> +{
>> +    struct scatterlist *sg = obj->pages->sgl;
>> +    int idx = 0;
>> +
>> +    while (idx + __sg_page_count(sg) <= n) {
>> +        idx += __sg_page_count(sg++);
>> +        if (unlikely(sg_is_chain(sg)))
>> +            sg = sg_chain_ptr(sg);
>> +    }
>> +
>> +    *offset = n - idx;
>> +    return sg;
>> +}
>> +
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +               unsigned long n,
>> +               unsigned int *offset)
>> +{
>> +    struct i915_gem_object_page_iter *iter = &obj->get_page;
>> +    struct scatterlist *sg;
>> +
>> +    GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
>> +    GEM_BUG_ON(obj->pages_pin_count == 0);
>> +
>> +    if (n < READ_ONCE(iter->sg_idx))
>> +        goto lookup;
>> +
>
> Ok, so on lookup of "n" you build the radix tree for all sg entries 
> from zero to n. Therefore for a lookup below the current index, there 
> must be a radix tree entry, correct?
That is my understanding.

>
>> +    mutex_lock(&iter->lock);
>> +    if (n >= iter->sg_idx &&
>> +        n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
>> +        sg = iter->sg_pos;
>> +        *offset = n - iter->sg_idx;
>> +        mutex_unlock(&iter->lock);
>> +        return sg;
>> +    }
>> +
>> +    while (iter->sg_idx <= n) {
>> +        unsigned long exception;
>> +        unsigned int count, i;
>> +
>> +        radix_tree_insert(&iter->radix,
>> +                  iter->sg_idx,
>> +                  iter->sg_pos);
>> +
>> +        exception =
>> +            RADIX_TREE_EXCEPTIONAL_ENTRY |
>> +            iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
>> +        count = __sg_page_count(iter->sg_pos);
>> +        for (i = 1; i < count; i++)
>> +            radix_tree_insert(&iter->radix,
>> +                      iter->sg_idx + i,
>> +                      (void *)exception);
>> +
>> +        iter->sg_idx += count;
>> +        iter->sg_pos = __sg_next(iter->sg_pos);
>> +    }
>> +    mutex_unlock(&iter->lock);
>
> Why not avoid falling through the lookup and return the previous sg 
> from here?
I thought I worked through this before and decided there was a good 
reason. However, looking again a few days later, I can't obviously see 
why. As you note below, this is non-trivial stuff and would definitely 
benefit from some explanatory comments.

>
>> +
>> +lookup:
>> +    rcu_read_lock();
>> +    sg = radix_tree_lookup(&iter->radix, n);
>> +    rcu_read_unlock();
>> +
>> +    if (unlikely(!sg))
>> +        return __i915_gem_object_get_sg(obj, n, offset);
>> +
>
> Considering the first observation from above, when does it then happen 
> that there is no radix tree entry at this point?
>
> Or in other words, maybe some comments should be added to this patch - 
> there are none and it is not that trivial.
>
>> +    *offset = 0;
>> +    if (unlikely(radix_tree_exception(sg))) {
>> +        unsigned long base =
>> +            (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
>> +        sg = radix_tree_lookup(&iter->radix, base);
>> +        *offset = n - base;
>> +    }
>> +    return sg;
>> +}
>> +
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned 
>> long n)
>> +{
>> +    struct scatterlist *sg;
>> +    unsigned int offset;
>> +
>> +    GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
>> +
>> +    sg = i915_gem_object_get_sg(obj, n, &offset);
>> +    return nth_page(sg_page(sg), offset);
>> +}
>> +
>> +/* Like i915_gem_object_get_page(), but mark the returned page dirty */
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> +                   unsigned long n)
>> +{
>> +    struct page *page;
>> +
>> +    page = i915_gem_object_get_page(obj, n);
>> +    if (!obj->dirty)
>> +        set_page_dirty(page);
>> +
>> +    return page;
>> +}
>> +
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> +                unsigned long n)
>> +{
>> +    struct scatterlist *sg;
>> +    unsigned int offset;
>> +
>> +    sg = i915_gem_object_get_sg(obj, n, &offset);
>> +    return sg_dma_address(sg) + (offset << PAGE_SHIFT);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c 
>> b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> index 59989e8ee5dc..24bad4e60ef0 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> @@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device 
>> *dev,
>>       if (obj->pages == NULL)
>>           goto cleanup;
>>   -    obj->get_page.sg = obj->pages->sgl;
>> -    obj->get_page.last = 0;
>> +    obj->get_page.sg_pos = obj->pages->sgl;
>> +    obj->get_page.sg_idx = 0;
>>         i915_gem_object_pin_pages(obj);
>>       obj->stolen = stolen;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
>> b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> index 1c891b92ac80..cb95789da76e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct 
>> work_struct *_work)
>>               if (ret == 0) {
>>                   list_add_tail(&obj->global_list,
>> &to_i915(dev)->mm.unbound_list);
>> -                obj->get_page.sg = obj->pages->sgl;
>> -                obj->get_page.last = 0;
>> +                obj->get_page.sg_pos = obj->pages->sgl;
>> +                obj->get_page.sg_idx = 0;
>>                   pinned = 0;
>>               }
>>           }
>
> Regards,
>
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx



More information about the Intel-gfx mailing list