[Intel-gfx] [PATCH 4/5] drm/i915: Cache last obj->pages location for i915_gem_object_get_page()

Fri Feb 13 05:33:40 PST 2015

On 14/01/2015 11:20, Chris Wilson wrote:
> The biggest user of i915_gem_object_get_page() is the relocation
> processing during execbuffer. Typically userspace passes in a set of
> relocations in sorted order. Sadly, we alternate between relocations
> increasing from the start of the buffers, and relocations decreasing
> from the end. However the majority of consecutive lookups will still be
> in the same page. We could cache the start of the last sg chain, however
> for most callers, the entire sgl is inside a single chain and so we see
> no improve from the extra layer of caching.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=88308
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h | 31 ++++++++++++++++++++++++++-----
>   drivers/gpu/drm/i915/i915_gem.c |  4 ++++
>   2 files changed, 30 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 66f0c607dbef..04a7d594d933 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2005,6 +2005,10 @@ struct drm_i915_gem_object {
>   
>   	struct sg_table *pages;
>   	int pages_pin_count;
> +	struct get_page {
> +		struct scatterlist *sg;
> +		int last;
> +	} get_page;
>   
>   	/* prime dma-buf support */
>   	void *dma_buf_vmapping;
> @@ -2612,15 +2616,32 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>   				    int *needs_clflush);
>   
>   int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> -static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> +
> +static inline int sg_page_count(struct scatterlist *sg)
> +{
> +	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
> +}
> +
> +static inline struct page *
> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
>   {
> -	struct sg_page_iter sg_iter;
> +	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
> +		return NULL;
>   
> -	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n)
> -		return sg_page_iter_page(&sg_iter);
> +	if (n < obj->get_page.last) {
> +		obj->get_page.sg = obj->pages->sgl;
> +		obj->get_page.last = 0;
> +	}
> +
> +	while (obj->get_page.last + sg_page_count(obj->get_page.sg) <= n) {
> +		obj->get_page.last += sg_page_count(obj->get_page.sg);
> +		if (unlikely(sg_is_chain(++obj->get_page.sg)))
Is it safe to do the ++ inside a nested pair of macros? There is at 
least one definition of 'unlikely' in the linux source that would cause 
multiple evaluations.
> +			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
> +	}
>   
> -	return NULL;
> +	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
>   }
> +
>   static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   {
>   	BUG_ON(obj->pages == NULL);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6c403654e33a..d710da099bdb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2260,6 +2260,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   		return ret;
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
> +
> +	obj->get_page.sg = obj->pages->sgl;
> +	obj->get_page.last = 0;
> +
>   	return 0;
>   }
>