[Intel-gfx] [PATCH] drm/i915: Use i915_gem_object_get_dma_address() to populate rotated vmas
Ville Syrjälä
ville.syrjala at linux.intel.com
Tue Oct 16 18:18:34 UTC 2018
On Tue, Oct 16, 2018 at 06:54:59PM +0100, Tvrtko Ursulin wrote:
>
> On 16/10/2018 16:04, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> >
> > Replace the kvmalloc_array() with i915_gem_object_get_dma_address() when
> > populating rotated vmas. One random access mechanism ought to be enough
> > for everyone?
> >
> > To calculate the size of the radix tree I think we can do
> > something like this (assuming 64bit pointers):
> > num_pages = obj_size / 4096
> > tree_height = ceil(log64(num_pages))
> > num_nodes = sum(64^n, n, 0, tree_height-1)
> > tree_size = num_nodes * 576
>
> For 1920x1080x4 I get around 19k in real life - does that match your
> math?
The math assumes the second level of tree is fully populated so
gives me ~37KiB. In this case we have only 2025 pages so we should
only need 33 nodes in total instead of the full 65. 33*576 is ~19KiB.
> For something like two UHD screens with double buffering I guess
> that's around 4 * 2 * 2 = 16 times more, which is perhaps a worst case
> setup? So ~300KiB permanently pinned and likely not used post first pin
> to display. If that is OK with you then you have my ack for the patch.
I think I'm OK with it. And Chris can optimize it later ;)
If we're really short on memory the shrinker should be able to get
rid of this when it's going to swap out the pages. For any fb that's
not currently pinned that is.
> Or a review if you can live with some delay.
>
> Regards,
>
> Tvrtko
>
> > If we compare that with the object size we should get a relative
> > overhead of around .2% to 1% for reasonable sized objects,
> > which framebuffers tend to be.
> >
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> > Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem_gtt.c | 31 ++++++-----------------------
> > 1 file changed, 6 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 29ca9007a704..98d9a1eb1ed2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -3637,7 +3637,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
> > }
> >
> > static struct scatterlist *
> > -rotate_pages(const dma_addr_t *in, unsigned int offset,
> > +rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
> > unsigned int width, unsigned int height,
> > unsigned int stride,
> > struct sg_table *st, struct scatterlist *sg)
> > @@ -3646,7 +3646,7 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
> > unsigned int src_idx;
> >
> > for (column = 0; column < width; column++) {
> > - src_idx = stride * (height - 1) + column;
> > + src_idx = stride * (height - 1) + column + offset;
> > for (row = 0; row < height; row++) {
> > st->nents++;
> > /* We don't need the pages, but need to initialize
> > @@ -3654,7 +3654,8 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
> > * The only thing we need are DMA addresses.
> > */
> > sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
> > - sg_dma_address(sg) = in[offset + src_idx];
> > + sg_dma_address(sg) =
> > + i915_gem_object_get_dma_address(obj, src_idx);
> > sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
> > sg = sg_next(sg);
> > src_idx -= stride;
> > @@ -3668,22 +3669,11 @@ static noinline struct sg_table *
> > intel_rotate_pages(struct intel_rotation_info *rot_info,
> > struct drm_i915_gem_object *obj)
> > {
> > - const unsigned long n_pages = obj->base.size / I915_GTT_PAGE_SIZE;
> > unsigned int size = intel_rotation_info_size(rot_info);
> > - struct sgt_iter sgt_iter;
> > - dma_addr_t dma_addr;
> > - unsigned long i;
> > - dma_addr_t *page_addr_list;
> > struct sg_table *st;
> > struct scatterlist *sg;
> > int ret = -ENOMEM;
> > -
> > - /* Allocate a temporary list of source pages for random access. */
> > - page_addr_list = kvmalloc_array(n_pages,
> > - sizeof(dma_addr_t),
> > - GFP_KERNEL);
> > - if (!page_addr_list)
> > - return ERR_PTR(ret);
> > + int i;
> >
> > /* Allocate target SG list. */
> > st = kmalloc(sizeof(*st), GFP_KERNEL);
> > @@ -3694,29 +3684,20 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
> > if (ret)
> > goto err_sg_alloc;
> >
> > - /* Populate source page list from the object. */
> > - i = 0;
> > - for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
> > - page_addr_list[i++] = dma_addr;
> > -
> > - GEM_BUG_ON(i != n_pages);
> > st->nents = 0;
> > sg = st->sgl;
> >
> > for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
> > - sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
> > + sg = rotate_pages(obj, rot_info->plane[i].offset,
> > rot_info->plane[i].width, rot_info->plane[i].height,
> > rot_info->plane[i].stride, st, sg);
> > }
> >
> > - kvfree(page_addr_list);
> > -
> > return st;
> >
> > err_sg_alloc:
> > kfree(st);
> > err_st_alloc:
> > - kvfree(page_addr_list);
> >
> > DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
> > obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
> >
--
Ville Syrjälä
Intel
More information about the Intel-gfx
mailing list