[Intel-gfx] [PATCH 15/18] drm/i915: Add a new "remapped" gtt_view

Ville Syrjälä ville.syrjala at linux.intel.com
Thu Jul 19 19:33:57 UTC 2018


On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjala (2018-07-19 19:22:11)
> > +static struct scatterlist *
> > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > +           unsigned int width, unsigned int height,
> > +           unsigned int stride,
> > +           struct sg_table *st, struct scatterlist *sg)
> > +{
> > +       unsigned int column, row;
> > +
> > +       for (row = 0; row < height; row++) {
> > +               for (column = 0; column < width; column++) {
> > +                       st->nents++;
> > +                       /* We don't need the pages, but need to initialize
> > +                        * the entries so the sg list can be happily traversed.
> > +                        * The only thing we need are DMA addresses.
> > +                        */
> > +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > +                       sg_dma_address(sg) = in[offset + column];
> > +                       sg_dma_len(sg) = PAGE_SIZE;
> > +                       sg = sg_next(sg);
> 
> Ok. But should be I915_GTT_PAGE_SIZE?

I suppose. And now I wonder what would happen on gen2 with its
2KiB gtt pages. Probably nothing good.

> 
> And yes, following that argument looks like rotation should be converted.
> 
> > +               }
> > +               offset += stride;
> > +       }
> > +
> > +       return sg;
> > +}
> > +
> > +static noinline struct sg_table *
> > +intel_remap_pages(struct intel_remapped_info *rem_info,
> > +                 struct drm_i915_gem_object *obj)
> > +{
> > +       const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> > +       unsigned int size = intel_remapped_info_size(rem_info);
> > +       struct sgt_iter sgt_iter;
> > +       dma_addr_t dma_addr;
> > +       unsigned long i;
> > +       dma_addr_t *page_addr_list;
> > +       struct sg_table *st;
> > +       struct scatterlist *sg;
> > +       int ret = -ENOMEM;
> > +
> > +       /* Allocate a temporary list of source pages for random access. */
> > +       page_addr_list = kvmalloc_array(n_pages,
> > +                                       sizeof(dma_addr_t),
> > +                                       GFP_KERNEL);
> > +       if (!page_addr_list)
> > +               return ERR_PTR(ret);
> > +
> > +       /* Allocate target SG list. */
> > +       st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +       if (!st)
> > +               goto err_st_alloc;
> > +
> > +       ret = sg_alloc_table(st, size, GFP_KERNEL);
> > +       if (ret)
> > +               goto err_sg_alloc;
> > +
> > +       /* Populate source page list from the object. */
> > +       i = 0;
> > +       for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
> > +               page_addr_list[i++] = dma_addr;
> 
> You could use the i915_gem_object_get_dma_address() for this. We
> definitely will reuse the index (as the one object will likely be
> remapped into each CRTC). (Avoids having a temporary vmalloc here.)
> 
> > +
> > +       GEM_BUG_ON(i != n_pages);
> > +       st->nents = 0;
> > +       sg = st->sgl;
> > +
> > +       for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> > +               sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
> > +                                rem_info->plane[i].width, rem_info->plane[i].height,
> > +                                rem_info->plane[i].stride, st, sg);
> > +       }
> 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index 2a116a91420b..e15ac9f64c36 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -160,6 +160,19 @@ typedef u64 gen8_ppgtt_pml4e_t;
> >  
> >  struct sg_table;
> >  
> > +struct intel_remapped_info {
> > +       struct intel_remapped_plane_info {
> > +               /* tiles */
> > +               unsigned int width, height, stride, offset;
> > +       } plane[2];
> > +       unsigned int unused;
> 
> Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> it actually is better if it doesn't exist if it isn't used, then it
> should be zero.. Hmm, not sure if that's defined at all, might have to
> say memset and don't rely on {} zeroing?
> 
> Should work fine as a memcmp key for the rbtree.

This whole thing is a bit questionale the way I did it. When populating
the gtt_view I just poke at view->rotated and rely on matching layout
for view->remapped. To make it less magic maybe I should embed one
inside the other?

> 
> > +} __packed;
> > +
> > +static inline void assert_intel_remapped_info_is_packed(void)
> > +{
> > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> > +}
> > +
> >  struct intel_rotation_info {
> >         struct intel_rotation_plane_info {
> >                 /* tiles */
> > @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
> >         I915_GGTT_VIEW_NORMAL = 0,
> >         I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
> >         I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> > +       I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
> >  };
> >  
> >  static inline void assert_i915_ggtt_view_type_is_unique(void)
> > @@ -197,6 +211,7 @@ static inline void assert_i915_ggtt_view_type_is_unique(void)
> >         case I915_GGTT_VIEW_NORMAL:
> >         case I915_GGTT_VIEW_PARTIAL:
> >         case I915_GGTT_VIEW_ROTATED:
> > +       case I915_GGTT_VIEW_REMAPPED:
> >                 /* gcc complains if these are identical cases */
> >                 break;
> >         }
> > @@ -208,6 +223,7 @@ struct i915_ggtt_view {
> >                 /* Members need to contain no holes/padding */
> >                 struct intel_partial_info partial;
> >                 struct intel_rotation_info rotated;
> > +               struct intel_remapped_info remapped;
> >         };
> >  };
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 11d834f94220..eb54755ec25b 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -164,6 +164,9 @@ vma_create(struct drm_i915_gem_object *obj,
> >                 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
> >                         vma->size = intel_rotation_info_size(&view->rotated);
> >                         vma->size <<= PAGE_SHIFT;
> > +               } else if (view->type == I915_GGTT_VIEW_REMAPPED) {
> > +                       vma->size = intel_remapped_info_size(&view->remapped);
> > +                       vma->size <<= PAGE_SHIFT;
> >                 }
> >         }
> >  
> > @@ -462,7 +465,8 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> >          * Explicitly disable for rotated VMA since the display does not
> >          * need the fence and the VMA is not accessible to other users.
> >          */
> > -       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
> > +       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
> > +           vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
> >                 return;
> 
> Not fenceable? I suppose, you only want the fence tracking from GGTT
> mmaps. Do we need to make sure FBC finally works on unfenced vma? I
> guess we do.

Might be a nice bonus. I keep putting off touching FBC in a significant
way though because I know it won't stop with the first patch.

> 
> >         fenceable = (vma->node.size >= vma->fence_size &&
> > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> > index f06d66377107..19bb3115226c 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.h
> > +++ b/drivers/gpu/drm/i915/i915_vma.h
> > @@ -257,8 +257,11 @@ i915_vma_compare(struct i915_vma *vma,
> >          */
> >         BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
> >         BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
> > +       BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
> >         BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
> > -                    offsetof(typeof(*view), partial));
> > +                    offsetof(typeof(*view), partial) ||
> > +                    offsetof(typeof(*view), rotated) !=
> > +                    offsetof(typeof(*view), remapped));
> 
> Two separate BUILD_BUG_ON() I think, one pair of offsetof() will be
> harder to interpret should if fail; two pairs never!

ack

-- 
Ville Syrjälä
Intel


More information about the Intel-gfx mailing list