[Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat
Matthew Auld
matthew.william.auld at gmail.com
Wed Jun 30 16:54:19 UTC 2021
On Wed, 30 Jun 2021 at 16:27, Thomas Hellström
<thomas.hellstrom at linux.intel.com> wrote:
>
> On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
> > On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
> > <thomas.hellstrom at linux.intel.com> wrote:
> > >
> > > In order to make the code a bit more readable and to facilitate
> > > async memcpy moves, reorganize the move code a little. Determine
> > > at an early stage whether to copy or to clear.
> > >
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > ---
> > > drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++++----------
> > > 1 file changed, 40 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > index c39d982c4fa6..4e529adcdfc7 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
> > > drm_i915_gem_object *obj,
> > > }
> > >
> > > static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > > + bool clear,
> > > struct ttm_resource *dst_mem,
> > > struct sg_table *dst_st)
> > > {
> > > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
> > > ttm_buffer_object *bo,
> > > return -EINVAL;
> > >
> > > dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> > > - if (!ttm || !ttm_tt_is_populated(ttm)) {
> > > + if (clear) {
> > > if (bo->type == ttm_bo_type_kernel)
> > > return -EINVAL;
> >
> > Was that meant to be:
> > return 0;
> >
> > ?
> >
> > Also does that mean we are incorrectly falling back to memset, for
> > non-userspace objects, instead of making it a noop?
>
> No, we're deliberately falling back to memset for non-userspace
> objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
> everything is implemented correctly.
>
> >
> > >
> > > - if (ttm && !(ttm->page_flags &
> > > TTM_PAGE_FLAG_ZERO_ALLOC))
> > > - return 0;
> > > -
> > > intel_engine_pm_get(i915->gt.migrate.context-
> > > >engine);
> > > ret = intel_context_migrate_clear(i915-
> > > >gt.migrate.context, NULL,
> > > dst_st->sgl,
> > > dst_level,
> > > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
> > > ttm_buffer_object *bo,
> > > return ret;
> > > }
> > >
> > > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > - struct ttm_operation_ctx *ctx,
> > > - struct ttm_resource *dst_mem,
> > > - struct ttm_place *hop)
> > > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
> > > clear,
> > > + struct ttm_resource *dst_mem,
> > > + struct sg_table *dst_st)
> > > {
> > > struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > - struct ttm_resource_manager *dst_man =
> > > - ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > struct intel_memory_region *dst_reg, *src_reg;
> > > union {
> > > struct ttm_kmap_iter_tt tt;
> > > struct ttm_kmap_iter_iomap io;
> > > } _dst_iter, _src_iter;
> > > struct ttm_kmap_iter *dst_iter, *src_iter;
> > > - struct sg_table *dst_st;
> > > int ret;
> > >
> > > dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> > > src_reg = i915_ttm_region(bo->bdev, bo->resource-
> > > >mem_type);
> > > GEM_BUG_ON(!dst_reg || !src_reg);
> > >
> > > + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> > > + if (ret) {
> >
> > One future consideration is flat CCS where I don't think we can
> > easily
> > fall back to memcpy for userspace objects. Maybe we can make this
> > fallback conditional on DG1 or !ALLOC_USER for now, or just add a
> > TODO?
>
> Ugh. Is that true for both clearing and copying, or is it only copying?
With clearing I think we are required to nuke the aux CCS state using
some special blitter command.
For copying/moving I think it's a similar story, where special care
might be needed for the aux state, which likely requires the blitter.
Although tbh I don't really remember all the details.
>
> Problem is if we hit an engine reset and fence error during initial
> clearing / swapin, the plan moving forward is to intercept that and
> resort to cpu clearing / copying for security reasons. In the worst
> case we at least need to be able to clear.
>
> /Thomas
>
>
> >
> > > + dst_iter = !cpu_maps_iomem(dst_mem) ?
> > > + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > > >ttm) :
> > > + ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > > &dst_reg->iomap,
> > > + dst_st, dst_reg-
> > > >region.start);
> > > +
> > > + src_iter = !cpu_maps_iomem(bo->resource) ?
> > > + ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
> > > >ttm) :
> > > + ttm_kmap_iter_iomap_init(&_src_iter.io,
> > > &src_reg->iomap,
> > > + obj-
> > > >ttm.cached_io_st,
> > > + src_reg-
> > > >region.start);
> > > +
> > > + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
> > > src_iter);
> > > + }
> > > +}
> > > +
> > > +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > + struct ttm_operation_ctx *ctx,
> > > + struct ttm_resource *dst_mem,
> > > + struct ttm_place *hop)
> > > +{
> > > + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > + struct ttm_resource_manager *dst_man =
> > > + ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > + struct ttm_tt *ttm = bo->ttm;
> > > + struct sg_table *dst_st;
> > > + bool clear;
> > > + int ret;
> > > +
> > > /* Sync for now. We could do the actual copy async. */
> > > ret = ttm_bo_wait_ctx(bo, ctx);
> > > if (ret)
> > > @@ -526,9 +550,8 @@ static int i915_ttm_move(struct
> > > ttm_buffer_object *bo, bool evict,
> > > }
> > >
> > > /* Populate ttm with pages if needed. Typically system
> > > memory. */
> > > - if (bo->ttm && (dst_man->use_tt ||
> > > - (bo->ttm->page_flags &
> > > TTM_PAGE_FLAG_SWAPPED))) {
> > > - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
> > > + if (ttm && (dst_man->use_tt || (ttm->page_flags &
> > > TTM_PAGE_FLAG_SWAPPED))) {
> > > + ret = ttm_tt_populate(bo->bdev, ttm, ctx);
> > > if (ret)
> > > return ret;
> > > }
> > > @@ -537,23 +560,10 @@ static int i915_ttm_move(struct
> > > ttm_buffer_object *bo, bool evict,
> > > if (IS_ERR(dst_st))
> > > return PTR_ERR(dst_st);
> > >
> > > - ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
> > > - if (ret) {
> > > - /* If we start mapping GGTT, we can no longer use
> > > man::use_tt here. */
> > > - dst_iter = !cpu_maps_iomem(dst_mem) ?
> > > - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > > >ttm) :
> > > - ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > > &dst_reg->iomap,
> > > - dst_st, dst_reg-
> > > >region.start);
> > > -
> > > - src_iter = !cpu_maps_iomem(bo->resource) ?
> > > - ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
> > > >ttm) :
> > > - ttm_kmap_iter_iomap_init(&_src_iter.io,
> > > &src_reg->iomap,
> > > - obj-
> > > >ttm.cached_io_st,
> > > - src_reg-
> > > >region.start);
> > > + clear = !cpu_maps_iomem(bo->resource) && (!ttm ||
> > > !ttm_tt_is_populated(ttm));
> > > + if (!(clear && ttm && !(ttm->page_flags &
> > > TTM_PAGE_FLAG_ZERO_ALLOC)))
> >
> > Seems quite hard to read?
> >
> > Reviewed-by: Matthew Auld <matthew.auld at intel.com>
> >
> >
> > > + __i915_ttm_move(bo, clear, dst_mem, dst_st);
> > >
> > > - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
> > > src_iter);
> > > - }
> > > - /* Below dst_mem becomes bo->resource. */
> > > ttm_bo_move_sync_cleanup(bo, dst_mem);
> > > i915_ttm_adjust_domains_after_move(obj);
> > > i915_ttm_free_cached_io_st(obj);
> > > --
> > > 2.31.1
> > >
>
>
More information about the Intel-gfx
mailing list