[Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat
Daniel Vetter
daniel at ffwll.ch
Wed Jun 30 17:00:01 UTC 2021
On Wed, Jun 30, 2021 at 6:54 PM Matthew Auld
<matthew.william.auld at gmail.com> wrote:
>
> On Wed, 30 Jun 2021 at 16:27, Thomas Hellström
> <thomas.hellstrom at linux.intel.com> wrote:
> >
> > On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
> > > On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
> > > <thomas.hellstrom at linux.intel.com> wrote:
> > > >
> > > > In order to make the code a bit more readable and to facilitate
> > > > async memcpy moves, reorganize the move code a little. Determine
> > > > at an early stage whether to copy or to clear.
> > > >
> > > > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > > ---
> > > > drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++-------
> > > > ----
> > > > 1 file changed, 40 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > index c39d982c4fa6..4e529adcdfc7 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
> > > > drm_i915_gem_object *obj,
> > > > }
> > > >
> > > > static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > > > + bool clear,
> > > > struct ttm_resource *dst_mem,
> > > > struct sg_table *dst_st)
> > > > {
> > > > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
> > > > ttm_buffer_object *bo,
> > > > return -EINVAL;
> > > >
> > > > dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> > > > - if (!ttm || !ttm_tt_is_populated(ttm)) {
> > > > + if (clear) {
> > > > if (bo->type == ttm_bo_type_kernel)
> > > > return -EINVAL;
> > >
> > > Was that meant to be:
> > > return 0:
> > >
> > > ?
> > >
> > > Also does that mean we are incorrectly falling back to memset, for
> > > non-userspace objects, instead of making it a noop?
> >
> > No, we're deliberately falling back to memset for non-userspace
> > objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
> > everything is implemented correctly.
> >
> > >
> > > >
> > > > - if (ttm && !(ttm->page_flags &
> > > > TTM_PAGE_FLAG_ZERO_ALLOC))
> > > > - return 0;
> > > > -
> > > > intel_engine_pm_get(i915->gt.migrate.context-
> > > > >engine);
> > > > ret = intel_context_migrate_clear(i915-
> > > > >gt.migrate.context, NULL,
> > > > dst_st->sgl,
> > > > dst_level,
> > > > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
> > > > ttm_buffer_object *bo,
> > > > return ret;
> > > > }
> > > >
> > > > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > > - struct ttm_operation_ctx *ctx,
> > > > - struct ttm_resource *dst_mem,
> > > > - struct ttm_place *hop)
> > > > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
> > > > clear,
> > > > + struct ttm_resource *dst_mem,
> > > > + struct sg_table *dst_st)
> > > > {
> > > > struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > > - struct ttm_resource_manager *dst_man =
> > > > - ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > > struct intel_memory_region *dst_reg, *src_reg;
> > > > union {
> > > > struct ttm_kmap_iter_tt tt;
> > > > struct ttm_kmap_iter_iomap io;
> > > > } _dst_iter, _src_iter;
> > > > struct ttm_kmap_iter *dst_iter, *src_iter;
> > > > - struct sg_table *dst_st;
> > > > int ret;
> > > >
> > > > dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> > > > src_reg = i915_ttm_region(bo->bdev, bo->resource-
> > > > >mem_type);
> > > > GEM_BUG_ON(!dst_reg || !src_reg);
> > > >
> > > > + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> > > > + if (ret) {
> > >
> > > One future consideration is flat CCS where I don't think we can
> > > easily
> > > fall back to memcpy for userspace objects. Maybe we can make this
> > > fallback conditional on DG1 or !ALLOC_USER for now, or just add a
> > > TODO?
> >
> > Ugh. Is that true for both clearing and copying, or is it only copying?
>
> With clearing I think we are required to nuke the aux CCS state using
> some special blitter command.
>
> For copying/moving I think it's a similar story, where special care
> might be needed for the aux state, which likely requires the blitter.
> Although tbh I don't really remember all the details.
There's more than just flat CCS, for dg2 we'll also support resizeable
BAR with the goal to make the non-mappable lmem available too. Afaik
there's no fallback way to access that memory without a copy engine.
I think on those platforms we simply have to go back to wedging the
driver if reset of the copy engine fails and one our kernel context
was impacted. Nothing really we can do much there. On the big server
gpus we'll have a dedicated copyengine for the kernel reserved, so
pretty unlikely that dies, but on DG2 there's only one.
-Daniel
> > Problem is if we hit an engine reset and fence error during initial
> > clearing / swapin, the plan moving forward is to intercept that and
> > resort to cpu clearing / copying for security reasons. In the worst
> > case we at least need to be able to clear.
> >
> > /Thomas
> >
> >
> > >
> > > > + dst_iter = !cpu_maps_iomem(dst_mem) ?
> > > > + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > > > >ttm) :
> > > > + ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > > > &dst_reg->iomap,
> > > > + dst_st, dst_reg-
> > > > >region.start);
> > > > +
> > > > + src_iter = !cpu_maps_iomem(bo->resource) ?
> > > > + ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
> > > > >ttm) :
> > > > + ttm_kmap_iter_iomap_init(&_src_iter.io,
> > > > &src_reg->iomap,
> > > > + obj-
> > > > >ttm.cached_io_st,
> > > > + src_reg-
> > > > >region.start);
> > > > +
> > > > + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
> > > > src_iter);
> > > > + }
> > > > +}
> > > > +
> > > > +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > > + struct ttm_operation_ctx *ctx,
> > > > + struct ttm_resource *dst_mem,
> > > > + struct ttm_place *hop)
> > > > +{
> > > > + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > > + struct ttm_resource_manager *dst_man =
> > > > + ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > > + struct ttm_tt *ttm = bo->ttm;
> > > > + struct sg_table *dst_st;
> > > > + bool clear;
> > > > + int ret;
> > > > +
> > > > /* Sync for now. We could do the actual copy async. */
> > > > ret = ttm_bo_wait_ctx(bo, ctx);
> > > > if (ret)
> > > > @@ -526,9 +550,8 @@ static int i915_ttm_move(struct
> > > > ttm_buffer_object *bo, bool evict,
> > > > }
> > > >
> > > > /* Populate ttm with pages if needed. Typically system
> > > > memory. */
> > > > - if (bo->ttm && (dst_man->use_tt ||
> > > > - (bo->ttm->page_flags &
> > > > TTM_PAGE_FLAG_SWAPPED))) {
> > > > - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
> > > > + if (ttm && (dst_man->use_tt || (ttm->page_flags &
> > > > TTM_PAGE_FLAG_SWAPPED))) {
> > > > + ret = ttm_tt_populate(bo->bdev, ttm, ctx);
> > > > if (ret)
> > > > return ret;
> > > > }
> > > > @@ -537,23 +560,10 @@ static int i915_ttm_move(struct
> > > > ttm_buffer_object *bo, bool evict,
> > > > if (IS_ERR(dst_st))
> > > > return PTR_ERR(dst_st);
> > > >
> > > > - ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
> > > > - if (ret) {
> > > > - /* If we start mapping GGTT, we can no longer use
> > > > man::use_tt here. */
> > > > - dst_iter = !cpu_maps_iomem(dst_mem) ?
> > > > - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > > > >ttm) :
> > > > - ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > > > &dst_reg->iomap,
> > > > - dst_st, dst_reg-
> > > > >region.start);
> > > > -
> > > > - src_iter = !cpu_maps_iomem(bo->resource) ?
> > > > - ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
> > > > >ttm) :
> > > > - ttm_kmap_iter_iomap_init(&_src_iter.io,
> > > > &src_reg->iomap,
> > > > - obj-
> > > > >ttm.cached_io_st,
> > > > - src_reg-
> > > > >region.start);
> > > > + clear = !cpu_maps_iomem(bo->resource) && (!ttm ||
> > > > !ttm_tt_is_populated(ttm));
> > > > + if (!(clear && ttm && !(ttm->page_flags &
> > > > TTM_PAGE_FLAG_ZERO_ALLOC)))
> > >
> > > Seems quite hard to read?
> > >
> > > Reviewed-by: Matthew Auld <matthew.auld at intel.com>
> > >
> > >
> > > > + __i915_ttm_move(bo, clear, dst_mem, dst_st);
> > > >
> > > > - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
> > > > src_iter);
> > > > - }
> > > > - /* Below dst_mem becomes bo->resource. */
> > > > ttm_bo_move_sync_cleanup(bo, dst_mem);
> > > > i915_ttm_adjust_domains_after_move(obj);
> > > > i915_ttm_free_cached_io_st(obj);
> > > > --
> > > > 2.31.1
> > > >
> >
> >
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list