[Intel-gfx] [PATCH] drm/i915/ttm: Fix incorrect assumptions about ttm_bo_validate() semantics

Matthew Auld matthew.william.auld at gmail.com
Fri Jun 18 10:53:55 UTC 2021


On Fri, 18 Jun 2021 at 09:31, Thomas Hellström
<thomas.hellstrom at linux.intel.com> wrote:
>
> We have assumed that if the current placement was not the requested
> placement, but instead one of the busy placements, a TTM move would have
> been triggered. That is not the case.
>
> So when we initially place LMEM objects in "Limbo", (that is system
> placement without any pages allocated), to be able to defer clearing
> objects until first get_pages(), the first get_pages() would happily keep
> objects in system memory if that is one of the allowed placements. And
> since we don't yet support i915 GEM system memory from TTM, everything
> breaks apart.
>
> So make sure we try the requested placement first, if no eviction is
> needed. If that fails, retry with all allowed placements also allowing
> evictions. Also make sure we handle TTM failure codes correctly.
>
> Also temporarily (until we support i915 GEM system on TTM), restrict
> allowed placements to the requested placement to avoid things falling
> apart should LMEM be full.
>
> Fixes: 38f28c0695c0 ("drm/i915/ttm: Calculate the object placement at get_pages time)
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 61 +++++++++++++++++++++++--
>  1 file changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index df46535cca47..4bb0440f693c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -64,6 +64,30 @@ static struct ttm_placement i915_sys_placement = {
>         .busy_placement = &sys_placement_flags,
>  };
>
> +static int i915_ttm_err_to_gem(int err)
> +{
> +       /* Fastpath */
> +       if (likely(!err))
> +               return 0;
> +
> +       switch (err) {
> +       case -EBUSY:
> +               /*
> +                * TTM likes to convert -EDEADLK to -EBUSY, and wants us to
> +                * restart the operation, since we don't record the contending
> +                * lock. We use -EAGAIN to restart.
> +                */
> +               return -EAGAIN;
> +       case -ENOSPC:
> +               /* Memory type / region is full, and we can't evict. */
> +               return -ENXIO;

ttm system will return -ENOMEM right?

Reviewed-by: Matthew Auld <matthew.auld at intel.com>

> +       default:
> +               break;
> +       }
> +
> +       return err;
> +}
> +
>  static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
>
>  static enum ttm_caching
> @@ -522,15 +546,46 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
>         struct sg_table *st;
>         struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
>         struct ttm_placement placement;
> +       int real_num_busy;
>         int ret;
>
>         GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS);
>
>         /* Move to the requested placement. */
>         i915_ttm_placement_from_obj(obj, &requested, busy, &placement);
> +
> +       /*
> +        * For now we support LMEM only with TTM.
> +        * TODO: Remove with system support
> +        */
> +       GEM_BUG_ON(requested.mem_type < I915_PL_LMEM0 ||
> +                  busy[0].mem_type < I915_PL_LMEM0);
> +
> +       /* First try only the requested placement. No eviction. */
> +       real_num_busy = fetch_and_zero(&placement.num_busy_placement);
>         ret = ttm_bo_validate(bo, &placement, &ctx);
> -       if (ret)
> -               return ret == -ENOSPC ? -ENXIO : ret;
> +       if (ret) {
> +               ret = i915_ttm_err_to_gem(ret);
> +               /*
> +                * Anything that wants to restart the operation gets to
> +                * do that.
> +                */
> +               if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS ||
> +                   ret == -EAGAIN)
> +                       return ret;
> +
> +               /* TODO: Remove this when we support system as TTM. */
> +               real_num_busy = 1;
> +
> +               /*
> +                * If the initial attempt fails, allow all accepted placements,
> +                * evicting if necessary.
> +                */
> +               placement.num_busy_placement = real_num_busy;
> +               ret = ttm_bo_validate(bo, &placement, &ctx);
> +               if (ret)
> +                       return i915_ttm_err_to_gem(ret);
> +       }
>
>         /* Object either has a page vector or is an iomem object */
>         st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
> @@ -741,5 +796,5 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
>                 obj->ttm.created = true;
>
>         /* i915 wants -ENXIO when out of memory region space. */
> -       return (ret == -ENOSPC) ? -ENXIO : ret;
> +       return i915_ttm_err_to_gem(ret);
>  }
> --
> 2.31.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the dri-devel mailing list