[PATCH v2 3/4] drm/i915/ttm: Calculate the object placement at get_pages time
Matthew Auld
matthew.william.auld at gmail.com
Fri Jun 11 15:42:07 UTC 2021
On Fri, 11 Jun 2021 at 15:55, Thomas Hellström
<thomas.hellstrom at linux.intel.com> wrote:
>
> Instead of relying on a static placement, calculate at get_pages() time.
> This should work for LMEM regions and system for now. For stolen we need
> to take preallocated range into account. That well be added later.
Typo: s/That well be/That will be/
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> v2:
> - Fixed a style issue (Reported by Matthew Auld)
> ---
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 92 ++++++++++++++++++-------
> drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++-
> drivers/gpu/drm/i915/intel_region_ttm.h | 2 +
> 3 files changed, 75 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 45ef1d101937..fd3d11728229 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -24,6 +24,11 @@
> #define I915_TTM_PRIO_NO_PAGES 1
> #define I915_TTM_PRIO_HAS_PAGES 2
>
> +/*
> + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
> + */
> +#define I915_TTM_MAX_PLACEMENTS 10
Could this be "#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN" instead of a hard-coded 10?
> +
> /**
> * struct i915_ttm_tt - TTM page vector with additional private information
> * @ttm: The base TTM page vector.
> @@ -42,32 +47,18 @@ struct i915_ttm_tt {
> struct sg_table *cached_st;
> };
>
> -static const struct ttm_place lmem0_sys_placement_flags[] = {
> - {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = I915_PL_LMEM0,
> - .flags = 0,
> - }, {
> - .fpfn = 0,
> - .lpfn = 0,
> - .mem_type = I915_PL_SYSTEM,
> - .flags = 0,
> - }
> -};
> -
> -static struct ttm_placement i915_lmem0_placement = {
> - .num_placement = 1,
> - .placement = &lmem0_sys_placement_flags[0],
> - .num_busy_placement = 1,
> - .busy_placement = &lmem0_sys_placement_flags[0],
> +static const struct ttm_place sys_placement_flags = {
> + .fpfn = 0,
> + .lpfn = 0,
> + .mem_type = I915_PL_SYSTEM,
> + .flags = 0,
> };
>
> static struct ttm_placement i915_sys_placement = {
> .num_placement = 1,
> - .placement = &lmem0_sys_placement_flags[1],
> + .placement = &sys_placement_flags,
> .num_busy_placement = 1,
> - .busy_placement = &lmem0_sys_placement_flags[1],
> + .busy_placement = &sys_placement_flags,
> };
>
> static bool gpu_binds_iomem(struct ttm_resource *mem)
> @@ -83,6 +74,55 @@ static bool cpu_maps_iomem(struct ttm_resource *mem)
>
> static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
>
> +static enum ttm_caching
> +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
> +{
> + /*
> + * Objects only allowed in system get cached cpu-mappings.
> + * Other objects get WC mapping for now. Even if in system.
> + */
> + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
> + obj->mm.n_placements <= 1)
> + return ttm_cached;
> +
> + return ttm_write_combined;
> +}
> +
> +static void
> +i915_ttm_place_from_region(const struct intel_memory_region *mr,
> + struct ttm_place *place)
> +{
> + memset(place, 0, sizeof(*place));
> + place->mem_type = intel_region_to_ttm_type(mr);
> +}
> +
> +static void
> +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
> + struct ttm_place *requested,
> + struct ttm_place *busy,
> + struct ttm_placement *placement)
> +{
> + unsigned int num_allowed = obj->mm.n_placements;
> + unsigned int i;
> +
> + placement->num_placement = 1;
> + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
> + obj->mm.region, requested);
> +
> + /* Cache this on object? */
> + placement->num_busy_placement = num_allowed;
> + for (i = 0; i < placement->num_busy_placement; ++i)
> + i915_ttm_place_from_region(obj->mm.placements[i], busy + i);
> +
> + if (num_allowed == 0) {
> + *busy = *requested;
> + placement->num_busy_placement = 1;
> + }
> +
> + placement->placement = requested;
> + placement->busy_placement = busy;
> +}
> +
> static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
> uint32_t page_flags)
> {
> @@ -100,7 +140,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
> man->use_tt)
> page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC;
>
> - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, ttm_write_combined);
> + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
> + i915_ttm_select_tt_caching(obj));
> if (ret) {
> kfree(i915_tt);
> return NULL;
> @@ -465,10 +506,13 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
> .no_wait_gpu = false,
> };
> struct sg_table *st;
> + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
> + struct ttm_placement placement;
> int ret;
Maybe also add GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS) here, since the on-stack busy[] array is sized by that constant?
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
More information about the dri-devel mailing list