[PATCH] drm/i915: Optionally manage system memory with TTM and poolalloc
Adrian Larumbe
adrian.larumbe at collabora.com
Wed Jul 27 23:08:00 UTC 2022
Adds a module parameter that enables selection of the memory region manager
for system memory, either the legacy shmem-based one or TTM, through its
pool allocator. This should not affect how DGFX platforms with LMEM
work.
Signed-off-by: Adrian Larumbe <adrian.larumbe at collabora.com>
---
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 56 +++++++---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 104 +++++++++++++++++-
drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 9 ++
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 13 ++-
.../drm/i915/gem/selftests/i915_gem_mman.c | 2 +-
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 6 +-
drivers/gpu/drm/i915/i915_params.c | 6 +
drivers/gpu/drm/i915/i915_params.h | 4 +-
drivers/gpu/drm/i915/intel_memory_region.c | 12 +-
drivers/gpu/drm/i915/intel_region_ttm.c | 6 +-
10 files changed, 191 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 0c5c43852e24..b8ae6a381108 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -83,6 +83,22 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (!obj)
return -ENOENT;
+ if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+ addr = -EINVAL;
+ goto err;
+ }
+
+ if (i915_gem_object_is_ttm(obj)) {
+ GEM_WARN_ON(!i915->params.use_pool_alloc);
+
+ addr = i915_gem_ttm_mmap(obj, args);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ args->addr_ptr = (u64)addr;
+ return 0;
+ }
+
/* prime objects have no backing filp to GEM mmap
* pages from.
*/
@@ -91,11 +107,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
goto err;
}
- if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
- addr = -EINVAL;
- goto err;
- }
-
addr = vm_mmap(obj->base.filp, 0, args->size,
PROT_READ | PROT_WRITE, MAP_SHARED,
args->offset);
@@ -552,9 +563,11 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
struct i915_mmap_offset *mmo, *mn;
- if (obj->ops->unmap_virtual)
+ if (obj->ops->unmap_virtual &&
+ bo->type == ttm_bo_type_device)
obj->ops->unmap_virtual(obj);
spin_lock(&obj->mmo.lock);
@@ -641,11 +654,13 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type,
struct drm_file *file)
{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_mmap_offset *mmo;
int err;
- GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops);
+ GEM_BUG_ON((obj->ops->mmap_offset || obj->ops->mmap_ops) &&
+ bo->type == ttm_bo_type_device);
mmo = lookup_mmo(obj, mmap_type);
if (mmo)
@@ -694,12 +709,14 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type,
u64 *offset, struct drm_file *file)
{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
struct i915_mmap_offset *mmo;
if (i915_gem_object_never_mmap(obj))
return -ENODEV;
- if (obj->ops->mmap_offset) {
+ if (obj->ops->mmap_offset &&
+ bo->type == ttm_bo_type_device) {
if (mmap_type != I915_MMAP_TYPE_FIXED)
return -ENODEV;
@@ -731,7 +748,6 @@ __assign_mmap_offset_handle(struct drm_file *file,
{
struct drm_i915_gem_object *obj;
int err;
-
obj = i915_gem_object_lookup(file, handle);
if (!obj)
return -ENOENT;
@@ -739,6 +755,7 @@ __assign_mmap_offset_handle(struct drm_file *file,
err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out_put;
+
err = __assign_mmap_offset(obj, mmap_type, offset, file);
i915_gem_object_unlock(obj);
out_put:
@@ -922,7 +939,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
struct drm_vma_offset_node *node;
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_object *obj = NULL;
+ struct ttm_buffer_object *bo = NULL;
struct i915_mmap_offset *mmo = NULL;
struct file *anon;
@@ -944,7 +963,8 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
mmo = container_of(node, struct i915_mmap_offset, vma_node);
obj = i915_gem_object_get_rcu(mmo->obj);
- GEM_BUG_ON(obj && obj->ops->mmap_ops);
+ if (!i915->params.use_pool_alloc)
+ GEM_BUG_ON(obj && obj->ops->mmap_ops);
} else {
obj = i915_gem_object_get_rcu
(container_of(node, struct drm_i915_gem_object,
@@ -958,6 +978,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
if (!obj)
return node ? -EACCES : -EINVAL;
+ if (i915_gem_object_is_ttm(obj))
+ bo = i915_gem_to_ttm(obj);
+
if (i915_gem_object_is_readonly(obj)) {
if (vma->vm_flags & VM_WRITE) {
i915_gem_object_put(obj);
@@ -987,10 +1010,15 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
fput(anon);
if (obj->ops->mmap_ops) {
- vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
- vma->vm_ops = obj->ops->mmap_ops;
- vma->vm_private_data = node->driver_private;
- return 0;
+ /* there could be an obj backend with mmap_ops that isn't TTM */
+ if (!i915_gem_object_is_ttm(obj) ||
+ (i915_gem_object_is_ttm(obj) &&
+ bo->type == ttm_bo_type_device)) {
+ vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = obj->ops->mmap_ops;
+ vma->vm_private_data = node->driver_private;
+ return 0;
+ }
}
vma->vm_private_data = mmo;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..7ae22c522468 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -4,9 +4,11 @@
*/
#include <linux/shmem_fs.h>
+#include <linux/mman.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_bo_api.h>
#include <drm/drm_buddy.h>
#include "i915_drv.h"
@@ -20,6 +22,8 @@
#include "gem/i915_gem_ttm.h"
#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
+#include "gem/i915_gem_clflush.h"
+#include "gem/i915_gem_tiling.h"
#include "gt/intel_gpu_commands.h"
#define I915_TTM_PRIO_PURGE 0
@@ -207,6 +211,11 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
return PTR_ERR(filp);
mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+ if (IS_I965GM(i915) || IS_I965G(i915)) {
+ /* 965gm cannot relocate objects above 4GiB. */
+ mask &= ~__GFP_HIGHMEM;
+ mask |= __GFP_DMA32;
+ }
mapping = filp->f_mapping;
mapping_set_gfp_mask(mapping, mask);
@@ -294,7 +303,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
page_flags |= TTM_TT_FLAG_EXTERNAL |
TTM_TT_FLAG_EXTERNAL_MAPPABLE;
- i915_tt->is_shmem = true;
+
+ i915_tt->is_shmem = i915->params.use_pool_alloc ? false : true;
}
if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj))
@@ -513,9 +523,7 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
return 0;
- GEM_BUG_ON(!i915_tt->is_shmem);
-
- if (!i915_tt->filp)
+ if (!ttm_tt_is_populated(bo->ttm))
return 0;
ret = ttm_bo_wait_ctx(bo, &ctx);
@@ -792,6 +800,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
}
if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ const size_t size = (size_t)bo->ttm->num_pages << PAGE_SHIFT;
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+ /*
+ * If there's no chance of allocating enough pages for the whole
+ * object, bail early.
+ */
+ if (size > resource_size(&mr->region))
+ return -ENOMEM;
+
ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
if (ret)
return ret;
@@ -807,6 +825,14 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
if (IS_ERR(rsgt))
return PTR_ERR(rsgt);
+ if (!HAS_LMEM(to_i915(obj->base.dev)) && bo->ttm) {
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_do_bit_17_swizzle(obj, &rsgt->table);
+
+ if (i915_gem_object_can_bypass_llc(obj))
+ obj->cache_dirty = true;
+ }
+
GEM_BUG_ON(obj->mm.rsgt);
obj->mm.rsgt = rsgt;
__i915_gem_object_set_pages(obj, &rsgt->table,
@@ -894,6 +920,10 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
* and shrinkers will move it out if needed.
*/
+ if (!HAS_LMEM(to_i915(obj->base.dev)) &&
+ i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj, st);
+
if (obj->mm.rsgt)
i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
}
@@ -1222,6 +1252,11 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
ttm_bo_type_kernel;
+ if (!HAS_LMEM(i915) && i915->params.use_pool_alloc) {
+ GEM_WARN_ON(mem->type != INTEL_MEMORY_SYSTEM);
+ bo_type = ttm_bo_type_kernel;
+ }
+
obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);
/* Forcing the page size is kernel internal only */
@@ -1280,3 +1315,64 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
intel_memory_region_set_name(mr, "system-ttm");
return mr;
}
+
+/**
+ * i915_gem_object_is_ttm - Check whether a GEM object uses the TTM backend
+ * @obj: The GEM object to test.
+ *
+ * Return: true if @obj's ops table is i915_gem_ttm_obj_ops, false otherwise.
+ */
+bool i915_gem_object_is_ttm(const struct drm_i915_gem_object *obj)
+{
+	/* Any other ops table means some other backend owns the object. */
+	if (obj->ops != &i915_gem_ttm_obj_ops)
+		return false;
+
+	return true;
+}
+
+/**
+ * i915_gem_object_create_ttm_from_data - Create a GEM object holding a copy
+ * of a kernel buffer
+ * @dev_priv: i915 device private.
+ * @data: Source buffer to copy from.
+ * @size: Number of bytes to copy; the object size is rounded up to PAGE_SIZE.
+ *
+ * The object is pinned and mapped with I915_MAP_WC for the duration of the
+ * copy, and the pin is dropped again before returning.
+ *
+ * NOTE(review): the object is allocated through
+ * i915_gem_object_create_shmem(); presumably that yields a TTM-backed object
+ * when the system memory region is managed by TTM - confirm.
+ *
+ * Return: The new object, or an ERR_PTR() if allocation or mapping failed.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_ttm_from_data(struct drm_i915_private *dev_priv,
+				     const void *data, resource_size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	void *vaddr;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		i915_gem_object_put(obj);
+		/* Hand the errno back with an explicit pointer-type cast. */
+		return ERR_CAST(vaddr);
+	}
+
+	memcpy(vaddr, data, size);
+
+	i915_gem_object_unpin_map(obj);
+
+	return obj;
+}
+
+/**
+ * i915_gem_ttm_mmap - Back the legacy mmap ioctl for a TTM object
+ * @obj: The TTM-backed GEM object to map.
+ * @args: ioctl arguments; only @args->size and @args->offset are read.
+ *
+ * Creates a shared read/write mapping in the calling process and then,
+ * under the mmap write lock, points the new VMA at the object's mmap_ops
+ * with the TTM buffer object as its private data.
+ *
+ * Return: The userspace address on success, or a negative errno stored in
+ * an unsigned long (test with IS_ERR_VALUE()).
+ */
+unsigned long i915_gem_ttm_mmap(struct drm_i915_gem_object *obj,
+				struct drm_i915_gem_mmap *args)
+{
+	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long addr;
+
+	addr = vm_mmap(NULL, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	/*
+	 * The mapping may have been torn down between vm_mmap() and taking
+	 * the lock, in which case find_vma() returns NULL or a later VMA that
+	 * does not cover addr. Re-testing addr here (as before) could never
+	 * fail and left vma unchecked before it is dereferenced below.
+	 */
+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr) {
+		mmap_write_unlock(mm);
+		return -ENOMEM;
+	}
+
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+	vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
+	vma->vm_ops = obj->ops->mmap_ops;
+	vma->vm_private_data = bo;
+
+	mmap_write_unlock(mm);
+
+	return addr;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index e4842b4296fc..2b1772a08c9f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -95,4 +95,13 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
bool i915_ttm_resource_mappable(struct ttm_resource *res);
+bool i915_gem_object_is_ttm(const struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_ttm_from_data(struct drm_i915_private *dev_priv,
+ const void *data, resource_size_t size);
+
+unsigned long i915_gem_ttm_mmap(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_mmap *args);
+
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..3fb7bc2de3ba 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -52,10 +52,15 @@ static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
struct ttm_tt *ttm)
{
- return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
- !i915_ttm_gtt_binds_lmem(res) &&
- ttm->caching == ttm_cached) ? I915_CACHE_LLC :
- I915_CACHE_NONE;
+ if (IS_DGFX(i915))
+ /* DG platforms have no LLC but can do cache snooping */
+ return (!i915_ttm_gtt_binds_lmem(res) &&
+ ttm->caching == ttm_cached) ?
+ I915_CACHE_LLC : I915_CACHE_NONE;
+ else
+ /* Cache snooping seems to be broken in some pre-LLC platforms */
+ return (HAS_LLC(i915) && ttm->caching == ttm_cached) ?
+ I915_CACHE_LLC : I915_CACHE_NONE;
}
static struct intel_memory_region *
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 3ced9948a331..c05f89da21a5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -871,7 +871,7 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
- if (obj->ops->mmap_offset)
+ if (HAS_LMEM(i915) && obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
return false;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d5fca1f68eff..17a81bfccec8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,6 +11,7 @@
#include <drm/drm_print.h>
#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_ttm.h"
#include "intel_uc_fw.h"
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
@@ -482,7 +483,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (!IS_ERR(obj))
obj->flags |= I915_BO_ALLOC_PM_EARLY;
} else {
- obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ if (unlikely(i915->params.use_pool_alloc))
+ obj = i915_gem_object_create_ttm_from_data(i915, fw->data, fw->size);
+ else
+ obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
}
if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 6fc475a5db61..1af11f030ab1 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -207,6 +207,12 @@ i915_param_named_unsafe(lmem_size, uint, 0400,
i915_param_named_unsafe(lmem_bar_size, uint, 0400,
"Set the lmem bar size(in MiB).");
+i915_param_named_unsafe(use_pool_alloc, bool, 0600,
+ "Force the driver to use TTM's pool allocator API for smem objects. "
+ "This will cause TTM to take over BO allocation even in integrated platforms. "
+ "(default: false)");
+
+
static __always_inline void _print_param(struct drm_printer *p,
const char *name,
const char *type,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 2733cb6cfe09..6aac9c46a7fe 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -84,7 +84,9 @@ struct drm_printer;
param(bool, verbose_state_checks, true, 0) \
param(bool, nuclear_pageflip, false, 0400) \
param(bool, enable_dp_mst, true, 0600) \
- param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
+ param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \
+ param(bool, use_pool_alloc, true, 0600)
+ /* set to 'true' for trybot testing */
#define MEMBER(T, member, ...) T member;
struct i915_params {
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 9a4a7fb55582..e44b4813fd0c 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -310,6 +310,16 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915)
{
int err, i;
+ /*
+ * if (GRAPHICS_VER(i915) <= 5 &&
+ * i915->params.use_pool_alloc) {
+ * drm_dbg(&i915->drm,
+ * "TTM may not be used with GEN <=5 devices,"
+ * " falling back on shmem\n");
+ * i915->params.use_pool_alloc = false;
+ * }
+ */
+
for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) {
struct intel_memory_region *mem = ERR_PTR(-ENODEV);
u16 type, instance;
@@ -321,7 +331,7 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915)
instance = intel_region_map[i].instance;
switch (type) {
case INTEL_MEMORY_SYSTEM:
- if (IS_DGFX(i915))
+ if (IS_DGFX(i915) || i915->params.use_pool_alloc)
mem = i915_gem_ttm_system_setup(i915, type,
instance);
else
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 575d67bc6ffe..fafefe090a4d 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,14 @@
int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
{
struct drm_device *drm = &dev_priv->drm;
+ bool use_dma32 = false;
+
+ if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+ use_dma32 = true;
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
drm->dev, drm->anon_inode->i_mapping,
- drm->vma_offset_manager, false, false);
+ drm->vma_offset_manager, false, use_dma32);
}
/**
--
2.37.0
More information about the Intel-gfx-trybot
mailing list