[PATCH v9 41/42] drm/msm: Defer VMA unmap for fb unpins
Rob Clark
robin.clark at oss.qualcomm.com
Sun Jun 29 20:13:24 UTC 2025
With the conversion to drm_gpuvm, we lost the lazy VMA cleanup, which
means that fb cleanup/unpin when pageflipping to new scanout buffers
immediately unmaps the scanout buffer. This is costly (with tlbinv,
it can be 4-6ms for a 1080p scanout buffer, and more for higher
resolutions)!
To avoid this, introduce a vma_ref, which is incremented whenever
userspace has a GEM handle or dma-buf fd. When unpinning if the
vm is the kms->vm we defer tearing down the VMA until the vma_ref
drops to zero. If the buffer is still part of a flip-chain then
userspace will be holding some sort of reference to the BO, either
via a GEM handle and/or dma-buf fd. So this avoids unmapping the VMA
when there is a strong possibility that it will be needed again.
Signed-off-by: Rob Clark <robin.clark at oss.qualcomm.com>
Tested-by: Antonino Maniscalco <antomani103 at gmail.com>
Reviewed-by: Antonino Maniscalco <antomani103 at gmail.com>
---
drivers/gpu/drm/msm/msm_drv.c | 1 +
drivers/gpu/drm/msm/msm_drv.h | 1 +
drivers/gpu/drm/msm/msm_fb.c | 5 ++-
drivers/gpu/drm/msm/msm_gem.c | 60 ++++++++++++++++++-----------
drivers/gpu/drm/msm/msm_gem.h | 28 ++++++++++++++
drivers/gpu/drm/msm/msm_gem_prime.c | 54 +++++++++++++++++++++++++-
6 files changed, 123 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9b1f1c1a41d4..0597ff6da317 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -830,6 +830,7 @@ static const struct drm_driver msm_driver = {
.postclose = msm_postclose,
.dumb_create = msm_gem_dumb_create,
.dumb_map_offset = msm_gem_dumb_map_offset,
+ .gem_prime_import = msm_gem_prime_import,
.gem_prime_import_sg_table = msm_gem_prime_import_sg_table,
#ifdef CONFIG_DEBUG_FS
.debugfs_init = msm_debugfs_init,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 200c3135bbf9..2b49c4b800ee 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -269,6 +269,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev);
struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map);
void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map);
+struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev, struct dma_buf *buf);
struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct sg_table *sg);
struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags);
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 8ae2f326ec54..bc7c2bb8f01e 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -89,6 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb, bool needs_dirtyfb)
return 0;
for (i = 0; i < n; i++) {
+ msm_gem_vma_get(fb->obj[i]);
ret = msm_gem_get_and_pin_iova(fb->obj[i], vm, &msm_fb->iova[i]);
drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
fb->base.id, i, msm_fb->iova[i], ret);
@@ -114,8 +115,10 @@ void msm_framebuffer_cleanup(struct drm_framebuffer *fb, bool needed_dirtyfb)
memset(msm_fb->iova, 0, sizeof(msm_fb->iova));
- for (i = 0; i < n; i++)
+ for (i = 0; i < n; i++) {
msm_gem_unpin_iova(fb->obj[i], vm);
+ msm_gem_vma_put(fb->obj[i]);
+ }
}
uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int plane)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 3e87d27dfcb6..33d3354c6102 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -19,6 +19,7 @@
#include "msm_drv.h"
#include "msm_gem.h"
#include "msm_gpu.h"
+#include "msm_kms.h"
static void update_device_mem(struct msm_drm_private *priv, ssize_t size)
{
@@ -39,6 +40,7 @@ static void update_ctx_mem(struct drm_file *file, ssize_t size)
static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file)
{
+ msm_gem_vma_get(obj);
update_ctx_mem(file, obj->size);
return 0;
}
@@ -46,33 +48,13 @@ static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file)
static void put_iova_spaces(struct drm_gem_object *obj, struct drm_gpuvm *vm,
bool close, const char *reason);
-static void detach_vm(struct drm_gem_object *obj, struct drm_gpuvm *vm)
-{
- msm_gem_assert_locked(obj);
- drm_gpuvm_resv_assert_held(vm);
-
- struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_find(vm, obj);
- if (vm_bo) {
- struct drm_gpuva *vma;
-
- drm_gpuvm_bo_for_each_va (vma, vm_bo) {
- if (vma->vm != vm)
- continue;
- msm_gem_vma_unmap(vma, "detach");
- msm_gem_vma_close(vma);
- break;
- }
-
- drm_gpuvm_bo_put(vm_bo);
- }
-}
-
static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file)
{
struct msm_context *ctx = file->driver_priv;
struct drm_exec exec;
update_ctx_mem(file, -obj->size);
+ msm_gem_vma_put(obj);
/*
* If VM isn't created yet, nothing to cleanup. And in fact calling
@@ -99,7 +81,31 @@ static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file)
msm_gem_lock_vm_and_obj(&exec, obj, ctx->vm);
put_iova_spaces(obj, ctx->vm, true, "close");
- detach_vm(obj, ctx->vm);
+ drm_exec_fini(&exec); /* drop locks */
+}
+
+/*
+ * Get/put for kms->vm VMA
+ */
+
+void msm_gem_vma_get(struct drm_gem_object *obj)
+{
+ atomic_inc(&to_msm_bo(obj)->vma_ref);
+}
+
+void msm_gem_vma_put(struct drm_gem_object *obj)
+{
+ struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_exec exec;
+
+ if (atomic_dec_return(&to_msm_bo(obj)->vma_ref))
+ return;
+
+ if (!priv->kms)
+ return;
+
+ msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm);
+ put_iova_spaces(obj, priv->kms->vm, true, "vma_put");
drm_exec_fini(&exec); /* drop locks */
}
@@ -656,6 +662,13 @@ int msm_gem_set_iova(struct drm_gem_object *obj,
return ret;
}
+static bool is_kms_vm(struct drm_gpuvm *vm)
+{
+ struct msm_drm_private *priv = vm->drm->dev_private;
+
+ return priv->kms && (priv->kms->vm == vm);
+}
+
/*
* Unpin a iova by updating the reference counts. The memory isn't actually
* purged until something else (shrinker, mm_notifier, destroy, etc) decides
@@ -671,7 +684,8 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj, struct drm_gpuvm *vm)
if (vma) {
msm_gem_unpin_locked(obj);
}
- detach_vm(obj, vm);
+ if (!is_kms_vm(vm))
+ put_iova_spaces(obj, vm, true, "close");
drm_exec_fini(&exec); /* drop locks */
}
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 1ce97f8a30bb..5c0c59e4835c 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -211,9 +211,37 @@ struct msm_gem_object {
* Protected by LRU lock.
*/
int pin_count;
+
+ /**
+ * @vma_ref: Reference count of VMA users.
+ *
+ * With the vm_bo/vma holding a reference to the GEM object, we'd
+ * otherwise have to actively tear down a VMA when, for example,
+ * a buffer is unpinned for scanout, vs. the pre-drm_gpuvm approach
+ * where a VMA did not hold a reference to the BO, but instead was
+ * implicitly torn down when the BO was freed.
+ *
+ * To regain the lazy VMA teardown, we use the @vma_ref. It is
+ * incremented for any of the following:
+ *
+ * 1) the BO is exported as a dma_buf
+ * 2) the BO has open userspace handle
+ *
+ * All of those conditions will hold an reference to the BO,
+ * preventing it from being freed. So lazily keeping around the
+ * VMA will not prevent the BO from being freed. (Or rather, the
+ * reference loop is harmless in this case.)
+ *
+ * When the @vma_ref drops to zero, then kms->vm VMA will be
+ * torn down.
+ */
+ atomic_t vma_ref;
};
#define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
+void msm_gem_vma_get(struct drm_gem_object *obj);
+void msm_gem_vma_put(struct drm_gem_object *obj);
+
uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
int msm_gem_prot(struct drm_gem_object *obj);
int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma);
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 4d93f2daeeaa..c0a33ac839cb 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -6,6 +6,7 @@
#include <linux/dma-buf.h>
+#include <drm/drm_drv.h>
#include <drm/drm_prime.h>
#include "msm_drv.h"
@@ -42,19 +43,68 @@ void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
msm_gem_put_vaddr_locked(obj);
}
+static void msm_gem_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+
+ msm_gem_vma_put(obj);
+ drm_gem_dmabuf_release(dma_buf);
+}
+
+static const struct dma_buf_ops msm_gem_prime_dmabuf_ops = {
+ .attach = drm_gem_map_attach,
+ .detach = drm_gem_map_detach,
+ .map_dma_buf = drm_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = msm_gem_dmabuf_release,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
+struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *buf)
+{
+ if (buf->ops == &msm_gem_prime_dmabuf_ops) {
+ struct drm_gem_object *obj = buf->priv;
+ if (obj->dev == dev) {
+ /*
+ * Importing dmabuf exported from our own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ drm_gem_object_get(obj);
+ return obj;
+ }
+ }
+
+ return drm_gem_prime_import(dev, buf);
+}
+
struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct sg_table *sg)
{
return msm_gem_import(dev, attach->dmabuf, sg);
}
-
struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags)
{
if (to_msm_bo(obj)->flags & MSM_BO_NO_SHARE)
return ERR_PTR(-EPERM);
- return drm_gem_prime_export(obj, flags);
+ msm_gem_vma_get(obj);
+
+ struct drm_device *dev = obj->dev;
+ struct dma_buf_export_info exp_info = {
+ .exp_name = KBUILD_MODNAME, /* white lie for debug */
+ .owner = dev->driver->fops->owner,
+ .ops = &msm_gem_prime_dmabuf_ops,
+ .size = obj->size,
+ .flags = flags,
+ .priv = obj,
+ .resv = obj->resv,
+ };
+
+ return drm_gem_dmabuf_export(dev, &exp_info);
}
int msm_gem_prime_pin(struct drm_gem_object *obj)
--
2.50.0
More information about the dri-devel
mailing list