[PATCH v6 3/7] accel/ivpu: Add GEM buffer object management

Tue Jan 17 09:27:19 UTC 2023

Adds four types of GEM-based BOs for the VPU:
  - shmem
  - internal
  - prime

All types are implemented as struct ivpu_bo, based on
struct drm_gem_object. VPU address is allocated when buffer is created
except for imported prime buffers that allocate it in BO_INFO IOCTL due
to missing file_priv arg in gem_prime_import callback.
Internal buffers are pinned on creation, the rest of buffers types
can be pinned on demand (in SUBMIT IOCTL).
Buffer VPU address, allocated pages and mappings are released when the
buffer is destroyed.
Eviction mechanism is planned for future versions.

Add two new IOCTLs: BO_CREATE, BO_INFO

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz at linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay at kernel.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo at quicinc.com>
---
 drivers/accel/ivpu/Makefile   |   1 +
 drivers/accel/ivpu/ivpu_drv.c |  30 +-
 drivers/accel/ivpu/ivpu_drv.h |   1 +
 drivers/accel/ivpu/ivpu_gem.c | 727 ++++++++++++++++++++++++++++++++++
 drivers/accel/ivpu/ivpu_gem.h | 126 ++++++
 include/uapi/drm/ivpu_accel.h |  94 +++++
 6 files changed, 977 insertions(+), 2 deletions(-)
 create mode 100644 drivers/accel/ivpu/ivpu_gem.c
 create mode 100644 drivers/accel/ivpu/ivpu_gem.h

diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index 59cd7843b2189..5d7c5862399ca 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -3,6 +3,7 @@
 
 intel_vpu-y := \
 	ivpu_drv.o \
+	ivpu_gem.o \
 	ivpu_hw_mtl.o \
 	ivpu_mmu.o \
 	ivpu_mmu_context.o
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 69ca7dae8b57c..53d92d06814bf 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -12,8 +12,10 @@
 #include <drm/drm_file.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_prime.h>
 
 #include "ivpu_drv.h"
+#include "ivpu_gem.h"
 #include "ivpu_hw.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
@@ -49,6 +51,24 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
 	return file_priv;
 }
 
+struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id)
+{
+	struct ivpu_file_priv *file_priv;
+
+	xa_lock_irq(&vdev->context_xa);
+	file_priv = xa_load(&vdev->context_xa, id);
+	/* file_priv may still be in context_xa during file_priv_release() */
+	if (file_priv && !kref_get_unless_zero(&file_priv->ref))
+		file_priv = NULL;
+	xa_unlock_irq(&vdev->context_xa);
+
+	if (file_priv)
+		ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n",
+			 file_priv->ctx.id, kref_read(&file_priv->ref));
+
+	return file_priv;
+}
+
 static void file_priv_release(struct kref *ref)
 {
 	struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref);
@@ -57,7 +77,7 @@ static void file_priv_release(struct kref *ref)
 	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
 
 	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
-	WARN_ON(xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
+	drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
 	kfree(file_priv);
 }
 
@@ -66,7 +86,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
 	struct ivpu_file_priv *file_priv = *link;
 	struct ivpu_device *vdev = file_priv->vdev;
 
-	WARN_ON(!file_priv);
+	drm_WARN_ON(&vdev->drm, !file_priv);
 
 	ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n",
 		 file_priv->ctx.id, kref_read(&file_priv->ref));
@@ -200,6 +220,8 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
 static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
 };
 
 int ivpu_shutdown(struct ivpu_device *vdev)
@@ -227,6 +249,10 @@ static const struct drm_driver driver = {
 
 	.open = ivpu_open,
 	.postclose = ivpu_postclose,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import = ivpu_gem_prime_import,
+	.gem_prime_mmap = drm_gem_prime_mmap,
 
 	.ioctls = ivpu_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index b9ce66f406968..972f71cfaa129 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -115,6 +115,7 @@ extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
+struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id);
 void ivpu_file_priv_put(struct ivpu_file_priv **link);
 int ivpu_shutdown(struct ivpu_device *vdev);
 
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
new file mode 100644
index 0000000000000..b938359b19afc
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/set_memory.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static const struct drm_gem_object_funcs ivpu_gem_funcs;
+
+static struct lock_class_key prime_bo_lock_class_key;
+
+static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	/* Pages are managed by the underlying dma-buf */
+	return 0;
+}
+
+static void prime_free_pages_locked(struct ivpu_bo *bo)
+{
+	/* Pages are managed by the underlying dma-buf */
+}
+
+static int prime_map_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt;
+
+	WARN_ON(!bo->base.import_attach);
+
+	sgt = dma_buf_map_attachment(bo->base.import_attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
+		return PTR_ERR(sgt);
+	}
+
+	bo->sgt = sgt;
+	return 0;
+}
+
+static void prime_unmap_pages_locked(struct ivpu_bo *bo)
+{
+	WARN_ON(!bo->base.import_attach);
+
+	dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
+	bo->sgt = NULL;
+}
+
+static const struct ivpu_bo_ops prime_ops = {
+	.type = IVPU_BO_TYPE_PRIME,
+	.name = "prime",
+	.alloc_pages = prime_alloc_pages_locked,
+	.free_pages = prime_free_pages_locked,
+	.map_pages = prime_map_pages_locked,
+	.unmap_pages = prime_unmap_pages_locked,
+};
+
+static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	int npages = bo->base.size >> PAGE_SHIFT;
+	struct page **pages;
+
+	pages = drm_gem_get_pages(&bo->base);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	if (bo->flags & DRM_IVPU_BO_WC)
+		set_pages_array_wc(pages, npages);
+	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+		set_pages_array_uc(pages, npages);
+
+	bo->pages = pages;
+	return 0;
+}
+
+static void shmem_free_pages_locked(struct ivpu_bo *bo)
+{
+	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+		set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+	drm_gem_put_pages(&bo->base, bo->pages, true, false);
+	bo->pages = NULL;
+}
+
+static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
+{
+	int npages = bo->base.size >> PAGE_SHIFT;
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt;
+	int ret;
+
+	sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
+	if (IS_ERR(sgt)) {
+		ivpu_err(vdev, "Failed to allocate sgtable\n");
+		return PTR_ERR(sgt);
+	}
+
+	ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
+	if (ret) {
+		ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+		goto err_free_sgt;
+	}
+
+	bo->sgt = sgt;
+	return 0;
+
+err_free_sgt:
+	kfree(sgt);
+	return ret;
+}
+
+static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+	dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
+	sg_free_table(bo->sgt);
+	kfree(bo->sgt);
+	bo->sgt = NULL;
+}
+
+static const struct ivpu_bo_ops shmem_ops = {
+	.type = IVPU_BO_TYPE_SHMEM,
+	.name = "shmem",
+	.alloc_pages = shmem_alloc_pages_locked,
+	.free_pages = shmem_free_pages_locked,
+	.map_pages = ivpu_bo_map_pages_locked,
+	.unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+	struct page **pages;
+	int ret;
+
+	pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+		if (!pages[i]) {
+			ret = -ENOMEM;
+			goto err_free_pages;
+		}
+		cond_resched();
+	}
+
+	bo->pages = pages;
+	return 0;
+
+err_free_pages:
+	while (i--)
+		put_page(pages[i]);
+	kvfree(pages);
+	return ret;
+}
+
+static void internal_free_pages_locked(struct ivpu_bo *bo)
+{
+	unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+
+	for (i = 0; i < npages; i++)
+		put_page(bo->pages[i]);
+
+	kvfree(bo->pages);
+	bo->pages = NULL;
+}
+
+static const struct ivpu_bo_ops internal_ops = {
+	.type = IVPU_BO_TYPE_INTERNAL,
+	.name = "internal",
+	.alloc_pages = internal_alloc_pages_locked,
+	.free_pages = internal_free_pages_locked,
+	.map_pages = ivpu_bo_map_pages_locked,
+	.unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret;
+
+	lockdep_assert_held(&bo->lock);
+	drm_WARN_ON(&vdev->drm, bo->sgt);
+
+	ret = bo->ops->alloc_pages(bo);
+	if (ret) {
+		ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
+		return ret;
+	}
+
+	ret = bo->ops->map_pages(bo);
+	if (ret) {
+		ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
+		goto err_free_pages;
+	}
+	return ret;
+
+err_free_pages:
+	bo->ops->free_pages(bo);
+	return ret;
+}
+
+static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
+{
+	mutex_lock(&bo->lock);
+
+	WARN_ON(!bo->sgt);
+	bo->ops->unmap_pages(bo);
+	WARN_ON(bo->sgt);
+	bo->ops->free_pages(bo);
+	WARN_ON(bo->pages);
+
+	mutex_unlock(&bo->lock);
+}
+
+/*
+ * ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
+ *
+ * This function pins physical memory pages, then maps the physical pages
+ * to IOMMU address space and finally updates the VPU MMU page tables
+ * to allow the VPU to translate VPU address to IOMMU address.
+ */
+int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret = 0;
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->vpu_addr) {
+		ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
+			 bo->ctx->id, bo->handle);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (!bo->sgt) {
+		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+		if (ret)
+			goto unlock;
+	}
+
+	if (!bo->mmu_mapped) {
+		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+					       ivpu_bo_is_snooped(bo));
+		if (ret) {
+			ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
+			goto unlock;
+		}
+		bo->mmu_mapped = true;
+	}
+
+unlock:
+	mutex_unlock(&bo->lock);
+
+	return ret;
+}
+
+static int
+ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
+		       const struct ivpu_addr_range *range)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret;
+
+	if (!range) {
+		if (bo->flags & DRM_IVPU_BO_HIGH_MEM)
+			range = &vdev->hw->ranges.user_high;
+		else
+			range = &vdev->hw->ranges.user_low;
+	}
+
+	mutex_lock(&ctx->lock);
+	ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
+	if (!ret) {
+		bo->ctx = ctx;
+		bo->vpu_addr = bo->mm_node.start;
+		list_add_tail(&bo->ctx_node, &ctx->bo_list);
+	}
+	mutex_unlock(&ctx->lock);
+
+	return ret;
+}
+
+static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct ivpu_mmu_context *ctx = bo->ctx;
+
+	ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+		 ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+
+	mutex_lock(&bo->lock);
+
+	if (bo->mmu_mapped) {
+		drm_WARN_ON(&vdev->drm, !bo->sgt);
+		ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+		bo->mmu_mapped = false;
+	}
+
+	mutex_lock(&ctx->lock);
+	list_del(&bo->ctx_node);
+	bo->vpu_addr = 0;
+	bo->ctx = NULL;
+	ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
+	mutex_unlock(&ctx->lock);
+
+	mutex_unlock(&bo->lock);
+}
+
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+{
+	struct ivpu_bo *bo, *tmp;
+
+	list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
+		ivpu_bo_free_vpu_addr(bo);
+}
+
+static struct ivpu_bo *
+ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
+	      u64 size, u32 flags, const struct ivpu_bo_ops *ops,
+	      const struct ivpu_addr_range *range, u64 user_ptr)
+{
+	struct ivpu_bo *bo;
+	int ret = 0;
+
+	if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
+		return ERR_PTR(-EINVAL);
+
+	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+	case DRM_IVPU_BO_CACHED:
+	case DRM_IVPU_BO_UNCACHED:
+	case DRM_IVPU_BO_WC:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&bo->lock);
+	bo->base.funcs = &ivpu_gem_funcs;
+	bo->flags = flags;
+	bo->ops = ops;
+	bo->user_ptr = user_ptr;
+
+	if (ops->type == IVPU_BO_TYPE_SHMEM)
+		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
+	else
+		drm_gem_private_object_init(&vdev->drm, &bo->base, size);
+
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize drm object\n");
+		goto err_free;
+	}
+
+	if (flags & DRM_IVPU_BO_MAPPABLE) {
+		ret = drm_gem_create_mmap_offset(&bo->base);
+		if (ret) {
+			ivpu_err(vdev, "Failed to allocate mmap offset\n");
+			goto err_release;
+		}
+	}
+
+	if (mmu_context) {
+		ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
+		if (ret) {
+			ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
+			goto err_release;
+		}
+	}
+
+	return bo;
+
+err_release:
+	drm_gem_object_release(&bo->base);
+err_free:
+	kfree(bo);
+	return ERR_PTR(ret);
+}
+
+static void ivpu_bo_free(struct drm_gem_object *obj)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+	if (bo->ctx)
+		ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+			 bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+	else
+		ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
+			 (bool)bo->sgt, bo->mmu_mapped);
+
+	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+
+	vunmap(bo->kvaddr);
+
+	if (bo->ctx)
+		ivpu_bo_free_vpu_addr(bo);
+
+	if (bo->sgt)
+		ivpu_bo_unmap_and_free_pages(bo);
+
+	if (bo->base.import_attach)
+		drm_prime_gem_destroy(&bo->base, bo->sgt);
+
+	drm_gem_object_release(&bo->base);
+
+	mutex_destroy(&bo->lock);
+	kfree(bo);
+}
+
+static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+	ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
+		 bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
+
+	if (obj->import_attach) {
+		/* Drop the reference drm_gem_mmap_obj() acquired.*/
+		drm_gem_object_put(obj);
+		vma->vm_private_data = NULL;
+		return dma_buf_mmap(obj->dma_buf, vma, 0);
+	}
+
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
+
+	return 0;
+}
+
+static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	loff_t npages = obj->size >> PAGE_SHIFT;
+	int ret = 0;
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->sgt)
+		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+
+	mutex_unlock(&bo->lock);
+
+	if (ret)
+		return ERR_PTR(ret);
+
+	return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+}
+
+static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	loff_t npages = obj->size >> PAGE_SHIFT;
+	pgoff_t page_offset;
+	struct page *page;
+	vm_fault_t ret;
+	int err;
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->sgt) {
+		err = ivpu_bo_alloc_and_map_pages_locked(bo);
+		if (err) {
+			ret = vmf_error(err);
+			goto unlock;
+		}
+	}
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	if (page_offset >= npages) {
+		ret = VM_FAULT_SIGBUS;
+	} else {
+		page = bo->pages[page_offset];
+		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
+	}
+
+unlock:
+	mutex_unlock(&bo->lock);
+
+	return ret;
+}
+
+static const struct vm_operations_struct ivpu_vm_ops = {
+	.fault = ivpu_vm_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static const struct drm_gem_object_funcs ivpu_gem_funcs = {
+	.free = ivpu_bo_free,
+	.mmap = ivpu_bo_mmap,
+	.vm_ops = &ivpu_vm_ops,
+	.get_sg_table = ivpu_bo_get_sg_table,
+};
+
+int
+ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct drm_ivpu_bo_create *args = data;
+	u64 size = PAGE_ALIGN(args->size);
+	struct ivpu_bo *bo;
+	int ret;
+
+	if (args->flags & ~DRM_IVPU_BO_FLAGS)
+		return -EINVAL;
+
+	if (size == 0)
+		return -EINVAL;
+
+	bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+	if (IS_ERR(bo)) {
+		ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
+			 bo, file_priv->ctx.id, args->size, args->flags);
+		return PTR_ERR(bo);
+	}
+
+	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+	if (!ret) {
+		args->vpu_addr = bo->vpu_addr;
+		args->handle = bo->handle;
+	}
+
+	drm_gem_object_put(&bo->base);
+
+	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
+
+	return ret;
+}
+
+struct ivpu_bo *
+ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
+{
+	const struct ivpu_addr_range *range;
+	struct ivpu_addr_range fixed_range;
+	struct ivpu_bo *bo;
+	pgprot_t prot;
+	int ret;
+
+	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
+	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
+
+	if (vpu_addr) {
+		fixed_range.start = vpu_addr;
+		fixed_range.end = vpu_addr + size;
+		range = &fixed_range;
+	} else {
+		range = &vdev->hw->ranges.global_low;
+	}
+
+	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+	if (IS_ERR(bo)) {
+		ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
+			 bo, vpu_addr, size, flags);
+		return NULL;
+	}
+
+	ret = ivpu_bo_pin(bo);
+	if (ret)
+		goto err_put;
+
+	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+		drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
+	bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
+	if (!bo->kvaddr) {
+		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+		goto err_put;
+	}
+
+	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 bo->vpu_addr, bo->base.size, flags);
+
+	return bo;
+
+err_put:
+	drm_gem_object_put(&bo->base);
+	return NULL;
+}
+
+void ivpu_bo_free_internal(struct ivpu_bo *bo)
+{
+	drm_gem_object_put(&bo->base);
+}
+
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
+{
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct dma_buf_attachment *attach;
+	struct ivpu_bo *bo;
+
+	attach = dma_buf_attach(buf, dev->dev);
+	if (IS_ERR(attach))
+		return ERR_CAST(attach);
+
+	get_dma_buf(buf);
+
+	bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
+	if (IS_ERR(bo)) {
+		ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
+		goto err_detach;
+	}
+
+	lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
+
+	bo->base.import_attach = attach;
+
+	return &bo->base;
+
+err_detach:
+	dma_buf_detach(buf, attach);
+	dma_buf_put(buf);
+	return ERR_CAST(bo);
+}
+
+int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct drm_ivpu_bo_info *args = data;
+	struct drm_gem_object *obj;
+	struct ivpu_bo *bo;
+	int ret = 0;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	bo = to_ivpu_bo(obj);
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->ctx) {
+		ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
+		if (ret) {
+			ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
+			goto unlock;
+		}
+	}
+
+	args->flags = bo->flags;
+	args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
+	args->vpu_addr = bo->vpu_addr;
+	args->size = obj->size;
+unlock:
+	mutex_unlock(&bo->lock);
+	drm_gem_object_put(obj);
+	return ret;
+}
+
+static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
+{
+	unsigned long dma_refcount = 0;
+
+	if (bo->base.dma_buf && bo->base.dma_buf->file)
+		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+
+	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
+		   bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
+		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+}
+
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
+{
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+	struct ivpu_bo *bo;
+
+	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
+		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+
+	mutex_lock(&vdev->gctx.lock);
+	list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+		ivpu_bo_print_info(bo, p);
+	mutex_unlock(&vdev->gctx.lock);
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
+		if (!file_priv)
+			continue;
+
+		mutex_lock(&file_priv->ctx.lock);
+		list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
+			ivpu_bo_print_info(bo, p);
+		mutex_unlock(&file_priv->ctx.lock);
+
+		ivpu_file_priv_put(&file_priv);
+	}
+}
+
+void ivpu_bo_list_print(struct drm_device *dev)
+{
+	struct drm_printer p = drm_info_printer(dev->dev);
+
+	ivpu_bo_list(dev, &p);
+}
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
new file mode 100644
index 0000000000000..1891c90702c20
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+#ifndef __IVPU_GEM_H__
+#define __IVPU_GEM_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_mm.h>
+
+struct dma_buf;
+struct ivpu_bo_ops;
+struct ivpu_file_priv;
+
+struct ivpu_bo {
+	struct drm_gem_object base;
+	const struct ivpu_bo_ops *ops;
+
+	struct ivpu_mmu_context *ctx;
+	struct list_head ctx_node;
+	struct drm_mm_node mm_node;
+
+	struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
+	struct sg_table *sgt;
+	struct page **pages;
+	bool mmu_mapped;
+
+	void *kvaddr;
+	u64 vpu_addr;
+	u32 handle;
+	u32 flags;
+	uintptr_t user_ptr;
+};
+
+enum ivpu_bo_type {
+	IVPU_BO_TYPE_SHMEM = 1,
+	IVPU_BO_TYPE_INTERNAL,
+	IVPU_BO_TYPE_PRIME,
+};
+
+struct ivpu_bo_ops {
+	enum ivpu_bo_type type;
+	const char *name;
+	int (*alloc_pages)(struct ivpu_bo *bo);
+	void (*free_pages)(struct ivpu_bo *bo);
+	int (*map_pages)(struct ivpu_bo *bo);
+	void (*unmap_pages)(struct ivpu_bo *bo);
+};
+
+int ivpu_bo_pin(struct ivpu_bo *bo);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
+struct ivpu_bo *
+ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+void ivpu_bo_free_internal(struct ivpu_bo *bo);
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
+void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
+
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
+{
+	return container_of(obj, struct ivpu_bo, base);
+}
+
+static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
+{
+	if (offset > bo->base.size || !bo->pages)
+		return NULL;
+
+	return bo->pages[offset / PAGE_SIZE];
+}
+
+static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
+{
+	return bo->flags & DRM_IVPU_BO_CACHE_MASK;
+}
+
+static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
+{
+	return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
+}
+
+static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
+{
+	if (bo->flags & DRM_IVPU_BO_WC)
+		return pgprot_writecombine(prot);
+
+	if (bo->flags & DRM_IVPU_BO_UNCACHED)
+		return pgprot_noncached(prot);
+
+	return prot;
+}
+
+static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
+{
+	return to_ivpu_device(bo->base.dev);
+}
+
+static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
+{
+	if (vpu_addr < bo->vpu_addr)
+		return NULL;
+
+	if (vpu_addr >= (bo->vpu_addr + bo->base.size))
+		return NULL;
+
+	return bo->kvaddr + (vpu_addr - bo->vpu_addr);
+}
+
+static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
+{
+	if (cpu_addr < bo->kvaddr)
+		return 0;
+
+	if (cpu_addr >= (bo->kvaddr + bo->base.size))
+		return 0;
+
+	return bo->vpu_addr + (cpu_addr - bo->kvaddr);
+}
+
+#endif /* __IVPU_GEM_H__ */
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 543347df51a1f..093b83c5697ea 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -17,6 +17,8 @@ extern "C" {
 
 #define DRM_IVPU_GET_PARAM		  0x00
 #define DRM_IVPU_SET_PARAM		  0x01
+#define DRM_IVPU_BO_CREATE		  0x02
+#define DRM_IVPU_BO_INFO		  0x03
 
 #define DRM_IOCTL_IVPU_GET_PARAM                                               \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param)
@@ -24,6 +26,12 @@ extern "C" {
 #define DRM_IOCTL_IVPU_SET_PARAM                                               \
 	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param)
 
+#define DRM_IOCTL_IVPU_BO_CREATE                                               \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, struct drm_ivpu_bo_create)
+
+#define DRM_IOCTL_IVPU_BO_INFO                                                 \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info)
+
 /**
  * DOC: contexts
  *
@@ -92,6 +100,92 @@ struct drm_ivpu_param {
 	__u64 value;
 };
 
+#define DRM_IVPU_BO_HIGH_MEM   0x00000001
+#define DRM_IVPU_BO_MAPPABLE   0x00000002
+
+#define DRM_IVPU_BO_CACHED     0x00000000
+#define DRM_IVPU_BO_UNCACHED   0x00010000
+#define DRM_IVPU_BO_WC	       0x00020000
+#define DRM_IVPU_BO_CACHE_MASK 0x00030000
+
+#define DRM_IVPU_BO_FLAGS \
+	(DRM_IVPU_BO_HIGH_MEM | \
+	 DRM_IVPU_BO_MAPPABLE | \
+	 DRM_IVPU_BO_CACHE_MASK)
+
+/**
+ * struct drm_ivpu_bo_create - Create BO backed by SHMEM
+ *
+ * Create GEM buffer object allocated in SHMEM memory.
+ */
+struct drm_ivpu_bo_create {
+	/** @size: The size in bytes of the allocated memory */
+	__u64 size;
+
+	/**
+	 * @flags:
+	 *
+	 * Supported flags:
+	 *
+	 * %DRM_IVPU_BO_HIGH_MEM:
+	 *
+	 * Allocate VPU address from >4GB range.
+	 * Buffer object with vpu address >4GB can be always accessed by the
+	 * VPU DMA engine, but some HW generation may not be able to access
+	 * this memory from then firmware running on the VPU management processor.
+	 * Suitable for input, output and some scratch buffers.
+	 *
+	 * %DRM_IVPU_BO_MAPPABLE:
+	 *
+	 * Buffer object can be mapped using mmap().
+	 *
+	 * %DRM_IVPU_BO_CACHED:
+	 *
+	 * Allocated BO will be cached on host side (WB) and snooped on the VPU side.
+	 * This is the default caching mode.
+	 *
+	 * %DRM_IVPU_BO_UNCACHED:
+	 *
+	 * Allocated BO will not be cached on host side nor snooped on the VPU side.
+	 *
+	 * %DRM_IVPU_BO_WC:
+	 *
+	 * Allocated BO will use write combining buffer for writes but reads will be
+	 * uncached.
+	 */
+	__u32 flags;
+
+	/** @handle: Returned GEM object handle */
+	__u32 handle;
+
+	/** @vpu_addr: Returned VPU virtual address */
+	__u64 vpu_addr;
+};
+
+/**
+ * struct drm_ivpu_bo_info - Query buffer object info
+ */
+struct drm_ivpu_bo_info {
+	/** @handle: Handle of the queried BO */
+	__u32 handle;
+
+	/** @flags: Returned flags used to create the BO */
+	__u32 flags;
+
+	/** @vpu_addr: Returned VPU virtual address */
+	__u64 vpu_addr;
+
+	/**
+	 * @mmap_offset:
+	 *
+	 * Returned offset to be used in mmap(). 0 in case the BO is not mappable.
+	 */
+	__u64 mmap_offset;
+
+	/** @size: Returned GEM object size, aligned to PAGE_SIZE */
+	__u64 size;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.34.1