[PATCH] drm/amd: Enable checkpoint and restore of VRAM BOs with no VA

Ramesh Errabolu Ramesh.Errabolu at amd.com
Thu Nov 16 02:47:05 UTC 2023


Tag VRAM BOs that do not have a VA with a unique Id, a 128-bit
UUID. This unique Id is used to distinguish BOs that might
otherwise be of the same size. Checkpoint and restore assume
that these BOs are not imported into a DRM device that is
accessible either from the current process or from its parent
or child process.

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  29 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c      | 190 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h      | 103 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  17 ++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  30 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   2 +-
 9 files changed, 370 insertions(+), 10 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 260e32ef7bae..851e2c4db372 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -270,7 +270,8 @@ amdgpu-y += \
 	amdgpu_amdkfd_gc_9_4_3.o \
 	amdgpu_amdkfd_gfx_v10.o \
 	amdgpu_amdkfd_gfx_v10_3.o \
-	amdgpu_amdkfd_gfx_v11.o
+	amdgpu_amdkfd_gfx_v11.o \
+	amdgpu_criu.o
 
 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
 amdgpu-y += amdgpu_amdkfd_gfx_v7.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fcf8a98ad15e..6c0d7e6a66cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -289,7 +289,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
-		uint64_t *offset, uint32_t flags, bool criu_resume);
+		uint64_t *offset, uint32_t flags,
+		bool criu_resume, uuid_t *uuid);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2e302956a279..b139ffd519e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/uuid.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include "amdgpu_object.h"
@@ -35,6 +36,7 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "amdgpu_criu.h"
 #include "kfd_priv.h"
 #include "kfd_smi_events.h"
 
@@ -1718,7 +1720,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
-		uint64_t *offset, uint32_t flags, bool criu_resume)
+		uint64_t *offset, uint32_t flags,
+		bool criu_resume, uuid_t *uuid)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
@@ -1814,13 +1817,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		 va, (*mem)->aql_queue ? size << 1 : size,
 		 domain_string(alloc_domain), xcp_id);
 
-	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
-				       bo_type, NULL, &gobj, xcp_id + 1);
+	/* Construction of VRAM BO one with no VA, during CRIU Restore
+	 * should consult BO table. Will return either a previously
+	 * constructed BO or will construct a BO anew
+	 */
+	if (criu_resume && (va == 0) && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+		ret = restore_vram_bo(adev, aligned_size, 1, alloc_flags, uuid,
+				&gobj, xcp_id + 1);
+	else
+		ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain,
+				alloc_flags, bo_type, NULL, &gobj, xcp_id + 1);
+
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
 			 domain_string(alloc_domain), ret);
 		goto err_bo_create;
 	}
+
 	ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
 	if (ret) {
 		pr_debug("Failed to allow vma node access. ret %d\n", ret);
@@ -1843,6 +1856,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
+	/* Initialize the UUID field of a BO that:
+	 *     - Represents a VRAM BO
+	 *     - Does not have a VA bound
+	 *     - Is allocated outside CRIU Resume procedure
+	 */
+	if (!criu_resume && (va == 0) && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) {
+		if (uuid_is_null(&bo->uuid))
+			uuid_gen(&bo->uuid);
+	}
+
 	if (user_addr) {
 		pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
 		ret = init_user_pages(*mem, user_addr, criu_resume);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
new file mode 100644
index 000000000000..4b43a3df6913
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_criu.h"
+
+/*
+ * Hash table of BOs that carry a unique ID (UUID). Within this file it
+ * is consulted and populated only by restore_vram_bo(), i.e. during a
+ * CRIU Restore procedure. Currently only VRAM BOs are tracked.
+ *
+ * criu_mutex is held by every function in this file that touches the
+ * table or updates criu_bo_counter.
+ */
+DECLARE_HASHTABLE(criu_bo_table, CRIU_BO_TABLE_SIZE);
+static DEFINE_MUTEX(criu_mutex);
+
+/* Table reference count: table is initialised at 0 and emptied on return to 0 */
+atomic_t criu_bo_counter = ATOMIC_INIT(0);
+
+/* Debug aid: log @uuid in canonical 8-4-4-4-12 hex form as one record */
+void print_uuid(uuid_t *uuid)
+{
+	if (!uuid)
+		return;
+	pr_debug("UUID: %pUb\n", uuid);
+}
+
+/* Debug aid: log two UUIDs side by side in canonical 8-4-4-4-12 hex form */
+void print_uuid_compare(uuid_t *uuid1, uuid_t *uuid2)
+{
+	if (!uuid1 || !uuid2)
+		return;
+	pr_debug("UUID: %pUb vs %pUb\n", uuid1, uuid2);
+}
+
+/*
+ * Take @cntr references on the BO table. The table is initialised with
+ * hash_init() whenever the count is zero on entry.
+ */
+void inc_table_counter(uint32_t cntr)
+{
+	mutex_lock(&criu_mutex);
+	if (atomic_read(&criu_bo_counter) == 0) {
+		pr_debug("%s(), Invoking hash_init api\n", __func__);
+		hash_init(criu_bo_table);
+	}
+
+	atomic_add(cntr, &criu_bo_counter);
+	mutex_unlock(&criu_mutex);
+}
+
+/* Unlink and free every table entry; the caller must hold criu_mutex */
+static void free_bo_table(void)
+{
+	struct criu_bo_uuid *bo_uuid;
+	struct hlist_node *tmp;
+	uint32_t bkt;
+
+	hash_for_each_safe(criu_bo_table, bkt, tmp, bo_uuid, node) {
+		hash_del(&bo_uuid->node);
+		kfree(bo_uuid);
+	}
+}
+
+/* Drop @cntr table references; dropping the last one empties the table */
+void dec_table_counter(uint32_t cntr)
+{
+	mutex_lock(&criu_mutex);
+	if (atomic_sub_return(cntr, &criu_bo_counter) == 0) {
+		pr_debug("%s(), Invoking free_bo_table api\n", __func__);
+		free_bo_table();
+	} else if (atomic_read(&criu_bo_counter) < 0) {
+		pr_err("%s(), BO Table counter is inconsistent: %d\n", __func__,
+		       atomic_read(&criu_bo_counter));
+	}
+	mutex_unlock(&criu_mutex);
+}
+
+/*
+ * Return the current number of table references. The value is only a
+ * snapshot: it may change as soon as this function returns.
+ */
+uint32_t query_table_counter(void)
+{
+	/* atomic_read() is safe without criu_mutex; no need to sleep here */
+	return atomic_read(&criu_bo_counter);
+}
+
+/* Add @bo_uuid to the table; the hash key is the address held in ->uuid */
+static void add_bo_uuid(struct criu_bo_uuid *bo_uuid)
+{
+	mutex_lock(&criu_mutex);
+	hash_add_rcu(criu_bo_table, &bo_uuid->node, (uintptr_t)bo_uuid->uuid);
+	mutex_unlock(&criu_mutex);
+}
+
+/*
+ * Find the table entry whose UUID equals @uuid by value.
+ *
+ * Every bucket is scanned because entries are hashed on the address of
+ * their UUID (see add_bo_uuid()), which @uuid cannot reproduce.
+ * Return: the matching entry, or NULL when there is none.
+ */
+static struct criu_bo_uuid *get_bo_uuid(uuid_t *uuid)
+{
+	struct criu_bo_uuid *bo_uuid = NULL;
+	uint32_t bkt;
+
+	mutex_lock(&criu_mutex);
+	hash_for_each(criu_bo_table, bkt, bo_uuid, node)
+		if (uuid_equal(uuid, bo_uuid->uuid))
+			break;
+	mutex_unlock(&criu_mutex);
+	return bo_uuid;
+}
+
+/*
+ * Return, via @gobj, the VRAM BO tagged with @uuid: the BO already in the
+ * table if this UUID has been restored before, otherwise a newly created
+ * one that is then added to the table. See amdgpu_criu.h for parameters.
+ */
+int restore_vram_bo(struct amdgpu_device *adev,
+		    unsigned long size, int align, u64 flags,
+		    uuid_t *uuid, struct drm_gem_object **gobj, int8_t xcp_id_plus1)
+{
+	struct criu_bo_uuid *bo_uuid;
+	struct amdgpu_bo *abo;
+	int ret;
+
+	/* Only VRAM BOs without a VA reach here, and those must carry a UUID */
+	if (unlikely(uuid == NULL)) {
+		pr_err("A NULL UUID is Illegal for VRAM BOs without a VA\n");
+		return -EINVAL;
+	}
+
+	/* Table hit: hand out the existing BO with a reference of its own */
+	bo_uuid = get_bo_uuid(uuid);
+	if (bo_uuid != NULL) {
+		*gobj = &bo_uuid->abo->tbo.base;
+		drm_gem_object_get(*gobj);
+		return 0;
+	}
+
+	/* Allocate the table entry first so that failure leaves no BO behind */
+	bo_uuid = kzalloc(sizeof(*bo_uuid), GFP_KERNEL);
+	if (bo_uuid == NULL)
+		return -ENOMEM;
+
+	ret = amdgpu_gem_object_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM,
+				       flags, ttm_bo_type_device, NULL, gobj, xcp_id_plus1);
+	if (ret) {
+		pr_err("Failed to Restore VRAM BO, Retval: %d\n", ret);
+		kfree(bo_uuid);
+		return ret;
+	}
+
+	/*
+	 * Tag the BO with the checkpointed UUID and key the entry on the BO's
+	 * own copy: @uuid is caller-owned storage whose lifetime is not tied
+	 * to the table entry.
+	 */
+	abo = gem_to_amdgpu_bo(*gobj);
+	uuid_copy(&abo->uuid, uuid);
+	bo_uuid->abo = abo;
+	bo_uuid->uuid = &abo->uuid;
+	add_bo_uuid(bo_uuid);
+
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
new file mode 100644
index 000000000000..b895c698a2e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef AMDGPU_CRIU_H_INCLUDED
+#define AMDGPU_CRIU_H_INCLUDED
+
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/dma-buf.h>
+#include <linux/sched/mm.h>
+#include <linux/hashtable.h>
+#include <linux/sched/task.h>
+#include <uapi/linux/kfd_ioctl.h>
+
+#include "amdgpu_object.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+
+/* Specify Hash table and its size to host VRAM BOs that have their
+ * unique IDs initialized. These BOs, which can be exported as Dmabuf,
+ * allow user space to bind different virtual addresses on different
+ * DRM devices.
+ *
+ * @note: Currently these BOs encapsulate device memory i.e. are VRAM BOs
+ */
+#define CRIU_BO_TABLE_SIZE	    8
+extern DECLARE_HASHTABLE(criu_bo_table, CRIU_BO_TABLE_SIZE);
+
+struct criu_bo_uuid {
+
+	/* KEY: points to the unique ID; the pointed-to storage is not owned here */
+	uuid_t *uuid;
+
+	/* VALUE: handle of the BO associated with that unique ID */
+	struct amdgpu_bo *abo;
+
+	/* Links this entry into its criu_bo_table bucket */
+	struct hlist_node node;
+};
+
+
+/* Global counter to track life of Hash table */
+extern atomic_t criu_bo_counter;
+
+/**
+ * restore_vram_bo() - Returns handle of a GEM object either by look up
+ * or by construction. Look up a Global BO table to determine if the BO
+ * of concern has already been constructed. By construction if the look
+ * up fails to find the BO in the global BO table
+ *
+ * NOTE: Following two conditions must be held TRUE when invoking this method
+ *  - This method deals with VRAM BOs only. Invoking this method to handle
+ *    BOs of other kinds is invalid.
+ *  - This method is invoked during a CRIU Restore procedure. An Invocation
+ *    outside of this scenario is invalid
+ *
+ * @adev: Handle of device to use in construction
+ * @size: BO's memory size in bytes
+ * @align: Alignment requirements, if any, in allocating memory
+ * @flags: Flags to apply in allocating memory
+ * @uuid: Handle of UUID object to be restored
+ * @gobj: Output parameter updated with handle of GEM object
+ * @xcp_id_plus1: ID of the XCD on which BO is to be created
+ *
+ * Return: ZERO if successful, a negative value in case of error
+ */
+int restore_vram_bo(struct amdgpu_device *adev,
+		    unsigned long size, int align, u64 flags,
+		    uuid_t *uuid, struct drm_gem_object **gobj,
+		    int8_t xcp_id_plus1);
+
+void print_uuid(uuid_t *uuid);
+void print_uuid_compare(uuid_t *uuid1, uuid_t *uuid2);
+
+uint32_t query_table_counter(void);
+void inc_table_counter(uint32_t cntr);
+void dec_table_counter(uint32_t cntr);
+
+#endif	/* AMDGPU_CRIU_H_INCLUDED */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d28e21baef16..dc61b252fe49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -117,6 +117,23 @@ struct amdgpu_bo {
 	 * for memory accounting.
 	 */
 	int8_t				xcp_id;
+
+	/*
+	 * @uuid: Unique ID of a BO that is being exported. The 128-bit ID is
+	 * considered to be unique across processes and time. One use of this
+	 * ID is to support CRIU operations of Checkpointing & Restore.
+	 *
+	 * ID is a byte array of length UUID_SIZE. This is to accommodate UUID,
+	 * a 128-bit number defined by RFC 4122. Hex string form of UUID is
+	 * defined as a sequence of 32 hexadecimal digits, divided into five
+	 * groups that are delimited by hyphens "-". The sequence of groups
+	 * from length perspective is: 8-4-4-4-12.
+	 *
+	 * The default value of this field is set ZEROS. It is initialized to a
+	 * NON-ZERO value when a BO is exported using GEM Prime Apis. Currently
+	 * the only BOs that can be exported are GTT and VRAM BOs.
+	 */
+	uuid_t uuid;
 };
 
 struct amdgpu_bo_user {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..310a48b627ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
 #include <linux/dma-buf.h>
 #include <linux/fdtable.h>
 #include <linux/processor.h>
+#include <linux/uuid.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_svm.h"
@@ -45,6 +46,7 @@
 #include "kfd_smi_events.h"
 #include "amdgpu_dma_buf.h"
 #include "kfd_debug.h"
+#include "amdgpu_criu.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1147,7 +1149,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		dev->adev, args->va_addr, args->size,
 		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
-		flags, false);
+		flags, false, NULL);
 
 	if (err)
 		goto err_unlock;
@@ -1848,6 +1850,12 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
 		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
 			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
 
+			/* Count BOs whose VA is either zero or is above
+			 * the GPUVM base address
+			 *
+			 * @note: BOs whose VA is below GPUVM base are
+			 * used internally, e.g. Trap handler buffer
+			 */
 			if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
 				num_of_bos++;
 		}
@@ -1936,6 +1944,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
 			bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
 			bo_priv->idr_handle = id;
 
+			/* Copy uuid of BO that identifies it uniquely
+			 * Currently this is true for only VRAM BOs that
+			 * have been exported
+			 */
+			uuid_copy((uuid_t *)bo_priv->uuid, &kgd_mem->bo->uuid);
+
 			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 				ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
 								&bo_priv->user_addr);
@@ -2295,6 +2309,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	int ret;
 	const bool criu_resume = true;
 	u64 offset;
+	uuid_t *uuid;
 
 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
 		if (bo_bucket->size !=
@@ -2318,10 +2333,17 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		offset = bo_priv->user_addr;
 	}
-	/* Create the BO */
+
+	/* Acquire handle of UUID of BO if need be */
+	uuid = NULL;
+	if ((bo_bucket->addr == 0) &&
+	    (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+		uuid = (uuid_t *)bo_priv->uuid;
+
 	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
 						      bo_bucket->size, pdd->drm_priv, kgd_mem,
-						      &offset, bo_bucket->alloc_flags, criu_resume);
+						      &offset, bo_bucket->alloc_flags,
+							  criu_resume, uuid);
 	if (ret) {
 		pr_err("Could not create the BO\n");
 		return ret;
@@ -2728,10 +2750,12 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
 		ret = criu_unpause(filep, p, args);
 		break;
 	case KFD_CRIU_OP_RESTORE:
+		inc_table_counter(p->n_pdds);
 		ret = criu_restore(filep, p, args);
 		break;
 	case KFD_CRIU_OP_RESUME:
 		ret = criu_resume(filep, p, args);
+		dec_table_counter(p->n_pdds);
 		break;
 	default:
 		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a40f8cfc6aa5..320408239896 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1178,7 +1178,7 @@ int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep);
  * kfd_criu_svm_range_priv_data
  */
 
-#define KFD_CRIU_PRIV_VERSION 1
+#define KFD_CRIU_PRIV_VERSION 2
 
 struct kfd_criu_process_priv_data {
 	uint32_t version;
@@ -1193,6 +1193,7 @@ struct kfd_criu_device_priv_data {
 struct kfd_criu_bo_priv_data {
 	uint64_t user_addr;
 	uint32_t idr_handle;
+	uint8_t uuid[16];	/* Unique Id of BO whose size is UUID_SIZE */
 	uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index c10d050e1a61..1969eb9375c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -716,7 +716,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 
 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
 						 pdd->drm_priv, mem, NULL,
-						 flags, false);
+						 flags, false, NULL);
 	if (err)
 		goto err_alloc_mem;
 
-- 
2.34.1



More information about the amd-gfx mailing list