[PATCH] drm/amd: Enable checkpoint and restore of VRAM BOs with no VA
Ramesh Errabolu
Ramesh.Errabolu at amd.com
Thu Nov 16 02:47:05 UTC 2023
Tag VRAM BOs that do not have a VA with a unique ID, a 128-bit
UUID. This unique ID is used to distinguish BOs that would
otherwise look alike because they have the same size. Checkpoint
and restore assume that these BOs are not imported into a DRM
device that is accessible from the current process, its parent,
or a child process.
Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 +-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 29 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 190 ++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 103 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 17 ++
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 ++-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +-
9 files changed, 370 insertions(+), 10 deletions(-)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 260e32ef7bae..851e2c4db372 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -270,7 +270,8 @@ amdgpu-y += \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
- amdgpu_amdkfd_gfx_v11.o
+ amdgpu_amdkfd_gfx_v11.o \
+ amdgpu_criu.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fcf8a98ad15e..6c0d7e6a66cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -289,7 +289,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags, bool criu_resume);
+ uint64_t *offset, uint32_t flags,
+ bool criu_resume, uuid_t *uuid);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2e302956a279..b139ffd519e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
+#include <linux/uuid.h>
#include <drm/ttm/ttm_tt.h>
#include "amdgpu_object.h"
@@ -35,6 +36,7 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
+#include "amdgpu_criu.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
@@ -1718,7 +1720,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags, bool criu_resume)
+ uint64_t *offset, uint32_t flags,
+ bool criu_resume, uuid_t *uuid)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
@@ -1814,13 +1817,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
va, (*mem)->aql_queue ? size << 1 : size,
domain_string(alloc_domain), xcp_id);
- ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
- bo_type, NULL, &gobj, xcp_id + 1);
+ /* During CRIU restore, a VRAM BO with no VA is looked up in the
+ * CRIU BO table first: restore_vram_bo() returns the previously
+ * constructed BO if there is one, otherwise it constructs a new BO
+ */
+ if (criu_resume && (va == 0) && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+ ret = restore_vram_bo(adev, aligned_size, 1, alloc_flags, uuid,
+ &gobj, xcp_id + 1);
+ else
+ ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain,
+ alloc_flags, bo_type, NULL, &gobj, xcp_id + 1);
+
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
goto err_bo_create;
}
+
ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
if (ret) {
pr_debug("Failed to allow vma node access. ret %d\n", ret);
@@ -1843,6 +1856,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+ /* Initialize the UUID field of a BO that:
+ * - Represents a VRAM BO
+ * - Does not have a VA bound
+ * - Is allocated outside CRIU Resume procedure
+ */
+ if (!criu_resume && (va == 0) && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) {
+ if (uuid_is_null(&bo->uuid))
+ uuid_gen(&bo->uuid);
+ }
+
if (user_addr) {
pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
ret = init_user_pages(*mem, user_addr, criu_resume);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
new file mode 100644
index 000000000000..4b43a3df6913
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_criu.h"
+
+/*
+ * Hash table to host BOs that have their unique IDs initialized
+ * The table comes into play during CRIU Restore procedure
+ *
+ * @note: Currently these BOs encapsulate device memory i.e. are
+ * VRAM BOs
+ */
+DECLARE_HASHTABLE(criu_bo_table, CRIU_BO_TABLE_SIZE);
+static DEFINE_MUTEX(criu_mutex);
+
+/* Global counter to track life of Hash table */
+atomic_t criu_bo_counter = ATOMIC_INIT(0);
+
+void print_uuid(uuid_t *uuid)
+{
+ pr_err("\n");
+ for (int idx = 0; idx < 16; idx++)
+ pr_err("Idx[%d] %d\n", idx, uuid->b[idx]);
+ pr_err("\n");
+}
+
+/* Debug helper: dump @uuid1 and @uuid2 side by side, one byte pair per line */
+void print_uuid_compare(uuid_t *uuid1, uuid_t *uuid2)
+{
+	int idx = 0;
+
+	pr_err("\n");
+	while (idx < UUID_SIZE) {
+		pr_err("Idx[%d] %d, %d\n", idx, uuid1->b[idx], uuid2->b[idx]);
+		idx++;
+	}
+	pr_err("\n");
+}
+
+/*
+ * Add @cntr to the global CRIU BO table counter.
+ *
+ * If the counter is zero on entry the hash table is initialized before the
+ * counter is raised. Both steps happen under criu_mutex.
+ */
+void inc_table_counter(uint32_t cntr)
+{
+	mutex_lock(&criu_mutex);
+	if (atomic_read(&criu_bo_counter) == 0) {
+		pr_debug("%s(), Invoking hash_init api\n", __func__);
+		hash_init(criu_bo_table);
+	}
+
+	atomic_add(cntr, &criu_bo_counter);
+	mutex_unlock(&criu_mutex);
+}
+
+/*
+ * Empty the CRIU BO table and release every entry.
+ *
+ * Entries are allocated with kzalloc() in restore_vram_bo(), so unlinking
+ * them without kfree() leaks them. The _safe iterator is needed because
+ * entries are removed and freed while the buckets are being walked.
+ *
+ * Called from dec_table_counter() with criu_mutex held; the lookup and
+ * insert helpers in this file take the same mutex, so no RCU grace period
+ * is needed before freeing.
+ */
+static void free_bo_table(void)
+{
+	struct criu_bo_uuid *bo_uuid;
+	struct hlist_node *tmp;
+	int bkt;
+
+	hash_for_each_safe(criu_bo_table, bkt, tmp, bo_uuid, node) {
+		hash_del(&bo_uuid->node);
+		kfree(bo_uuid);
+	}
+}
+
+/*
+ * Subtract @cntr from the global CRIU BO table counter.
+ *
+ * When the counter reaches zero the table is emptied via free_bo_table().
+ * A negative result means increments and decrements are unbalanced and is
+ * reported. The value is held in a signed int so that this check can
+ * actually fire and matches the %d format.
+ */
+void dec_table_counter(uint32_t cntr)
+{
+	int remaining;
+
+	mutex_lock(&criu_mutex);
+	atomic_sub(cntr, &criu_bo_counter);
+	remaining = atomic_read(&criu_bo_counter);
+	if (remaining == 0) {
+		pr_debug("%s(), Invoking free_bo_table api\n", __func__);
+		free_bo_table();
+	} else if (remaining < 0) {
+		pr_err("%s(), BO Table counter is inconsistent: %d\n", __func__, remaining);
+	}
+
+	mutex_unlock(&criu_mutex);
+}
+
+/* Return the current value of the CRIU BO table counter, read under criu_mutex */
+uint32_t query_table_counter(void)
+{
+	uint32_t snapshot;
+
+	mutex_lock(&criu_mutex);
+	snapshot = atomic_read(&criu_bo_counter);
+	mutex_unlock(&criu_mutex);
+
+	return snapshot;
+}
+
+/*
+ * Insert @bo_uuid into the CRIU BO table under criu_mutex.
+ *
+ * The bucket is derived from the address stored in bo_uuid->uuid (the
+ * pointer value itself), not from the UUID bytes it points to.
+ */
+static void add_bo_uuid(struct criu_bo_uuid *bo_uuid)
+{
+	uintptr_t key = (uintptr_t)bo_uuid->uuid;
+
+	mutex_lock(&criu_mutex);
+	hash_add_rcu(criu_bo_table, &bo_uuid->node, key);
+	mutex_unlock(&criu_mutex);
+}
+
+/*
+ * Look up the table entry whose UUID equals @uuid.
+ *
+ * Every bucket is scanned and entries are compared by UUID value with
+ * uuid_equal(), so the result does not depend on which bucket an entry
+ * was hashed into.
+ *
+ * The table is protected by criu_mutex rather than by an RCU read-side
+ * section, so the plain iterator is used; the _rcu variant would trip the
+ * RCU-list lockdep check when walked outside rcu_read_lock().
+ *
+ * Return: the matching entry, or NULL if none matches.
+ */
+static struct criu_bo_uuid *get_bo_uuid(uuid_t *uuid)
+{
+	struct criu_bo_uuid *bo_uuid;
+	int bkt;
+
+	mutex_lock(&criu_mutex);
+	/*
+	 * break leaves the inner bucket walk with bo_uuid set, which also
+	 * ends the outer bucket loop; a full walk without a match leaves
+	 * bo_uuid NULL.
+	 */
+	hash_for_each(criu_bo_table, bkt, bo_uuid, node) {
+		if (uuid_equal(uuid, bo_uuid->uuid))
+			break;
+	}
+	mutex_unlock(&criu_mutex);
+
+	return bo_uuid;
+}
+
+/*
+ * Return the GEM object of the VRAM BO identified by @uuid, building the
+ * BO and recording it in the CRIU BO table on first use. Parameters are
+ * described with the prototype in amdgpu_criu.h.
+ *
+ * Return: 0 on success, -EINVAL if @uuid is NULL, -ENOMEM if the table
+ * entry cannot be allocated, or the error from amdgpu_gem_object_create().
+ */
+int restore_vram_bo(struct amdgpu_device *adev,
+		    unsigned long size, int align, u64 flags,
+		    uuid_t *uuid, struct drm_gem_object **gobj, int8_t xcp_id_plus1)
+{
+	struct criu_bo_uuid *bo_uuid;
+	struct amdgpu_bo *abo;
+	int ret;
+
+	/* A VRAM BO without a VA can only be matched through its UUID */
+	if (unlikely(uuid == NULL)) {
+		pr_err("A NULL UUID is Illegal for VRAM BOs without a VA\n");
+		return -EINVAL;
+	}
+
+	/* Hand back the BO if an earlier call already built it */
+	bo_uuid = get_bo_uuid(uuid);
+	if (bo_uuid != NULL) {
+		abo = bo_uuid->abo;
+		*gobj = &abo->tbo.base;
+		/*
+		 * The creation path below returns an object whose reference
+		 * the caller owns; take one here too so each caller can drop
+		 * its own reference independently.
+		 * NOTE(review): assumes callers put one reference per object
+		 * returned - confirm against the alloc/free paths.
+		 */
+		drm_gem_object_get(*gobj);
+		return 0;
+	}
+
+	/*
+	 * Allocate the table entry before the BO so that an allocation
+	 * failure leaves nothing to unwind.
+	 */
+	bo_uuid = kzalloc(sizeof(*bo_uuid), GFP_KERNEL);
+	if (bo_uuid == NULL)
+		return -ENOMEM;
+
+	ret = amdgpu_gem_object_create(adev, size, align,
+				       AMDGPU_GEM_DOMAIN_VRAM, flags,
+				       ttm_bo_type_device, NULL, gobj,
+				       xcp_id_plus1);
+	if (ret) {
+		pr_err("Failed to Restore VRAM BO, Retval: %d\n", ret);
+		kfree(bo_uuid);
+		return ret;
+	}
+
+	/*
+	 * Stamp the BO with the checkpointed UUID and key the table entry
+	 * on the BO's own copy: the caller's @uuid buffer is not guaranteed
+	 * to outlive this call, the BO's embedded copy lives as long as the
+	 * BO itself.
+	 */
+	abo = gem_to_amdgpu_bo(*gobj);
+	uuid_copy(&abo->uuid, uuid);
+	bo_uuid->abo = abo;
+	bo_uuid->uuid = &abo->uuid;
+	add_bo_uuid(bo_uuid);
+
+	return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
new file mode 100644
index 000000000000..b895c698a2e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef AMDGPU_CRIU_H_INCLUDED
+#define AMDGPU_CRIU_H_INCLUDED
+
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/dma-buf.h>
+#include <linux/sched/mm.h>
+#include <linux/hashtable.h>
+#include <linux/sched/task.h>
+#include <uapi/linux/kfd_ioctl.h>
+
+#include "amdgpu_object.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+
+/* Specify Hash table and its size to host VRAM BOs that have their
+ * unique IDs initialized. These BOs, which can be exported as dma-bufs,
+ * allow user space to bind different virtual addresses on different
+ * DRM devices.
+ *
+ * CRIU_BO_TABLE_SIZE is passed to DECLARE_HASHTABLE() as the number of
+ * hash bits, i.e. the table has (1 << CRIU_BO_TABLE_SIZE) buckets.
+ *
+ * @note: Currently these BOs encapsulate device memory i.e. are VRAM BOs
+ */
+#define CRIU_BO_TABLE_SIZE 8
+extern DECLARE_HASHTABLE(criu_bo_table, CRIU_BO_TABLE_SIZE);
+
+struct criu_bo_uuid {
+
+ /* Unique ID of BO, serves the role of KEY. Only a pointer is kept
+ * here, so the UUID it points to must remain valid for as long as
+ * this entry can be looked up.
+ */
+ uuid_t *uuid;
+
+ /* Handle of BO, serves the role of VALUE. This is the BO that
+ * restore_vram_bo() hands back when a lookup by UUID succeeds.
+ * NOTE(review): no BO reference appears to be held on behalf of
+ * the table entry itself - confirm the BO outlives the entry.
+ */
+ struct amdgpu_bo *abo;
+
+ /* Links this entry into a bucket of criu_bo_table */
+ struct hlist_node node;
+};
+
+
+/* Global counter to track life of Hash table */
+extern atomic_t criu_bo_counter;
+
+/**
+ * restore_vram_bo() - Returns handle of a GEM object either by look up
+ * or by construction. Look up a Global BO table to determine if the BO
+ * of concern has already been constructed. By construction if the look
+ * up fails to find the BO in the global BO table
+ *
+ * NOTE: Following two conditions must be held TRUE when invoking this method
+ * - This method deals with VRAM BOs only. Invoking this method to handle
+ * BOs of other kinds is invalid.
+ * - This method is invoked during a CRIU Restore procedure. An invocation
+ * outside of this scenario is invalid
+ *
+ * @adev: Handle of device to use in construction
+ * @size: BO's memory size in bytes
+ * @align: Alignment requirements, if any, in allocating memory
+ * @flags: Flags to apply in allocating memory
+ * @uuid: Handle of UUID object to be restored
+ * @gobj: Output parameter updated with handle of GEM object
+ * @xcp_id_plus1: ID of the XCD on which BO is to be created
+ *
+ * Return: ZERO if successful, a negative value in case of error
+ */
+int restore_vram_bo(struct amdgpu_device *adev,
+ unsigned long size, int align, u64 flags,
+ uuid_t *uuid, struct drm_gem_object **gobj,
+ int8_t xcp_id_plus1);
+
+void print_uuid(uuid_t *uuid);
+void print_uuid_compare(uuid_t *uuid1, uuid_t *uuid2);
+
+uint32_t query_table_counter(void);
+void inc_table_counter(uint32_t cntr);
+void dec_table_counter(uint32_t cntr);
+
+#endif /* AMDGPU_CRIU_H_INCLUDED */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d28e21baef16..dc61b252fe49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -117,6 +117,23 @@ struct amdgpu_bo {
* for memory accounting.
*/
int8_t xcp_id;
+
+ /*
+ * @uuid: Unique ID of a BO that is being exported. The 128-bit ID is
+ * considered to be unique across processes and time. One use of this
+ * ID is to support CRIU operations of Checkpointing & Restore.
+ *
+ * ID is a byte array of length UUID_SIZE. This is to accommodate UUID,
+ * a 128-bit number defined by RFC 4122. Hex string form of UUID is
+ * defined as a sequence of 32 hexadecimal digits, divided into five
+ * groups that are delimited by hyphens "-". The sequence of groups
+ * from length perspective is: 8-4-4-4-12.
+ *
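+ * The default value of this field is all zeros. It is initialized to a
+ * non-zero value when a BO is exported using GEM Prime APIs. Currently
+ * the only BOs that can be exported are GTT and VRAM BOs.
+ *
+ * NOTE(review): in this patch the ID is generated when a VRAM BO with
+ * no VA is allocated outside CRIU resume, and copied from checkpoint
+ * data on restore - confirm whether "when exported" is still accurate.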
+ */
+ uuid_t uuid;
};
struct amdgpu_bo_user {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..310a48b627ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
#include <linux/dma-buf.h>
#include <linux/fdtable.h>
#include <linux/processor.h>
+#include <linux/uuid.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
@@ -45,6 +46,7 @@
#include "kfd_smi_events.h"
#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"
+#include "amdgpu_criu.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -1147,7 +1149,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->adev, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
- flags, false);
+ flags, false, NULL);
if (err)
goto err_unlock;
@@ -1848,6 +1850,12 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
idr_for_each_entry(&pdd->alloc_idr, mem, id) {
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ /* Count BOs whose VA is either zero or strictly above
+ * the GPUVM base address
+ *
+ * @note: BOs with a non-zero VA below the GPUVM base are
+ * used internally, e.g. Trap handler buffer
+ */
if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
num_of_bos++;
}
@@ -1936,6 +1944,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
bo_priv->idr_handle = id;
+ /* Copy uuid of BO that identifies it uniquely
+ * Currently this is true for only VRAM BOs that
+ * have been exported
+ */
+ uuid_copy((uuid_t *)bo_priv->uuid, &kgd_mem->bo->uuid);
+
if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
&bo_priv->user_addr);
@@ -2295,6 +2309,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
int ret;
const bool criu_resume = true;
u64 offset;
+ uuid_t *uuid;
if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (bo_bucket->size !=
@@ -2318,10 +2333,17 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
offset = bo_priv->user_addr;
}
- /* Create the BO */
+
+ /* Acquire handle of UUID of BO if need be */
+ uuid = NULL;
+ if ((bo_bucket->addr == 0) &&
+ (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+ uuid = (uuid_t *)bo_priv->uuid;
+
ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
bo_bucket->size, pdd->drm_priv, kgd_mem,
- &offset, bo_bucket->alloc_flags, criu_resume);
+ &offset, bo_bucket->alloc_flags,
+ criu_resume, uuid);
if (ret) {
pr_err("Could not create the BO\n");
return ret;
@@ -2728,10 +2750,12 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
ret = criu_unpause(filep, p, args);
break;
case KFD_CRIU_OP_RESTORE:
+ inc_table_counter(p->n_pdds);
ret = criu_restore(filep, p, args);
break;
case KFD_CRIU_OP_RESUME:
ret = criu_resume(filep, p, args);
+ dec_table_counter(p->n_pdds);
break;
default:
dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a40f8cfc6aa5..320408239896 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1178,7 +1178,7 @@ int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep);
* kfd_criu_svm_range_priv_data
*/
-#define KFD_CRIU_PRIV_VERSION 1
+#define KFD_CRIU_PRIV_VERSION 2
struct kfd_criu_process_priv_data {
uint32_t version;
@@ -1193,6 +1193,7 @@ struct kfd_criu_device_priv_data {
struct kfd_criu_bo_priv_data {
uint64_t user_addr;
uint32_t idr_handle;
+ uint8_t uuid[16]; /* Unique Id of BO whose size is UUID_SIZE */
uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index c10d050e1a61..1969eb9375c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -716,7 +716,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
pdd->drm_priv, mem, NULL,
- flags, false);
+ flags, false, NULL);
if (err)
goto err_alloc_mem;
--
2.34.1
More information about the amd-gfx
mailing list