[PATCH 11/23] drm/amdgpu: implement context save area(CSA) feature

Xiangliang Yu <Xiangliang.Yu@amd.com>
Sat Dec 17 16:16:33 UTC 2016


The CSA (context save area) is needed for world switch. This patch
implements the CSA feature and maps the CSA into each VM, so the
hardware can save its state into the area and restore it when the VM
runs again.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
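For reviewers, a rough usage sketch of the per-VM addresses this patch
populates. example_get_csa_addrs() is a hypothetical helper for
illustration only and is not added anywhere in this series; it only
reads the vm->csa.csa_addr and vm->csa.gds_addr fields that
amdgpu_vm_map_csa() fills in at VM init time.

#include "amdgpu.h"

/*
 * Hypothetical consumer: hand the per-VM CSA metadata address and the
 * gds-gfx backup address to whatever emits the world-switch metadata
 * for a job running in this VM.
 */
static void example_get_csa_addrs(struct amdgpu_vm *vm,
				  uint64_t *csa_addr, uint64_t *gds_addr)
{
	/* Filled in by amdgpu_vm_map_csa() when the VM was created. */
	*csa_addr = vm->csa.csa_addr;	/* one page of CSA metadata */
	*gds_addr = vm->csa.gds_addr;	/* gds-gfx backup follows it */
}

The layout mirrors xgpu_init_csa(): one page of CSA metadata followed
by the gds-gfx backup, so each VM ends up with the CSA mapped at the
same virtual address.
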
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +
 drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++
 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 +++++++++++++++++++++++++++++++
 5 files changed, 247 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 8ee70f8..dff1248 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -24,6 +24,8 @@
 #ifndef AMDGPU_VIRT_H
 #define AMDGPU_VIRT_H
 
+struct amdgpu_vm;
+
 #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */
 #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */
 #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */
@@ -33,6 +35,14 @@ struct amdgpu_virtualization {
 	uint32_t virtual_caps;
 };
 
+struct amdgpu_csa {
+	struct amdgpu_bo_va		*va;
+	struct ttm_validate_buffer	tv;
+	uint64_t			reserved_top;
+	uint64_t			csa_addr;
+	uint64_t			gds_addr;
+};
+
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
 
@@ -55,4 +65,8 @@ static inline bool is_virtual_machine(void)
 }
 
 int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
+
+/* Context Save Area functions */
+int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d05546e..98540d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1539,6 +1539,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	pd_size = amdgpu_vm_directory_size(adev);
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
+	vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;
+
 	/* allocate page table array */
 	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
@@ -1576,6 +1578,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 	amdgpu_bo_unreserve(vm->page_directory);
 
+	r = amdgpu_vm_map_csa(adev, vm);
+	if (r)
+		goto error_free_page_directory;
+
 	return 0;
 
 error_free_page_directory:
@@ -1606,6 +1612,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	int i;
 
+	amdgpu_vm_unmap_csa(adev, vm);
+
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 42a629b..d90630a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,6 +29,7 @@
 #include "gpu_scheduler.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
+#include "amdgpu_virt.h"
 
 struct amdgpu_bo_va;
 struct amdgpu_job;
@@ -109,6 +110,9 @@ struct amdgpu_vm {
 	/* Scheduler entity for page table updates */
 	struct amd_sched_entity	entity;
 
+	/* Context Save Area */
+	struct amdgpu_csa	csa;
+
 	/* client id */
 	u64                     client_id;
 };
diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
index 6ab13bc..a25e07f 100644
--- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
+++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
@@ -26,12 +26,24 @@
 
 #include "amdgpu.h"
 
+/* xgpu structures */
+struct amd_xgpu_csa {
+	struct amdgpu_bo	    *robj;
+	uint64_t		    gpu_addr;
+	uint64_t		    gds_addr;
+	int32_t			    size;
+};
+
 struct amd_xgpu {
 	struct amdgpu_device	*adev;
 	struct mutex		lock;
+	struct amd_xgpu_csa	sa;
 	u32			reg_val_offs;
 };
 
 extern int amd_xgpu_alloc(struct amdgpu_device *adev);
 extern void amd_xgpu_free(struct amd_xgpu *xgpu);
+
+extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);
+extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);
 #endif
diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
new file mode 100644
index 0000000..246a747
--- /dev/null
+++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Xiangliang.Yu@amd.com
+ *	    Monk.Liu@amd.com
+ *
+ */
+#include "amd_mxgpu.h"
+#include "vid.h"
+
+static int xgpu_init_csa(struct amdgpu_device *adev, struct amd_xgpu_csa *sa)
+{
+	int r, size;
+	void *ptr;
+
+	/* CSA meta data (one page) + gds-gfx backup (gfx partition size) */
+	size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;
+
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_GTT,
+			     AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+			     NULL, NULL, &sa->robj);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate csa bo\n", r);
+		return r;
+	}
+
+	r = amdgpu_bo_reserve(sa->robj, true);
+	if (unlikely(r != 0))
+		goto error_free;
+
+	r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, &sa->gpu_addr);
+	if (r)
+		goto error_unreserve;
+
+	r = amdgpu_bo_kmap(sa->robj, &ptr);
+	if (r)
+		goto error_unpin;
+
+	memset(ptr, 0, size);
+	amdgpu_bo_unreserve(sa->robj);
+
+	sa->size = size;
+	sa->gds_addr = sa->gpu_addr + PAGE_SIZE;
+
+	return 0;
+
+error_unpin:
+	amdgpu_bo_unpin(sa->robj);
+error_unreserve:
+	amdgpu_bo_unreserve(sa->robj);
+error_free:
+	amdgpu_bo_unref(&sa->robj);
+	return r;
+}
+
+int xgpu_allocate_csa(struct amd_xgpu *xgpu)
+{
+	struct amdgpu_device *adev = xgpu->adev;
+	struct amd_xgpu_csa *sa = &xgpu->sa;
+
+	return xgpu_init_csa(adev, sa);
+}
+
+void xgpu_destroy_csa(struct amd_xgpu_csa *sa)
+{
+	amdgpu_bo_reserve(sa->robj, true);
+	amdgpu_bo_unpin(sa->robj);
+	amdgpu_bo_unreserve(sa->robj);
+	amdgpu_bo_unref(&sa->robj);
+	sa->gpu_addr = 0;
+	sa->gds_addr = 0;
+}
+
+static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   struct amd_xgpu_csa *sa)
+{
+	int r;
+	uint64_t vaddr;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
+	struct amdgpu_bo_list_entry pd;
+	struct amdgpu_bo_va *bo_va;
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	INIT_LIST_HEAD(&vm->csa.tv.head);
+	vm->csa.tv.bo = &sa->robj->tbo;
+	vm->csa.tv.shared = true;
+
+	list_add(&vm->csa.tv.head, &list);
+	amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+	spin_lock(&vm->status_lock);
+	vm->csa.reserved_top -= sa->size;
+	vaddr = vm->csa.reserved_top;
+	spin_unlock(&vm->status_lock);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	if (r) {
+		DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
+		return r;
+	}
+
+	bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);
+	if (!bo_va) {
+		DRM_ERROR("failed to create bo_va for global CSA buffer.\n");
+		ttm_eu_backoff_reservation(&ticket, &list);
+		return -ENOMEM;
+	}
+
+	r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);
+		amdgpu_vm_bo_rmv(adev, bo_va);
+		ttm_eu_backoff_reservation(&ticket, &list);
+		kfree(bo_va);
+		return r;
+	}
+
+	ttm_eu_backoff_reservation(&ticket, &list);
+	amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);
+
+	vm->csa.va = bo_va;
+	vm->csa.csa_addr = vaddr;
+	vm->csa.gds_addr = vaddr + PAGE_SIZE;
+
+	return 0;
+}
+
+int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
+	struct amd_xgpu_csa *sa = NULL;
+
+	if (!xgpu)
+		return 0;
+
+	sa = &xgpu->sa;
+
+	return xgpu_vm_map_csa(adev, vm, sa);
+}
+
+static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			      struct amd_xgpu_csa *sa)
+{
+	int r;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
+	struct amdgpu_bo_list_entry pd;
+
+	if (!vm->csa.va)
+		return;
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	list_add(&vm->csa.tv.head, &list);
+	amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	if (r) {
+		DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
+		return;
+	}
+
+	amdgpu_vm_bo_rmv(adev, vm->csa.va);
+	/* clearing is best effort here since the VM is about to go away */
+	r = amdgpu_vm_clear_freed(adev, vm);
+	if (r)
+		DRM_ERROR("failed to clear global CSA bo(%d).\n", r);
+
+	/* back off the reservation and drop the CSA pointers even if
+	 * clearing failed, the VM is being torn down anyway */
+	ttm_eu_backoff_reservation(&ticket, &list);
+	vm->csa.va = NULL;
+	vm->csa.csa_addr = vm->csa.gds_addr = 0;
+}
+
+void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
+	struct amd_xgpu_csa *sa = NULL;
+
+	if (!xgpu)
+		return;
+
+	sa = &xgpu->sa;
+	xgpu_vm_unmap_csa(adev, vm, sa);
+}
-- 
2.7.4


