[Intel-xe] [RFC] drm/xe: Add VM snapshot support
Souza, Jose
jose.souza at intel.com
Mon Sep 25 14:11:03 UTC 2023
On Sun, 2023-09-24 at 22:02 +0200, Maarten Lankhorst wrote:
> Just an idea I had so far. Some opens:
> - Do we want to set a flag on a VM_BIND or on a BO to choose what to
> snapshot? Likely VM_BIND.
+1 vote for VM_BIND; otherwise we miss external BOs and userptr mappings in the snapshot.
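As a rough illustration of the VM_BIND option (flag name and bit value
are made up here, not part of this patch), the bind path could mark the
VMA directly, so capture would no longer depend on how the BO was
created:

/* Hypothetical sketch: opt a bind into capture via a VM_BIND flag. */
#define XE_VM_BIND_FLAG_CAPTURE		(0x1 << 19)	/* illustrative bit only */

static void vma_mark_snapshottable(struct xe_vma *vma, u32 bind_flags)
{
	if (bind_flags & XE_VM_BIND_FLAG_CAPTURE) {
		INIT_LIST_HEAD(&vma->snap.link);
		vma->gpuva.flags |= XE_VMA_SNAPSHOTTABLE;
	}
}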
> - Handle BO mapping in atomic context? Right now I bind the mapping on VM_BIND,
> because it's easier there when we have all the locks. Due to signaling
> context usage, we can never take the BO lock there reliably.
>
> Signed-off-by: Maarten Lankhorst <dev at lankhorst.se>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 5 +-
> drivers/gpu/drm/xe/xe_devcoredump.c | 9 ++
> drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +
> drivers/gpu/drm/xe/xe_vm.c | 126 ++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_vm.h | 6 ++
> drivers/gpu/drm/xe/xe_vm_types.h | 19 ++++
> 6 files changed, 166 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 27726d4f3423..1f6229da2b2d 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -469,6 +469,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
>
> trace_xe_vma_evict(vma);
>
> + xe_vma_move_notify(vma);
> +
> if (xe_vm_in_fault_mode(vm)) {
> /* Wait for pending binds / unbinds. */
> long timeout;
> @@ -1799,7 +1801,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> return -EINVAL;
>
> bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
> - }
> + } else if (!(bo_flags & XE_BO_CREATE_VRAM_MASK))
> + bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
>
> if (args->vm_id) {
> vm = xe_vm_lookup(xef, args->vm_id);
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 68abc0b195be..298be162ed0c 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -16,6 +16,7 @@
> #include "xe_guc_ct.h"
> #include "xe_guc_submit.h"
> #include "xe_hw_engine.h"
> +#include "xe_vm.h"
>
> /**
> * DOC: Xe device coredump
> @@ -98,6 +99,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> if (coredump->snapshot.hwe[i])
> xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
> &p);
> + if (coredump->snapshot.vm) {
> + drm_printf(&p, "\n**** VM state ****\n");
> + xe_vm_snapshot_print(coredump->snapshot.vm, &p);
> + }
>
> return count - iter.remain;
> }
> @@ -116,6 +121,7 @@ static void xe_devcoredump_free(void *data)
> for (i = 0; i < XE_NUM_HW_ENGINES; i++)
> if (coredump->snapshot.hwe[i])
> xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
> + xe_vm_snapshot_free(coredump->snapshot.vm);
>
> coredump->captured = false;
> drm_info(&coredump_to_xe(coredump)->drm,
> @@ -151,6 +157,8 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>
> coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
> coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
> + if (q->vm)
> + coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
>
> for_each_hw_engine(hwe, q->gt, id) {
> if (hwe->class != q->hwe->class ||
> @@ -194,3 +202,4 @@ void xe_devcoredump(struct xe_exec_queue *q)
> xe_devcoredump_read, xe_devcoredump_free);
> }
> #endif
> +
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index 7fdad9c3d3dd..93c2ad7bdc54 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -33,6 +33,8 @@ struct xe_devcoredump_snapshot {
> struct xe_guc_submit_exec_queue_snapshot *ge;
> /** @hwe: HW Engine snapshot array */
> struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
> + /** @vm: Snapshot of VM state */
> + struct xe_vm_snapshot *vm;
> };
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 2b225c0692a6..276b03847ecc 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -889,6 +889,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> if (is_null)
> vma->gpuva.flags |= DRM_GPUVA_SPARSE;
>
> + if (bo && bo->flags & XE_BO_NEEDS_CPU_ACCESS) {
> + INIT_LIST_HEAD(&vma->snap.link);
> + vma->gpuva.flags |= XE_VMA_SNAPSHOTTABLE;
> + }
> +
> if (tile_mask) {
> vma->tile_mask = tile_mask;
> } else {
> @@ -1238,6 +1243,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>
> INIT_LIST_HEAD(&vm->extobj.list);
>
> + mutex_init(&vm->snap.lock);
> + INIT_LIST_HEAD(&vm->snap.list);
> +
> if (!(flags & XE_VM_FLAG_MIGRATION))
> xe_device_mem_access_get(xe);
>
> @@ -1354,6 +1362,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> dma_resv_unlock(&vm->resv);
> drm_gpuva_manager_destroy(&vm->mgr);
> err_put:
> + mutex_destroy(&vm->snap.lock);
> dma_resv_fini(&vm->resv);
> for_each_tile(tile, xe, id)
> xe_range_fence_tree_fini(&vm->rftree[id]);
> @@ -1638,6 +1647,14 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> cf ? &cf->base : fence);
> }
>
> + if (vma->gpuva.flags & XE_VMA_SNAPSHOTTABLE &&
> + !list_empty(&vma->snap.link)) {
> + mutex_lock(&vm->snap.lock);
> + list_del(&vma->snap.link);
> + vm->snap.num--;
> + mutex_unlock(&vm->snap.lock);
> + }
> +
> return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
>
> err_fences:
> @@ -1669,6 +1686,13 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
>
> trace_xe_vma_bind(vma);
>
> + /* Map for coredump */
> + if (vma->gpuva.flags & XE_VMA_SNAPSHOTTABLE) {
> + err = xe_bo_vmap(xe_vma_bo(vma));
> + if (err)
> + return ERR_PTR(err);
> + }
> +
> if (number_tiles > 1) {
> fences = kmalloc_array(number_tiles, sizeof(*fences),
> GFP_KERNEL);
> @@ -1715,6 +1739,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> cf ? &cf->base : fence);
> }
>
> + if (vma->gpuva.flags & XE_VMA_SNAPSHOTTABLE &&
> + list_empty(&vma->snap.link)) {
> + mutex_lock(&vm->snap.lock);
> + list_add_tail(&vma->snap.link, &vm->snap.list);
> + vm->snap.num++;
> + mutex_unlock(&vm->snap.lock);
> + }
> +
> return cf ? &cf->base : fence;
>
> err_fences:
> @@ -3561,3 +3593,97 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
>
> return 0;
> }
> +
> +struct xe_vm_snapshot {
> + unsigned long num_snaps;
> + struct {
> + uint64_t ofs;
> + unsigned long len;
> + void *data;
> + } snap[];
> +};
> +
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
> +{
> + unsigned long num_snaps, i;
> + struct xe_vm_snapshot *snap;
> + struct xe_vma *vma;
> +
> + mutex_lock(&vm->snap.lock);
> + num_snaps = vm->snap.num;
> +
> + snap = kvmalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
> + if (!snap)
> + goto out_unlock;
> +
> + snap->num_snaps = num_snaps;
> + i = 0;
> + list_for_each_entry(vma, &vm->snap.list, snap.link) {
> + struct xe_bo *bo = gem_to_xe_bo(vma->gpuva.gem.obj);
> + unsigned long bo_ofs = xe_vma_bo_offset(vma);
> +
> + snap->snap[i].ofs = xe_vma_start(vma);
> + snap->snap[i].len = xe_vma_size(vma);
> + snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_NOWAIT);
> + if (!snap->snap[i].data)
> + goto next;
> +
> + /* TODO: Some way around trylock? */
> + xe_map_memcpy_from(vm->xe, snap->snap[i].data,
> + &bo->vmap, bo_ofs, snap->snap[i].len);
> +
> +next:
> + i++;
> + }
> +
> +out_unlock:
> + mutex_unlock(&vm->snap.lock);
> + return snap;
> +}
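On the TODO above: one possible direction (a sketch only, untested) is
to try-lock the reservation object and fall back to reporting the range
as uncaptured when it is contended, so this path never blocks:

	if (!dma_resv_trylock(bo->ttm.base.resv)) {
		/* Leave data NULL; printed as "Unable to capture range". */
		kvfree(snap->snap[i].data);
		snap->snap[i].data = NULL;
		goto next;
	}
	xe_map_memcpy_from(vm->xe, snap->snap[i].data,
			   &bo->vmap, bo_ofs, snap->snap[i].len);
	dma_resv_unlock(bo->ttm.base.resv);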
> +
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
> +{
> + unsigned long i, j;
> +
> + for (i = 0; i < snap->num_snaps; i++) {
> + if (!snap->snap[i].data) {
> + drm_printf(p, "Unable to capture range [%llx-%llx]\n",
> + snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1);
> + continue;
> + }
> +
> + for (j = 0; j < snap->snap[i].len; j += 64) {
> + uint32_t *x = snap->snap[i].data + j;
> +
> + drm_printf(p, "[%llx] = { %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x }\n",
> + snap->snap[i].ofs + j, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
> + x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]);
> + }
> + }
> +}
> +
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
> +{
> + unsigned long i;
> +
> + for (i = 0; i < snap->num_snaps; i++)
> + kvfree(snap->snap[i].data);
> + kvfree(snap);
> +}
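One nit: devcoredump_snapshot() only sets snapshot.vm when q->vm is set,
and xe_vm_snapshot_capture() can return NULL on allocation failure, yet
xe_devcoredump_free() calls this unconditionally. A NULL guard at the
top is probably wanted:

	if (!snap)
		return;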
> +
> +void xe_vma_move_notify(struct xe_vma *vma)
> +{
> + struct xe_vm *vm = xe_vma_vm(vma);
> +
> + if (!(vma->gpuva.flags & XE_VMA_SNAPSHOTTABLE))
> + return;
> +
> + if (list_empty(&vma->snap.link))
> + return;
> +
> + mutex_lock(&vm->snap.lock);
> + list_del(&vma->snap.link);
> + vm->snap.num--;
> + mutex_unlock(&vm->snap.lock);
> +}
> +
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index f966ed39b711..b0b96f158f8b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -234,3 +234,9 @@ static inline void vm_dbg(const struct drm_device *dev,
> { /* noop */ }
> #endif
> #endif
> +
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
> +void xe_vma_move_notify(struct xe_vma *vma);
> +
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 52e5eaed91c3..eb558e5a7f27 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -33,6 +33,7 @@ struct xe_vm;
> #define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5)
> #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6)
> #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7)
> +#define XE_VMA_SNAPSHOTTABLE (DRM_GPUVA_USERBITS << 8)
>
> /** struct xe_userptr - User pointer */
> struct xe_userptr {
> @@ -123,6 +124,14 @@ struct xe_vma {
> struct list_head link;
> } extobj;
>
> + struct {
> + /**
> +		 * @snap.link: Link into the xe_vm's list of snapshottable
> +		 * VMAs, protected by vm->snap.lock.
> + */
> + struct list_head link;
> + } snap;
> +
> /**
> * @userptr: user pointer state, only allocated for VMAs that are
> * user pointers
> @@ -336,6 +345,16 @@ struct xe_vm {
>
> /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
> bool batch_invalidate_tlb;
> +
> + /** @snap: Snapshot support structures */
> + struct {
> +		/** @lock: Mutex protecting @list and @num */
> +		struct mutex lock;
> +		/** @list: List of all VMAs to snapshot */
> +		struct list_head list;
> +		/** @num: Number of snapshottable VMAs */
> +		unsigned long num;
> + } snap;
> };
>
> /** struct xe_vma_op_map - VMA map operation */