[RFC 11/19] drm/xe/eudebug: vm open/pread/pwrite

Gwan-gyeong Mun gwan-gyeong.mun at intel.com
Mon Oct 21 09:58:50 UTC 2024


From: Mika Kuoppala <mika.kuoppala at linux.intel.com>

Debugger needs access to the client's vm to read and write. For
example inspecting ISA/ELF and setting up breakpoints.

Add ioctl to open target vm with debugger client and vm_handle
and hook up pread/pwrite possibility.

Open will take timeout argument so that standard fsync
can be used for explicit flushing between cpu/gpu for
the target vm.

Implement this for bo backed storage. userptr will
be done in following patch.

v2: - checkpatch (Maciej)
    - 32bit fixes (Andrzej)
    - bo_vmap (Mika)
    - fix vm leak if can't allocate k_buffer (Mika)
    - assert vm write held for vma (Matthew)

v3: update to use changed xe_force_wake_get() failure handling (G.G.)

Cc: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Signed-off-by: Maciej Patelczyk <maciej.patelczyk at intel.com>
Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  24 ++
 drivers/gpu/drm/xe/xe_eudebug.c      | 481 +++++++++++++++++++++++++++
 include/uapi/drm/xe_drm_eudebug.h    |  18 +
 3 files changed, 523 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 20fbb13ddf7a..0e298d2fc93f 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -549,6 +549,30 @@
 #define   CCS_MODE_CSLICE(cslice, ccs) \
 	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
 
+#define RCU_ASYNC_FLUSH				XE_REG(0x149fc)
+#define   RCU_ASYNC_FLUSH_IN_PROGRESS	REG_BIT(31)
+#define   RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT	28
+#define   RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1 REG_BIT(26)
+#define   RCU_ASYNC_FLUSH_AMFS		REG_BIT(8)
+#define   RCU_ASYNC_FLUSH_PREFETCH	REG_BIT(7)
+#define   RCU_ASYNC_FLUSH_DATA_PORT	REG_BIT(6)
+#define   RCU_ASYNC_FLUSH_DATA_CACHE	REG_BIT(5)
+#define   RCU_ASYNC_FLUSH_HDC_PIPELINE	REG_BIT(4)
+#define   RCU_ASYNC_INVALIDATE_HDC_PIPELINE REG_BIT(3)
+#define   RCU_ASYNC_INVALIDATE_CONSTANT_CACHE REG_BIT(2)
+#define   RCU_ASYNC_INVALIDATE_TEXTURE_CACHE REG_BIT(1)
+#define   RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE REG_BIT(0)
+#define   RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL ( \
+	RCU_ASYNC_FLUSH_AMFS | \
+	RCU_ASYNC_FLUSH_PREFETCH | \
+	RCU_ASYNC_FLUSH_DATA_PORT | \
+	RCU_ASYNC_FLUSH_DATA_CACHE | \
+	RCU_ASYNC_FLUSH_HDC_PIPELINE | \
+	RCU_ASYNC_INVALIDATE_HDC_PIPELINE | \
+	RCU_ASYNC_INVALIDATE_CONSTANT_CACHE | \
+	RCU_ASYNC_INVALIDATE_TEXTURE_CACHE | \
+	RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE)
+
 #define RCU_DEBUG_1				XE_REG(0x14a00)
 #define   RCU_DEBUG_1_ENGINE_STATUS		REG_GENMASK(2, 0)
 #define   RCU_DEBUG_1_RUNALONE_ACTIVE		REG_BIT(2)
diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
index d53060afda68..309358b575d6 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -5,9 +5,12 @@
 
 #include <linux/anon_inodes.h>
 #include <linux/delay.h>
+#include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 
+#include <drm/drm_drv.h>
 #include <drm/drm_managed.h>
 
 #include <generated/xe_wa_oob.h>
@@ -16,6 +19,7 @@
 #include "regs/xe_engine_regs.h"
 
 #include "xe_assert.h"
+#include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_eudebug.h"
 #include "xe_eudebug_types.h"
@@ -1219,6 +1223,8 @@ static long xe_eudebug_eu_control(struct xe_eudebug *d, const u64 arg)
 	return ret;
 }
 
+static long xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg);
+
 static long xe_eudebug_ioctl(struct file *file,
 			     unsigned int cmd,
 			     unsigned long arg)
@@ -1243,6 +1249,11 @@ static long xe_eudebug_ioctl(struct file *file,
 		ret = xe_eudebug_ack_event_ioctl(d, cmd, arg);
 		eu_dbg(d, "ioctl cmd=EVENT_ACK ret=%ld\n", ret);
 		break;
+	case DRM_XE_EUDEBUG_IOCTL_VM_OPEN:
+		ret = xe_eudebug_vm_open_ioctl(d, arg);
+		eu_dbg(d, "ioctl cmd=VM_OPEN ret=%ld\n", ret);
+		break;
+
 	default:
 		ret = -EINVAL;
 	}
@@ -2945,3 +2956,473 @@ void xe_eudebug_ufence_fini(struct xe_user_fence *ufence)
 	xe_eudebug_put(ufence->eudebug.debugger);
 	ufence->eudebug.debugger = NULL;
 }
+
+static int xe_eudebug_bovma_access(struct xe_bo *bo, u64 offset,
+				    void *buf, u64 len, bool write)
+{
+	struct xe_device * const xe = xe_bo_device(bo);
+	int ret;
+	u32 flags_orig;
+
+	/* XXX: require pin?. Assert bo->vm held */
+	ret = xe_bo_lock(bo, true);
+	if (ret)
+		return ret;
+
+	/*
+	 * XXX: we want to use xe_bo_vmap but it insist that userspace
+	 * has provided it's need for CPU access. But that is the client
+	 * and we are the debugger. So we promote temporarily with
+	 * flag to allow xe_bo_vmap to work in our case even if the client
+	 * did not need cpu map.
+	 *
+	 * XXX: Fix this by adding extra flags to xe_bo_vmap?
+	 */
+	flags_orig = bo->flags;
+	bo->flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
+
+	ret = xe_bo_vmap(bo);
+	if (!ret) {
+		if (write)
+			xe_map_memcpy_to(xe, &bo->vmap, offset, buf, len);
+		else
+			xe_map_memcpy_from(xe, buf, &bo->vmap, offset, len);
+
+		xe_bo_vunmap(bo);
+
+		ret = len;
+	}
+	bo->flags = flags_orig;
+
+	xe_bo_unlock(bo);
+
+	return ret;
+}
+
+static int xe_eudebug_vma_access(struct xe_vma *vma, u64 offset,
+				 void *buf, u64 len, bool write)
+{
+	struct xe_bo *bo;
+	u64 bytes;
+
+	lockdep_assert_held_write(&xe_vma_vm(vma)->lock);
+
+	if (XE_WARN_ON(offset >= xe_vma_size(vma)))
+		return -EINVAL;
+
+	bytes = min_t(u64, len, xe_vma_size(vma) - offset);
+	if (!bytes)
+		return 0;
+
+	bo = xe_bo_get(xe_vma_bo(vma));
+	if (bo) {
+		int ret;
+
+		ret = xe_eudebug_bovma_access(bo, offset, buf, bytes, write);
+		xe_bo_put(bo);
+
+		return ret;
+	}
+
+	return -EINVAL;
+}
+
+static int xe_eudebug_vm_access(struct xe_vm *vm, u64 offset,
+				void *buf, u64 len, bool write)
+{
+	struct xe_vma *vma;
+	int ret;
+
+	down_write(&vm->lock);
+
+	vma = xe_vm_find_overlapping_vma(vm, offset, len);
+	if (vma) {
+		/* XXX: why find overlapping returns below start? */
+		if (offset < xe_vma_start(vma) ||
+		    offset >= (xe_vma_start(vma) + xe_vma_size(vma))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/* Offset into vma */
+		offset -= xe_vma_start(vma);
+		ret = xe_eudebug_vma_access(vma, offset, buf, len, write);
+	} else {
+		ret = -EINVAL;
+	}
+
+out:
+	up_write(&vm->lock);
+
+	return ret;
+}
+
+struct vm_file {
+	struct xe_eudebug *debugger;
+	struct xe_file *xef;
+	struct xe_vm *vm;
+	u64 flags;
+	u64 client_id;
+	u64 vm_handle;
+	u64 timeout_ns;
+};
+
+static ssize_t __vm_read_write(struct xe_vm *vm,
+			       void *bb,
+			       char __user *r_buffer,
+			       const char __user *w_buffer,
+			       unsigned long offset,
+			       unsigned long len,
+			       const bool write)
+{
+	ssize_t ret;
+
+	if (!len)
+		return 0;
+
+	if (write) {
+		ret = copy_from_user(bb, w_buffer, len);
+		if (ret)
+			return -EFAULT;
+
+		ret = xe_eudebug_vm_access(vm, offset, bb, len, true);
+		if (ret < 0)
+			return ret;
+
+		len = ret;
+	} else {
+		ret = xe_eudebug_vm_access(vm, offset, bb, len, false);
+		if (ret < 0)
+			return ret;
+
+		len = ret;
+
+		ret = copy_to_user(r_buffer, bb, len);
+		if (ret)
+			return -EFAULT;
+	}
+
+	return len;
+}
+
+static struct xe_vm *find_vm_get(struct xe_eudebug *d, const u32 id)
+{
+	struct xe_vm *vm;
+
+	mutex_lock(&d->res->lock);
+	vm = find_resource__unlocked(d->res, XE_EUDEBUG_RES_TYPE_VM, id);
+	if (vm)
+		xe_vm_get(vm);
+
+	mutex_unlock(&d->res->lock);
+
+	return vm;
+}
+
+static ssize_t __xe_eudebug_vm_access(struct file *file,
+				      char __user *r_buffer,
+				      const char __user *w_buffer,
+				      size_t count, loff_t *__pos)
+{
+	struct vm_file *vmf = file->private_data;
+	struct xe_eudebug * const d = vmf->debugger;
+	struct xe_device * const xe = d->xe;
+	const bool write = !!w_buffer;
+	struct xe_vm *vm;
+	ssize_t copied = 0;
+	ssize_t bytes_left = count;
+	ssize_t ret;
+	unsigned long alloc_len;
+	loff_t pos = *__pos;
+	void *k_buffer;
+
+	if (XE_IOCTL_DBG(xe, write && r_buffer))
+		return -EINVAL;
+
+	vm = find_vm_get(d, vmf->vm_handle);
+	if (XE_IOCTL_DBG(xe, !vm))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, vm != vmf->vm)) {
+		eu_warn(d, "vm_access(%s): vm handle mismatch client_handle=%llu, vm_handle=%llu, flags=0x%llx, pos=%llu, count=%zu\n",
+			write ? "write" : "read",
+			vmf->client_id, vmf->vm_handle, vmf->flags, pos, count);
+		xe_vm_put(vm);
+		return -EINVAL;
+	}
+
+	if (!count) {
+		xe_vm_put(vm);
+		return 0;
+	}
+
+	alloc_len = min_t(unsigned long, ALIGN(count, PAGE_SIZE), 64 * SZ_1M);
+	do  {
+		k_buffer = vmalloc(alloc_len);
+		if (k_buffer)
+			break;
+
+		alloc_len >>= 1;
+	} while (alloc_len > PAGE_SIZE);
+
+	if (XE_IOCTL_DBG(xe, !k_buffer)) {
+		xe_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	do {
+		const ssize_t len = min_t(ssize_t, bytes_left, alloc_len);
+
+		ret = __vm_read_write(vm, k_buffer,
+				      write ? NULL : r_buffer + copied,
+				      write ? w_buffer + copied : NULL,
+				      pos + copied,
+				      len,
+				      write);
+		if (ret <= 0)
+			break;
+
+		bytes_left -= ret;
+		copied += ret;
+	} while (bytes_left > 0);
+
+	vfree(k_buffer);
+	xe_vm_put(vm);
+
+	if (XE_WARN_ON(copied < 0))
+		copied = 0;
+
+	*__pos += copied;
+
+	return copied ?: ret;
+}
+
+static ssize_t xe_eudebug_vm_read(struct file *file,
+				  char __user *buffer,
+				  size_t count, loff_t *pos)
+{
+	return __xe_eudebug_vm_access(file, buffer, NULL, count, pos);
+}
+
+static ssize_t xe_eudebug_vm_write(struct file *file,
+				   const char __user *buffer,
+				   size_t count, loff_t *pos)
+{
+	return __xe_eudebug_vm_access(file, NULL, buffer, count, pos);
+}
+
+static int engine_rcu_flush(struct xe_eudebug *d,
+			    struct xe_hw_engine *hwe,
+			    unsigned int timeout_us)
+{
+	const struct xe_reg psmi_addr = RING_PSMI_CTL(hwe->mmio_base);
+	struct xe_gt *gt = hwe->gt;
+	u32 mask = RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL;
+	unsigned int fw_ref;
+	u32 psmi_ctrl;
+	u32 id;
+	int ret;
+
+	if (hwe->class == XE_ENGINE_CLASS_RENDER)
+		id = 0;
+	else if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		id = hwe->instance + 1;
+	else
+		return -EINVAL;
+
+	if (id < 8)
+		mask |= id << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT;
+	else
+		mask |= (id - 8) << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT |
+			RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1;
+
+	fw_ref = xe_force_wake_get(gt_to_fw(gt), hwe->domain);
+	if (!fw_ref)
+		return -ETIMEDOUT;
+
+	/* Prevent concurrent flushes */
+	mutex_lock(&d->eu_lock);
+	psmi_ctrl = xe_mmio_read32(&gt->mmio, psmi_addr);
+	if (!(psmi_ctrl & IDLE_MSG_DISABLE))
+		xe_mmio_write32(&gt->mmio, psmi_addr, _MASKED_BIT_ENABLE(IDLE_MSG_DISABLE));
+
+	ret = xe_mmio_wait32(&gt->mmio, RCU_ASYNC_FLUSH,
+			     RCU_ASYNC_FLUSH_IN_PROGRESS, 0,
+			     timeout_us, NULL, false);
+	if (ret)
+		goto out;
+
+	xe_mmio_write32(&gt->mmio, RCU_ASYNC_FLUSH, mask);
+
+	ret = xe_mmio_wait32(&gt->mmio, RCU_ASYNC_FLUSH,
+			     RCU_ASYNC_FLUSH_IN_PROGRESS, 0,
+			     timeout_us, NULL, false);
+out:
+	if (!(psmi_ctrl & IDLE_MSG_DISABLE))
+		xe_mmio_write32(&gt->mmio, psmi_addr, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
+
+	mutex_unlock(&d->eu_lock);
+	xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+	return ret;
+}
+
+static int xe_eudebug_vm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct vm_file *vmf = file->private_data;
+	struct xe_eudebug *d = vmf->debugger;
+	struct xe_gt *gt;
+	int gt_id;
+	int ret = -EINVAL;
+
+	eu_dbg(d, "vm_fsync: client_handle=%llu, vm_handle=%llu, flags=0x%llx, start=%llu, end=%llu datasync=%d\n",
+	       vmf->client_id, vmf->vm_handle, vmf->flags, start, end, datasync);
+
+	for_each_gt(gt, d->xe, gt_id) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+
+		/* XXX: vm open per engine? */
+		for_each_hw_engine(hwe, gt, id) {
+			u64 timeout_us;
+
+			if (hwe->class != XE_ENGINE_CLASS_RENDER &&
+			    hwe->class != XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			timeout_us = div64_u64(vmf->timeout_ns, 1000ull);
+			ret = engine_rcu_flush(d, hwe, timeout_us);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+static int xe_eudebug_vm_release(struct inode *inode, struct file *file)
+{
+	struct vm_file *vmf = file->private_data;
+	struct xe_eudebug *d = vmf->debugger;
+
+	eu_dbg(d, "vm_release: client_handle=%llu, vm_handle=%llu, flags=0x%llx",
+	       vmf->client_id, vmf->vm_handle, vmf->flags);
+
+	xe_vm_put(vmf->vm);
+	xe_file_put(vmf->xef);
+	xe_eudebug_put(d);
+	drm_dev_put(&d->xe->drm);
+
+	kfree(vmf);
+
+	return 0;
+}
+
+static const struct file_operations vm_fops = {
+	.owner   = THIS_MODULE,
+	.llseek  = generic_file_llseek,
+	.read    = xe_eudebug_vm_read,
+	.write   = xe_eudebug_vm_write,
+	.fsync   = xe_eudebug_vm_fsync,
+	.mmap    = NULL,
+	.release = xe_eudebug_vm_release,
+};
+
+static long
+xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg)
+{
+	struct drm_xe_eudebug_vm_open param;
+	struct xe_device * const xe = d->xe;
+	struct vm_file *vmf = NULL;
+	struct xe_file *xef;
+	struct xe_vm *vm;
+	struct file *file;
+	long ret = 0;
+	int fd;
+
+	if (XE_IOCTL_DBG(xe, _IOC_SIZE(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) != sizeof(param)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(_IOC_DIR(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) & _IOC_WRITE)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, copy_from_user(&param, (void __user *)arg, sizeof(param))))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, param.flags))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, xe_eudebug_detached(d)))
+		return -ENOTCONN;
+
+	xef = find_client_get(d, param.client_handle);
+	if (xef)
+		vm = find_vm_get(d, param.vm_handle);
+	else
+		vm = NULL;
+
+	if (XE_IOCTL_DBG(xe, !xef))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		ret = -EINVAL;
+		goto out_file_put;
+	}
+
+	vmf = kzalloc(sizeof(*vmf), GFP_KERNEL);
+	if (XE_IOCTL_DBG(xe, !vmf)) {
+		ret = -ENOMEM;
+		goto out_vm_put;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (XE_IOCTL_DBG(xe, fd < 0)) {
+		ret = fd;
+		goto out_free;
+	}
+
+	kref_get(&d->ref);
+	vmf->debugger = d;
+	vmf->vm = vm;
+	vmf->xef = xef;
+	vmf->flags = param.flags;
+	vmf->client_id = param.client_handle;
+	vmf->vm_handle = param.vm_handle;
+	vmf->timeout_ns = param.timeout_ns;
+
+	file = anon_inode_getfile("[xe_eudebug.vm]", &vm_fops, vmf, O_RDWR);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		XE_IOCTL_DBG(xe, ret);
+		file = NULL;
+		goto out_fd_put;
+	}
+
+	file->f_mode |= FMODE_PREAD | FMODE_PWRITE |
+		FMODE_READ | FMODE_WRITE | FMODE_LSEEK;
+
+	fd_install(fd, file);
+
+	eu_dbg(d, "vm_open: client_handle=%llu, handle=%llu, flags=0x%llx, fd=%d",
+	       vmf->client_id, vmf->vm_handle, vmf->flags, fd);
+
+	XE_WARN_ON(ret);
+
+	drm_dev_get(&xe->drm);
+
+	return fd;
+
+out_fd_put:
+	put_unused_fd(fd);
+	xe_eudebug_put(d);
+out_free:
+	kfree(vmf);
+out_vm_put:
+	xe_vm_put(vm);
+out_file_put:
+	xe_file_put(xef);
+
+	XE_WARN_ON(ret >= 0);
+
+	return ret;
+}
diff --git a/include/uapi/drm/xe_drm_eudebug.h b/include/uapi/drm/xe_drm_eudebug.h
index b2f6038ccc59..9917280400d6 100644
--- a/include/uapi/drm/xe_drm_eudebug.h
+++ b/include/uapi/drm/xe_drm_eudebug.h
@@ -18,6 +18,7 @@ extern "C" {
 #define DRM_XE_EUDEBUG_IOCTL_READ_EVENT		_IO('j', 0x0)
 #define DRM_XE_EUDEBUG_IOCTL_EU_CONTROL		_IOWR('j', 0x2, struct drm_xe_eudebug_eu_control)
 #define DRM_XE_EUDEBUG_IOCTL_ACK_EVENT		_IOW('j', 0x4, struct drm_xe_eudebug_ack_event)
+#define DRM_XE_EUDEBUG_IOCTL_VM_OPEN		_IOW('j', 0x1, struct drm_xe_eudebug_vm_open)
 
 /* XXX: Document events to match their internal counterparts when moved to xe_drm.h */
 struct drm_xe_eudebug_event {
@@ -170,6 +171,23 @@ struct drm_xe_eudebug_ack_event {
 	__u64 seqno;
 };
 
+struct drm_xe_eudebug_vm_open {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @client_handle: id of client */
+	__u64 client_handle;
+
+	/** @vm_handle: id of vm */
+	__u64 vm_handle;
+
+	/** @flags: flags */
+	__u64 flags;
+
+	/** @timeout_ns: Timeout value in nanoseconds operations (fsync) */
+	__u64 timeout_ns;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.46.1



More information about the Intel-xe mailing list