[Intel-xe] [RFC 1/2] drm/xe/eudebug: Introduce eudebug support
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue May 2 10:35:49 UTC 2023
With the eudebug event interface, a user-space debugger process (like gdb)
is able to keep track of resources created by another process
(the debuggee, using drm/xe) and act upon these resources.
For example, debugger can find a client vm which contains isa/elf
for a particular shader/eu-kernel and then inspect and modify it
(for example installing a breakpoint).
The debugger first opens a connection to xe with a drm ioctl specifying
the target pid to connect to. This returns an anon fd handle that can then
be used to listen for events with a dedicated ioctl.
This patch introduces eudebug connection and event queuing, adding
client create/destroy and vm create/destroy events as a baseline.
More events for full debugger operation are needed and
those will be introduced in follow up patches.
The resource tracking parts are inspired by the work of
Maciej Patelczyk on resource handling for i915. Chris Wilson
suggested the improvement of a two-way mapping, which makes it easy to
use the resource map as the definitive bookkeeping of which resources
have been replayed to the debugger in the discovery phase (in a follow-up patch).
v2: - event printer removed (Maarten)
- trim down kfifo accessors (Maarten)
- xa_alloc spurious locking removed (Maarten)
Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Cc: Maciej Patelczyk <maciej.patelczyk at intel.com>
Cc: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
drivers/gpu/drm/xe/Makefile | 3 +-
drivers/gpu/drm/xe/xe_device.c | 24 +-
drivers/gpu/drm/xe/xe_device_types.h | 25 +
drivers/gpu/drm/xe/xe_eudebug.c | 999 ++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_eudebug.h | 27 +
drivers/gpu/drm/xe/xe_eudebug_types.h | 166 +++++
drivers/gpu/drm/xe/xe_vm.c | 5 +
include/uapi/drm/xe_drm_tmp.h | 76 ++
8 files changed, 1323 insertions(+), 2 deletions(-)
create mode 100644 drivers/gpu/drm/xe/xe_eudebug.c
create mode 100644 drivers/gpu/drm/xe/xe_eudebug.h
create mode 100644 drivers/gpu/drm/xe/xe_eudebug_types.h
create mode 100644 include/uapi/drm/xe_drm_tmp.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b84e191ba14f..8679ee29095b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,7 +95,8 @@ xe-y += xe_bb.o \
xe_vm_madvise.o \
xe_wait_user_fence.o \
xe_wa.o \
- xe_wopcm.o
+ xe_wopcm.o \
+ xe_eudebug.o
# i915 Display compat #defines and #includes
subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 45d6e5ff47fd..bcdb30a8bbcd 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -11,6 +11,7 @@
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>
+#include <drm/xe_drm_tmp.h>
#include "regs/xe_regs.h"
#include "xe_bo.h"
@@ -32,10 +33,13 @@
#include "xe_vm.h"
#include "xe_vm_madvise.h"
#include "xe_wait_user_fence.h"
+#include "xe_eudebug.h"
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
+ struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef;
+ int err;
xef = kzalloc(sizeof(*xef), GFP_KERNEL);
if (!xef)
@@ -50,7 +54,15 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1);
file->driver_priv = xef;
- return 0;
+
+ err = xa_alloc(&xe->clients.xa, &xef->client_id, xef, xa_limit_32b, GFP_KERNEL);
+
+ if (!err)
+ xe_eudebug_file_open(xef);
+ else
+ kfree(xef);
+
+ return err;
}
static void device_kill_persistent_engines(struct xe_device *xe,
@@ -79,6 +91,12 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
mutex_unlock(&xef->vm.lock);
mutex_destroy(&xef->vm.lock);
+ xe_eudebug_file_close(xef);
+
+ mutex_lock(&xe->clients.lock);
+ xa_erase(&xe->clients.xa, xef->client_id);
+ mutex_unlock(&xe->clients.lock);
+
kfree(xef);
}
@@ -103,6 +121,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EUDEBUG_CONNECT, xe_eudebug_connect_ioctl, DRM_RENDER_ALLOW),
};
static const struct file_operations xe_driver_fops = {
@@ -161,6 +180,7 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
destroy_workqueue(xe->ordered_wq);
ttm_device_fini(&xe->ttm);
+ xe_eudebug_fini(xe);
}
struct xe_device *xe_device_create(struct pci_dev *pdev,
@@ -207,6 +227,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
+ xe_eudebug_init(xe);
+
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
err = xe_display_create(xe);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 1cb404e48aaa..1c713562e1e8 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -266,6 +266,28 @@ struct xe_device {
/** @d3cold_allowed: Indicates if d3cold is a valid device state */
bool d3cold_allowed;
+ /** @debugger connection list and globals for device */
+ struct {
+ /** @lock: protects the list of connections */
+ spinlock_t lock;
+ /** @list: list of connections, aka debuggers */
+ struct list_head list;
+
+ /** @session_count: session counter to track connections */
+ u64 session_count;
+
+ /** @available: is the debugging functionality available */
+ bool available;
+ } eudebug;
+
+ /** @clients xe_file tracking for eudebug discovery */
+ struct {
+ /** @lock: protects the xa */
+ struct mutex lock;
+ /** @xa: xarray of xe_files currently open */
+ struct xarray xa;
+ } clients;
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -367,6 +389,9 @@ struct xe_file {
/** @lock: protects file engine state */
struct mutex lock;
} engine;
+
+ /** @client_id: id in clients.xa for eudebug discovery */
+ int client_id;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
new file mode 100644
index 000000000000..fef56d8889be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -0,0 +1,999 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_eudebug.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+#include <uapi/drm/xe_drm_tmp.h>
+
+#include "xe_device.h"
+#include "xe_eudebug_types.h"
+
+/*
+ * If there is no event being read in this time (for example gdb stuck)
+ * connection is forcibly disconnected. This releases the client as it was
+ * waiting to get space for event in fifo.
+ */
+#define XE_EUDEBUG_NO_READ_DETECTED_TIMEOUT_MS (10 * 1000)
+
+#define for_each_debugger_rcu(debugger, head) \
+ list_for_each_entry_rcu((debugger), (head), connection_link)
+
+#define from_event(T, event) container_of((event), typeof(*(T)), base)
+#define to_event(e) (&(e)->base)
+
+/*
+ * Common prefix for all eudebug log lines:
+ * (current pid/current tgid:session:target pid/target tgid).
+ */
+#define XE_EUDEBUG_DBG_STR "eudbg: (%d/%d:%lld:%d/%d): "
+
+/*
+ * Arguments matching XE_EUDEBUG_DBG_STR. The macro argument is parenthesized
+ * so that any pointer expression can be passed, not only a plain identifier.
+ */
+#define XE_EUDEBUG_DBG_ARGS(d) current->pid, \
+	task_tgid_nr(current), \
+	(d)->session, \
+	(d)->target_task->pid, \
+	task_tgid_nr((d)->target_task)
+
+/*
+ * Logging helpers that prefix the message with the connection identity.
+ *
+ * NOTE(review): "# fmt" stringifies the format argument, so the caller's
+ * string literal is emitted including its surrounding quotes (and escape
+ * sequences such as "\n" are printed literally) - confirm this is intended.
+ */
+#define eu_err(d, fmt, ...) drm_err(&(d)->xe->drm, XE_EUDEBUG_DBG_STR # fmt, XE_EUDEBUG_DBG_ARGS(d), ##__VA_ARGS__)
+#define eu_warn(d, fmt, ...) drm_warn(&(d)->xe->drm, XE_EUDEBUG_DBG_STR # fmt, XE_EUDEBUG_DBG_ARGS(d), ##__VA_ARGS__)
+#define eu_dbg(d, fmt, ...) drm_dbg(&(d)->xe->drm, XE_EUDEBUG_DBG_STR # fmt, XE_EUDEBUG_DBG_ARGS(d), ##__VA_ARGS__)
+
+/*
+ * Peek at the oldest queued event without removing it from the fifo.
+ * Returns NULL when the fifo holds no events.
+ */
+static struct xe_eudebug_event *
+event_fifo_pending(struct xe_eudebug *d)
+{
+	struct xe_eudebug_event *head;
+
+	if (!kfifo_peek(&d->events.fifo, &head))
+		return NULL;
+
+	return head;
+}
+
+/*
+ * Lockless snapshot of the number of events sitting in the fifo.
+ * The value may be stale by the time it is used; every caller is
+ * expected to tolerate that, which is why no lock is taken here.
+ */
+__no_kcsan
+static unsigned int
+event_fifo_num_events_peek(const struct xe_eudebug * const d)
+{
+	const unsigned int queued = kfifo_len(&d->events.fifo);
+
+	return queued;
+}
+
+/* Hash table layout for xe_eudebug_handle: keyed by the u64 @key member. */
+static const struct rhashtable_params rhash_res = {
+	.key_offset = offsetof(struct xe_eudebug_handle, key),
+	.key_len = sizeof_field(struct xe_eudebug_handle, key),
+	.head_offset = offsetof(struct xe_eudebug_handle, rh_head),
+	.automatic_shrinking = true,
+};
+
+/*
+ * Map a resource type (XE_EUDEBUG_RES_TYPE_*) to its per-type resource map.
+ * An out-of-range type is a programming error and trips XE_BUG_ON().
+ */
+static struct xe_eudebug_resource *
+resource_from_type(struct xe_eudebug_resources * const res, const int t)
+{
+	XE_BUG_ON(t < 0);
+	XE_BUG_ON(t >= XE_EUDEBUG_RES_TYPE_COUNT);
+
+	return res->rt + t;
+}
+
+/*
+ * Allocate and initialize the per-connection resource maps: one xarray
+ * (id -> handle) and one rhashtable (key -> handle) per resource type.
+ *
+ * Returns the new container, or NULL on allocation or init failure. On
+ * failure everything initialized so far is torn down again, including
+ * the lock and the xarray of the type whose rhashtable failed to init.
+ */
+static struct xe_eudebug_resources *
+xe_eudebug_resources_alloc(void)
+{
+	struct xe_eudebug_resources *res;
+	int err;
+	int i;
+
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return NULL;
+
+	mutex_init(&res->lock);
+
+	for (i = 0; i < XE_EUDEBUG_RES_TYPE_COUNT; i++) {
+		xa_init_flags(&res->rt[i].xa, XA_FLAGS_ALLOC1);
+
+		err = rhashtable_init(&res->rt[i].rh, &rhash_res);
+		if (err)
+			goto err_unwind;
+	}
+
+	return res;
+
+err_unwind:
+	/* rt[i].rh never came up, but its xarray was already initialized */
+	xa_destroy(&res->rt[i].xa);
+
+	while (i--) {
+		xa_destroy(&res->rt[i].xa);
+		rhashtable_destroy(&res->rt[i].rh);
+	}
+
+	mutex_destroy(&res->lock);
+	kfree(res);
+
+	return NULL;
+}
+
+/*
+ * Per-element callback for rhashtable_free_and_destroy(). Any element
+ * reaching this callback triggers a warning before being freed.
+ */
+static void res_free_fn(void *ptr, void *arg)
+{
+	void *leftover = ptr;
+
+	XE_WARN_ON(leftover);
+	kfree(leftover);
+}
+
+/*
+ * Release every tracked handle of @d and then the resource maps themselves.
+ *
+ * Pass one (under the resources lock) removes each handle from both the
+ * rhashtable and the xarray and frees it. Pass two destroys the, by then
+ * expected to be empty, containers and finally the resources struct.
+ */
+static void
+xe_eudebug_resources_free(struct xe_eudebug *d)
+{
+	struct xe_eudebug_resources *res = d->res;
+	struct xe_eudebug_resource *r;
+	struct xe_eudebug_handle *h;
+	unsigned long idx;
+	int type;
+
+	mutex_lock(&res->lock);
+	for (type = 0; type < XE_EUDEBUG_RES_TYPE_COUNT; type++) {
+		r = &res->rt[type];
+
+		xa_for_each(&r->xa, idx, h) {
+			struct xe_eudebug_handle *erased;
+			int err;
+
+			err = rhashtable_remove_fast(&r->rh, &h->rh_head,
+						     rhash_res);
+			XE_WARN_ON(err);
+
+			erased = xa_erase(&r->xa, h->id);
+			XE_WARN_ON(erased != h);
+			kfree(erased);
+		}
+	}
+	mutex_unlock(&res->lock);
+
+	for (type = 0; type < XE_EUDEBUG_RES_TYPE_COUNT; type++) {
+		r = &res->rt[type];
+
+		rhashtable_free_and_destroy(&r->rh, res_free_fn, NULL);
+		XE_WARN_ON(!xa_empty(&r->xa));
+		xa_destroy(&r->xa);
+	}
+
+	mutex_destroy(&res->lock);
+
+	kfree(res);
+}
+
+/*
+ * kref release callback: final teardown of a debugger connection.
+ * Frees undelivered events, the resource maps and the target task
+ * reference; the struct itself is freed after an RCU grace period.
+ */
+static void xe_eudebug_free(struct kref *ref)
+{
+	struct xe_eudebug *d = container_of(ref, struct xe_eudebug, ref);
+	struct xe_eudebug_event *undelivered;
+
+	for (;;) {
+		if (!kfifo_get(&d->events.fifo, &undelivered))
+			break;
+
+		kfree(undelivered);
+	}
+
+	xe_eudebug_resources_free(d);
+	put_task_struct(d->target_task);
+	mutex_destroy(&d->lock);
+
+	XE_WARN_ON(kfifo_len(&d->events.fifo));
+
+	kfree_rcu(d, rcu);
+}
+
+/* Drop one reference on @d; the last put releases it via xe_eudebug_free(). */
+static void xe_eudebug_put(struct xe_eudebug *d)
+{
+	struct kref * const ref = &d->ref;
+
+	kref_put(ref, xe_eudebug_free);
+}
+
+/*
+ * Lockless check whether the connection has been closed. The flag only
+ * ever goes from false to true, so a racy read is acceptable.
+ */
+static bool
+xe_eudebug_detached(const struct xe_eudebug * const d)
+{
+	const bool closed = data_race(READ_ONCE(d->closed));
+
+	return closed;
+}
+
+/*
+ * Unlink @d from the device's list of debugger connections. The caller
+ * must have marked the connection closed already (warned about otherwise).
+ */
+static void xe_eudebug_detach(struct xe_eudebug *d)
+{
+	spinlock_t * const list_lock = &d->xe->eudebug.lock;
+
+	XE_WARN_ON(!xe_eudebug_detached(d));
+
+	spin_lock(list_lock);
+	list_del_rcu(&d->connection_link);
+	eu_dbg(d, "session %lld detached", d->session);
+	spin_unlock(list_lock);
+}
+
+/*
+ * Close the debugger connection @d, recording @err as the reason.
+ *
+ * Safe to call more than once: only the first caller flips @closed,
+ * unlinks the connection and drops a reference. Waiters on the event
+ * waitqueue are woken on every call.
+ */
+static void xe_eudebug_disconnect(struct xe_eudebug *d,
+ const int err)
+{
+ bool detached = false;
+
+ /* d->lock serializes the one-way open -> closed transition */
+ mutex_lock(&d->lock);
+ if (!d->closed) {
+ d->closed = true;
+ detached = true;
+ d->last_error = err;
+ }
+ mutex_unlock(&d->lock);
+
+ if (detached) {
+ xe_eudebug_detach(d);
+ eu_dbg(d, "disconnected: %d (%d)", d->last_error, err);
+ }
+
+ wake_up_all(&d->events.write_done);
+
+ /* presumably pairs with the kref_get() done when linking into the list */
+ if (detached)
+ xe_eudebug_put(d);
+}
+
+/*
+ * file_operations.release for the debugger fd: disconnect without an
+ * error code and drop the reference held through file->private_data.
+ */
+static int xe_eudebug_release(struct inode *inode, struct file *file)
+{
+	struct xe_eudebug * const connection = file->private_data;
+
+	xe_eudebug_disconnect(connection, 0);
+	xe_eudebug_put(connection);
+
+	return 0;
+}
+
+/*
+ * file_operations.poll for the debugger fd.
+ *
+ * Reports EPOLLHUP once the connection is closed (plus EPOLLERR when an
+ * error was recorded) and EPOLLIN while events are queued.
+ */
+static __poll_t xe_eudebug_poll(struct file *file, poll_table *wait)
+{
+	struct xe_eudebug * const d = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &d->events.write_done, wait);
+
+	if (xe_eudebug_detached(d))
+		mask |= d->last_error ? (EPOLLHUP | EPOLLERR) : EPOLLHUP;
+
+	if (event_fifo_num_events_peek(d) != 0)
+		mask |= EPOLLIN;
+
+	return mask;
+}
+
+/*
+ * file_operations.read for the debugger fd. Plain read() is rejected
+ * with -EINVAL; events are fetched through the READ_EVENT ioctl instead.
+ */
+static ssize_t xe_eudebug_read(struct file *file,
+			       char __user *buf,
+			       size_t count,
+			       loff_t *ppos)
+{
+	const ssize_t unsupported = -EINVAL;
+
+	return unsupported;
+}
+
+/*
+ * Find the debugger connection whose target shares a thread group with
+ * @task and take a reference on it.
+ *
+ * Only the first matching connection is considered. Returns NULL when
+ * nothing matches or when the match is already on its way to being freed.
+ */
+static struct xe_eudebug *
+xe_eudebug_for_task_get(struct xe_device *xe,
+			struct task_struct *task)
+{
+	struct xe_eudebug *found = NULL;
+	struct xe_eudebug *iter;
+
+	rcu_read_lock();
+	for_each_debugger_rcu(iter, &xe->eudebug.list) {
+		if (same_thread_group(iter->target_task, task)) {
+			if (kref_get_unless_zero(&iter->ref))
+				found = iter;
+
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/*
+ * Resolve @pid to its task and take a task reference.
+ * Returns NULL if no task is attached to @pid.
+ */
+static struct task_struct *find_task_get(struct pid *pid)
+{
+	struct task_struct *task;
+
+	rcu_read_lock();
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		goto unlock;
+
+	get_task_struct(task);
+unlock:
+	rcu_read_unlock();
+
+	return task;
+}
+
+#define xef_to_xe(xef) to_xe_device((xef)->drm->minor->dev)
+
+/*
+ * Look up the live debugger connection attached to the process that owns
+ * @xef and return it with a reference held.
+ *
+ * Returns NULL when the owner task is gone, nobody is debugging it, or
+ * the connection found has already been closed.
+ */
+static struct xe_eudebug *
+xe_eudebug_get(struct xe_file *xef)
+{
+	struct xe_device *xe = xef_to_xe(xef);
+	struct task_struct *owner;
+	struct xe_eudebug *d;
+
+	owner = find_task_get(xef->drm->pid);
+	if (!owner)
+		return NULL;
+
+	d = xe_eudebug_for_task_get(xe, owner);
+	put_task_struct(owner);
+
+	if (!d)
+		return NULL;
+
+	if (xe_eudebug_detached(d)) {
+		xe_eudebug_put(d);
+		return NULL;
+	}
+
+	return d;
+}
+
+/*
+ * Try once to hand *@event over to the connection's fifo.
+ *
+ * Returns 0 when the event was consumed: either queued, or silently
+ * dropped and freed because the connection is closed. In both cases
+ * *@event is set to NULL. Returns -ENOSPC when the fifo is full (the
+ * caller keeps ownership) and -EINVAL when *@event is NULL.
+ */
+static int queue_event(struct xe_eudebug * const d,
+		       struct xe_eudebug_event **event)
+{
+	if (!*event)
+		return -EINVAL;
+
+	/* We just drop quietly on disconnected */
+	if (xe_eudebug_detached(d)) {
+		wake_up_all(&d->events.write_done);
+		kfree(*event);
+		*event = NULL;
+		return 0;
+	}
+
+	if (!kfifo_in_spinlocked(&d->events.fifo, event, 1, &d->events.lock))
+		return -ENOSPC;
+
+	wake_up_all(&d->events.write_done);
+	*event = NULL;
+
+	return 0;
+}
+
+/*
+ * Queue @event for delivery to the debugger, sleeping and retrying while
+ * the fifo is full.
+ *
+ * Ownership of @event is taken in all cases: it is either consumed by
+ * queue_event() or freed here. If the fifo stays full and its head does
+ * not change for XE_EUDEBUG_NO_READ_DETECTED_TIMEOUT_MS, the connection
+ * is disconnected with -ETIMEDOUT.
+ *
+ * @gfp is not used by this function.
+ *
+ * Returns 0 on success (or when the event was dropped because the
+ * connection is closed), negative error code otherwise.
+ */
+static int _xe_eudebug_queue_event(struct xe_eudebug *d,
+ struct xe_eudebug_event *event,
+ gfp_t gfp)
+{
+ u64 start_t;
+ int ret;
+
+ XE_BUG_ON(event->size <= sizeof(struct xe_eudebug_event));
+ XE_BUG_ON(!event->type);
+ XE_BUG_ON(event->type == DRM_XE_EUDEBUG_EVENT_READ);
+
+ /* fast path: room in the fifo (or connection closed) on first try */
+ ret = queue_event(d, &event);
+ if (!ret)
+ return 0;
+
+ start_t = ktime_get();
+
+ while (ret == -ENOSPC) {
+ struct xe_eudebug_event *blocking;
+
+ ret = queue_event(d, &event);
+ if (ret != -ENOSPC)
+ break;
+
+ /* remember the fifo head so that reader progress can be detected */
+ blocking = event_fifo_pending(d);
+
+ msleep(1 + 1 * event_fifo_num_events_peek(d));
+
+ /* restart timeout if we see progress on fifo */
+ if (blocking && blocking != event_fifo_pending(d))
+ start_t = ktime_get();
+
+ if (ktime_ms_delta(ktime_get(), start_t) >=
+ XE_EUDEBUG_NO_READ_DETECTED_TIMEOUT_MS)
+ ret = -ETIMEDOUT;
+ }
+
+ if (ret) {
+ eu_warn(d, "event %llu queue failed (blocked %lld ms), disconnecting with %d",
+ event ? event->seqno : 0,
+ ktime_ms_delta(ktime_get(), start_t),
+ ret);
+ xe_eudebug_disconnect(d, ret);
+ }
+
+ /* queue_event() NULLs @event once consumed, so this only frees leftovers */
+ kfree(event);
+
+ return ret;
+}
+
+/* Queue @event using GFP_KERNEL; see _xe_eudebug_queue_event(). */
+static int xe_eudebug_queue_event(struct xe_eudebug *d,
+				  struct xe_eudebug_event *event)
+{
+	const int ret = _xe_eudebug_queue_event(d, event, GFP_KERNEL);
+
+	return ret;
+}
+
+/*
+ * Allocate a zeroed handle whose lookup key is the address @key.
+ *
+ * The pointer is converted through uintptr_t so that the conversion to
+ * the 64-bit key is well defined regardless of pointer width.
+ * @type is currently unused.
+ *
+ * Returns the new handle or NULL on allocation failure.
+ */
+static struct xe_eudebug_handle *
+alloc_handle(const int type, const void * const key)
+{
+	struct xe_eudebug_handle *h;
+
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return NULL;
+
+	h->key = (uintptr_t)key;
+
+	return h;
+}
+
+/*
+ * Look up the handle registered for @key in resource map @r.
+ *
+ * The hash table key is a u64 (see rhash_res.key_len), so the pointer is
+ * first widened into a local u64. Passing the address of the pointer
+ * itself would make the hash read sizeof(u64) bytes from a pointer-sized
+ * object, which over-reads where pointers are narrower than 64 bits.
+ *
+ * Returns the handle or NULL if @key is not tracked.
+ */
+static struct xe_eudebug_handle *
+__find_handle(struct xe_eudebug_resource *r,
+	      void *key)
+{
+	const u64 lookup_key = (uintptr_t)key;
+	struct xe_eudebug_handle *h;
+
+	h = rhashtable_lookup_fast(&r->rh,
+				   &lookup_key,
+				   rhash_res);
+	if (h) {
+		/* both maps must agree on every tracked handle */
+		XE_WARN_ON(!h->id);
+		XE_WARN_ON(h != xa_load(&r->xa, h->id));
+	}
+
+	return h;
+}
+
+/*
+ * Return the id under which @key is tracked for resource @type,
+ * or -ENOENT if it is not tracked. Takes the resources lock.
+ */
+static int find_handle(struct xe_eudebug_resources *res,
+		       const int type,
+		       void *key)
+{
+	struct xe_eudebug_resource * const r = resource_from_type(res, type);
+	struct xe_eudebug_handle *h;
+	int id = -ENOENT;
+
+	mutex_lock(&res->lock);
+	h = __find_handle(r, key);
+	if (h)
+		id = h->id;
+	mutex_unlock(&res->lock);
+
+	return id;
+}
+
+/*
+ * Start tracking resource @p of @type for connection @d.
+ *
+ * A fresh id is allocated in the type's xarray and the handle is inserted
+ * into the type's hash table, both under the resources lock.
+ *
+ * The id is kept in a local and returned from there: once the lock is
+ * dropped a concurrent xe_eudebug_remove_handle() may free the handle, so
+ * it must not be dereferenced afterwards. The xarray entry is only erased
+ * on paths where the allocation actually succeeded.
+ *
+ * Returns the new id (> 0) on success, -ENOTCONN if the connection is
+ * closed, -EEXIST if @p is already tracked, -ENOSPC if the id space is
+ * exhausted, or another negative error code.
+ */
+static int xe_eudebug_add_handle(struct xe_eudebug *d,
+				 int type,
+				 void *p)
+{
+	struct xe_eudebug_resource *r;
+	struct xe_eudebug_handle *h;
+	u32 id = 0;
+	int err;
+
+	if (xe_eudebug_detached(d))
+		return -ENOTCONN;
+
+	h = alloc_handle(type, p);
+	if (!h)
+		return -ENOMEM;
+
+	r = resource_from_type(d->res, type);
+
+	mutex_lock(&d->res->lock);
+	if (__find_handle(r, p)) {
+		err = -EEXIST;
+		goto err_unlock;
+	}
+
+	err = xa_alloc(&r->xa, &id, h, xa_limit_31b, GFP_KERNEL);
+	if (err)
+		goto err_unlock;
+
+	if (id >= INT_MAX) {
+		err = -ENOSPC;
+		goto err_erase;
+	}
+
+	h->id = id;
+
+	err = rhashtable_insert_fast(&r->rh,
+				     &h->rh_head,
+				     rhash_res);
+	if (err)
+		goto err_erase;
+
+	mutex_unlock(&d->res->lock);
+
+	/* @h may already be gone here; only the saved id is safe to use */
+	return id;
+
+err_erase:
+	xa_erase(&r->xa, id);
+err_unlock:
+	mutex_unlock(&d->res->lock);
+
+	kfree(h);
+	XE_WARN_ON(err > 0);
+
+	return err;
+}
+
+/*
+ * Stop tracking resource @p of @type for connection @d.
+ *
+ * Returns the id the resource was tracked under, -ENOTCONN if the
+ * connection is closed, -ENOENT if @p was not tracked, or the error
+ * reported by the hash table removal.
+ */
+static long xe_eudebug_remove_handle(struct xe_eudebug *d, int type, void *p)
+{
+	struct xe_eudebug_resource *r;
+	struct xe_eudebug_handle *h;
+	long ret = -ENOENT;
+
+	if (xe_eudebug_detached(d))
+		return -ENOTCONN;
+
+	r = resource_from_type(d->res, type);
+
+	mutex_lock(&d->res->lock);
+	h = __find_handle(r, p);
+	if (h) {
+		struct xe_eudebug_handle *erased;
+		int err;
+
+		err = rhashtable_remove_fast(&r->rh, &h->rh_head, rhash_res);
+		erased = xa_erase(&r->xa, h->id);
+		XE_WARN_ON(err);
+		XE_WARN_ON(erased != h);
+
+		ret = err ? err : h->id;
+	}
+	mutex_unlock(&d->res->lock);
+
+	/* h is NULL when nothing was found; kfree() tolerates that */
+	kfree(h);
+
+	XE_WARN_ON(!ret);
+
+	return ret;
+}
+
+/*
+ * Allocate a zeroed event of @size bytes and fill in the base header,
+ * stamping it with the connection's next sequence number.
+ *
+ * @size is expected to be larger than the base event (warned otherwise).
+ * Returns the event or NULL on allocation failure.
+ */
+static struct xe_eudebug_event *
+xe_eudebug_create_event(struct xe_eudebug *d,
+			u32 type, u32 flags, u32 size, gfp_t gfp)
+{
+	struct xe_eudebug_event *e;
+
+	XE_WARN_ON(size <= sizeof(*e));
+
+	e = kzalloc(size, gfp);
+	if (e == NULL)
+		return NULL;
+
+	e->seqno = atomic_long_inc_return(&d->events.seqno);
+	e->size = size;
+	e->flags = flags;
+	e->type = type;
+
+	return e;
+}
+
+static long xe_eudebug_read_event(struct xe_eudebug *d,
+ const unsigned long arg,
+ const bool nonblock)
+{
+ struct drm_xe_eudebug_event __user * const user_orig =
+ (void __user *)(arg);
+ struct drm_xe_eudebug_event user_event;
+ struct xe_eudebug_event *event;
+ long ret;
+
+ if (copy_from_user(&user_event, user_orig, sizeof(user_event)))
+ return -EFAULT;
+
+ if (!user_event.type)
+ return -EINVAL;
+
+ if (user_event.type > DRM_XE_EUDEBUG_EVENT_MAX_EVENT)
+ return -EINVAL;
+
+ if (user_event.type != DRM_XE_EUDEBUG_EVENT_READ)
+ return -EINVAL;
+
+ if (user_event.size < sizeof(*user_orig))
+ return -EINVAL;
+
+ if (user_event.flags)
+ return -EINVAL;
+
+ /* timeout as param */
+ ret = wait_event_interruptible_timeout(d->events.write_done,
+ event_fifo_num_events_peek(d),
+ msecs_to_jiffies(10*10000));
+
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&d->events.lock);
+ event = event_fifo_pending(d);
+ if (event) {
+ if (user_event.size < event->size) {
+ ret = -EMSGSIZE;
+ } else if (!access_ok(user_orig, event->size)) {
+ ret = -EFAULT;
+ } else if (!kfifo_get(&d->events.fifo, &event)) {
+ eu_warn(d, "internal fifo corruption");
+ ret = -ENOTCONN;
+ } else {
+ ret = 0;
+ }
+ } else {
+ ret = -ENOENT;
+ }
+ spin_unlock(&d->events.lock);
+
+ if (ret)
+ return ret;
+
+ ret = __copy_to_user(user_orig, event, event->size);
+ if (ret)
+ ret = -EFAULT;
+
+ kfree(event);
+
+ return ret;
+}
+
+/*
+ * file_operations.unlocked_ioctl for the debugger fd. Only
+ * DRM_XE_EUDEBUG_IOCTL_READ_EVENT is understood; anything else
+ * yields -EINVAL.
+ */
+static long xe_eudebug_ioctl(struct file *file,
+			     unsigned int cmd,
+			     unsigned long arg)
+{
+	struct xe_eudebug * const d = file->private_data;
+	long ret;
+
+	if (cmd != DRM_XE_EUDEBUG_IOCTL_READ_EVENT)
+		return -EINVAL;
+
+	ret = xe_eudebug_read_event(d, arg, file->f_flags & O_NONBLOCK);
+	eu_dbg(d, "ioctl cmd=READ_EVENT ret=%ld\n", ret);
+
+	return ret;
+}
+
+/* File operations backing the anonymous debugger connection fd. */
+static const struct file_operations fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = xe_eudebug_ioctl,
+	.read = xe_eudebug_read,
+	.poll = xe_eudebug_poll,
+	.llseek = no_llseek,
+	.release = xe_eudebug_release,
+};
+
+/*
+ * Resolve pid number @nr, as seen from the caller's pid namespace, to a
+ * task and take a task reference. Returns NULL if there is no such task.
+ */
+static struct task_struct *find_get_target(const pid_t nr)
+{
+	struct task_struct *target;
+	struct pid *pid;
+
+	rcu_read_lock();
+	pid = find_pid_ns(nr, task_active_pid_ns(current));
+	target = pid_task(pid, PIDTYPE_PID);
+	if (target)
+		get_task_struct(target);
+	rcu_read_unlock();
+
+	return target;
+}
+
+
+/*
+ * Create a debugger connection to the process given by @param->pid and
+ * return an anonymous fd for it.
+ *
+ * The connection is fully set up, referenced and linked into the device
+ * list *before* the fd is created. anon_inode_getfd() publishes the fd to
+ * user space, so from that point on another thread may close it and
+ * trigger xe_eudebug_release(); the connection must be complete by then
+ * and must not be touched afterwards. If fd creation fails, the already
+ * published connection is torn down through the regular disconnect/put
+ * path instead of being freed directly.
+ *
+ * Returns the new fd on success, -EINVAL on bad parameters, -EOPNOTSUPP
+ * if debugging is unavailable, -ENOMEM, -ENOENT if the target does not
+ * exist, -EACCES without CAP_SYS_ADMIN, -EBUSY if the target already has
+ * a debugger or session numbers ran out, or the fd allocation error.
+ */
+static int
+xe_eudebug_connect(struct xe_device *xe,
+		   struct drm_xe_eudebug_connect_param *param)
+{
+	const u64 known_open_flags = 0;
+	struct xe_eudebug *d, *t;
+	unsigned long f_flags = 0;
+	int fd;
+	int err;
+
+	if (!param->pid)
+		return -EINVAL;
+
+	if (param->flags & ~known_open_flags)
+		return -EINVAL;
+
+	if (param->version && param->version != DRM_XE_EUDEBUG_VERSION)
+		return -EINVAL;
+
+	/* XXX: You get all for now */
+	if (param->events)
+		return -EINVAL;
+
+	if (param->extensions)
+		return -EINVAL;
+
+	param->version = DRM_XE_EUDEBUG_VERSION;
+
+	/* ENOTSUPP is kernel-internal; user space gets EOPNOTSUPP */
+	if (!xe->eudebug.available)
+		return -EOPNOTSUPP;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	kref_init(&d->ref);
+	mutex_init(&d->lock);
+	init_waitqueue_head(&d->events.write_done);
+
+	spin_lock_init(&d->events.lock);
+	INIT_KFIFO(d->events.fifo);
+
+	d->xe = xe;
+
+	d->res = xe_eudebug_resources_alloc();
+	if (!d->res) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	d->target_task = find_get_target(param->pid);
+	if (!d->target_task) {
+		err = -ENOENT;
+		goto err_free_res;
+	}
+
+	/* XXX: Proper access tracking with ptrace_may_access */
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EACCES;
+		goto err_put_task;
+	}
+
+	t = xe_eudebug_for_task_get(xe, d->target_task);
+	if (t) {
+		xe_eudebug_put(t);
+		err = -EBUSY;
+		goto err_put_task;
+	}
+
+	spin_lock(&xe->eudebug.lock);
+	/* XXX handle the overflow without bailing out */
+	if (xe->eudebug.session_count + 1 == 0) {
+		spin_unlock(&xe->eudebug.lock);
+		drm_err(&xe->drm, "debugger connections exhausted. (you need module reload)\n");
+		err = -EBUSY;
+		goto err_put_task;
+	}
+
+	d->session = ++xe->eudebug.session_count;
+	/* reference owned by the list, dropped in xe_eudebug_disconnect() */
+	kref_get(&d->ref);
+	list_add_tail_rcu(&d->connection_link, &xe->eudebug.list);
+	spin_unlock(&xe->eudebug.lock);
+
+	eu_dbg(d, "connected session %lld", d->session);
+
+	/* The initial reference is handed over to the file on success. */
+	fd = anon_inode_getfd("[xe_eudebug]", &fops, d, f_flags);
+	if (fd < 0) {
+		/* already visible to others: unlink and unref the normal way */
+		xe_eudebug_disconnect(d, fd);
+		xe_eudebug_put(d);
+	}
+
+	/* do not touch @d past this point, the fd may already be closed */
+	return fd;
+
+err_put_task:
+	put_task_struct(d->target_task);
+err_free_res:
+	xe_eudebug_resources_free(d);
+err_free:
+	mutex_destroy(&d->lock);
+	kfree(d);
+
+	return err;
+}
+
+/*
+ * DRM ioctl entry point for XE_EUDEBUG_CONNECT: forwards the request
+ * in @data to xe_eudebug_connect() and returns its result.
+ */
+int xe_eudebug_connect_ioctl(struct drm_device *dev,
+			     void *data,
+			     struct drm_file *file)
+{
+	struct drm_xe_eudebug_connect_param * const param = data;
+
+	return xe_eudebug_connect(to_xe_device(dev), param);
+}
+
+/*
+ * Set up the per-device eudebug state: the connection list with its
+ * lock and the client tracking xarray with its managed mutex. If the
+ * mutex cannot be set up, a warning is logged and debugging is marked
+ * unavailable.
+ */
+void xe_eudebug_init(struct xe_device *xe)
+{
+	int err;
+
+	spin_lock_init(&xe->eudebug.lock);
+	INIT_LIST_HEAD(&xe->eudebug.list);
+	xa_init_flags(&xe->clients.xa, XA_FLAGS_ALLOC1);
+
+	err = drmm_mutex_init(&xe->drm, &xe->clients.lock);
+	if (err != 0)
+		drm_warn(&xe->drm,
+			 "eudebug init failed: %d, debugger unavailable\n",
+			 err);
+
+	xe->eudebug.available = !err;
+}
+
+/*
+ * Tear down the per-device eudebug state set up by xe_eudebug_init().
+ *
+ * All debugger connections are expected to be gone by now. The clients
+ * mutex was created with drmm_mutex_init() and is destroyed by the DRM
+ * managed-resource machinery, so it must not be destroyed again here.
+ * The clients xarray has no managed cleanup and is released explicitly.
+ */
+void xe_eudebug_fini(struct xe_device *xe)
+{
+	XE_WARN_ON(!list_empty_careful(&xe->eudebug.list));
+	XE_WARN_ON(!xa_empty(&xe->clients.xa));
+	xa_destroy(&xe->clients.xa);
+}
+
+#define struct_member(T, member) (((T *)0)->member)
+
+#define write_member(T_out, ptr, member, value) { \
+ BUILD_BUG_ON(sizeof(*ptr) != sizeof(T_out)); \
+ BUILD_BUG_ON(offsetof(typeof(*ptr), member) != \
+ offsetof(typeof(T_out), member)); \
+ BUILD_BUG_ON(sizeof(ptr->member) != sizeof(value)); \
+ BUILD_BUG_ON(sizeof(struct_member(T_out, member)) != sizeof(value)); \
+ BUILD_BUG_ON(!typecheck(typeof((ptr)->member), value)); \
+ /* memcpy(&ptr->member, &(value), sizeof(ptr->member)); */ \
+ (ptr)->member = (value); \
+ }
+
+/*
+ * Build and queue a client open event carrying @handle with @flags
+ * (CREATE or DESTROY).
+ *
+ * Returns 0 on success, -EINVAL for a zero or out-of-range handle,
+ * -ENOMEM on allocation failure, or the queueing error.
+ */
+static int send_open_event(struct xe_eudebug *d, u32 flags, const u64 handle)
+{
+	struct xe_eudebug_event_open *eo;
+	struct xe_eudebug_event *base;
+
+	if (handle == 0)
+		return -EINVAL;
+
+	if (XE_WARN_ON((long)handle >= INT_MAX))
+		return -EINVAL;
+
+	base = xe_eudebug_create_event(d, DRM_XE_EUDEBUG_EVENT_OPEN,
+				       flags, sizeof(*eo), GFP_KERNEL);
+	if (base == NULL)
+		return -ENOMEM;
+
+	eo = from_event(eo, base);
+
+	write_member(struct drm_xe_eudebug_event_client, eo,
+		     client_handle, handle);
+
+	return xe_eudebug_queue_event(d, base);
+}
+
+/*
+ * Start tracking @xef as a client of @d and, if that yields a valid
+ * handle, announce it to the debugger with a CREATE open event.
+ * Returns the tracking result when no handle was obtained, otherwise
+ * the result of sending the event.
+ */
+static int client_create_event(struct xe_eudebug *d, struct xe_file *xef)
+{
+	const int handle = xe_eudebug_add_handle(d, XE_EUDEBUG_RES_TYPE_CLIENT,
+						 xef);
+
+	if (handle <= 0)
+		return handle;
+
+	return send_open_event(d, DRM_XE_EUDEBUG_EVENT_CREATE, handle);
+}
+
+/*
+ * Stop tracking @xef as a client of @d and, if it was tracked, announce
+ * its removal to the debugger with a DESTROY open event.
+ * Returns the removal result when no handle was obtained, otherwise
+ * the result of sending the event.
+ */
+static int client_destroy_event(struct xe_eudebug *d, struct xe_file *xef)
+{
+	const int handle = xe_eudebug_remove_handle(d,
+						    XE_EUDEBUG_RES_TYPE_CLIENT,
+						    xef);
+
+	if (handle <= 0)
+		return handle;
+
+	return send_open_event(d, DRM_XE_EUDEBUG_EVENT_DESTROY, handle);
+}
+
+/*
+ * Hook for xe file open: if the opening process is being debugged,
+ * report the new client to the debugger. An already tracked client
+ * (-EEXIST) is tolerated; any other error disconnects the debugger.
+ */
+void xe_eudebug_file_open(struct xe_file *xef)
+{
+	struct xe_eudebug *d = xe_eudebug_get(xef);
+	int err;
+
+	if (d == NULL)
+		return;
+
+	err = client_create_event(d, xef);
+	if (err && err != -EEXIST) {
+		eu_err(d, "error %d on eudebug_file_open, disconnecting", err);
+		xe_eudebug_disconnect(d, err);
+	}
+
+	xe_eudebug_put(d);
+}
+
+/*
+ * Hook for xe file close: if the closing process is being debugged,
+ * report the client's removal to the debugger. Any error disconnects
+ * the debugger.
+ */
+void xe_eudebug_file_close(struct xe_file *xef)
+{
+	struct xe_eudebug *d = xe_eudebug_get(xef);
+	int err;
+
+	if (d == NULL)
+		return;
+
+	err = client_destroy_event(d, xef);
+	if (err != 0) {
+		eu_err(d, "error %d on eudebug_file_close, disconnecting", err);
+		xe_eudebug_disconnect(d, err);
+	}
+
+	xe_eudebug_put(d);
+}
+
+/*
+ * Build and queue a vm event with @flags (CREATE or DESTROY) for the vm
+ * @vm_handle belonging to client @client_handle.
+ * Returns 0 on success, -ENOMEM on allocation failure, or the queueing
+ * error.
+ */
+static int send_vm_event(struct xe_eudebug *d, u32 flags,
+			 const u64 client_handle,
+			 const u64 vm_handle)
+{
+	struct xe_eudebug_event_vm *e;
+	struct xe_eudebug_event *base;
+
+	base = xe_eudebug_create_event(d, DRM_XE_EUDEBUG_EVENT_VM,
+				       flags, sizeof(*e), GFP_KERNEL);
+	if (base == NULL)
+		return -ENOMEM;
+
+	e = from_event(e, base);
+
+	write_member(struct drm_xe_eudebug_event_vm, e, client_handle, client_handle);
+	write_member(struct drm_xe_eudebug_event_vm, e, vm_handle, vm_handle);
+
+	return xe_eudebug_queue_event(d, base);
+}
+
+/*
+ * Start tracking @vm for connection @d and send a vm CREATE event that
+ * references the handle of its owning client @xef.
+ * Fails with the lookup error if the client is not tracked, or with the
+ * tracking error if the vm cannot be added.
+ */
+static int vm_create_event(struct xe_eudebug *d,
+			   struct xe_file *xef, struct xe_vm *vm)
+{
+	int client_id, vm_id;
+
+	client_id = find_handle(d->res, XE_EUDEBUG_RES_TYPE_CLIENT, xef);
+	if (client_id < 0)
+		return client_id;
+
+	vm_id = xe_eudebug_add_handle(d, XE_EUDEBUG_RES_TYPE_VM, vm);
+	if (vm_id < 0)
+		return vm_id;
+
+	XE_WARN_ON(client_id == 0);
+	XE_WARN_ON(vm_id == 0);
+
+	return send_vm_event(d, DRM_XE_EUDEBUG_EVENT_CREATE, client_id, vm_id);
+}
+
+/*
+ * Stop tracking @vm for connection @d and send a vm DESTROY event that
+ * references the handle of its owning client @xef.
+ * Warns and fails with the lookup error if the client is not tracked;
+ * fails with the removal error if the vm was not tracked.
+ */
+static int vm_destroy_event(struct xe_eudebug *d,
+			    struct xe_file *xef, struct xe_vm *vm)
+{
+	int client_id, vm_id;
+
+	client_id = find_handle(d->res, XE_EUDEBUG_RES_TYPE_CLIENT, xef);
+	if (client_id < 0) {
+		XE_WARN_ON("no client found for vm");
+		eu_warn(d, "no client found for vm");
+		return client_id;
+	}
+
+	vm_id = xe_eudebug_remove_handle(d, XE_EUDEBUG_RES_TYPE_VM, vm);
+	if (vm_id < 0)
+		return vm_id;
+
+	XE_WARN_ON(client_id == 0);
+	XE_WARN_ON(vm_id == 0);
+
+	return send_vm_event(d, DRM_XE_EUDEBUG_EVENT_DESTROY, client_id, vm_id);
+}
+
+/*
+ * Hook for vm creation: if the owning process is being debugged, report
+ * the new vm to the debugger. -EEXIST and -ENOTCONN are tolerated; any
+ * other error disconnects the debugger.
+ */
+void xe_eudebug_vm_create(struct xe_file *xef, struct xe_vm *vm)
+{
+	struct xe_eudebug *d = xe_eudebug_get(xef);
+	int err;
+
+	if (d == NULL)
+		return;
+
+	err = vm_create_event(d, xef, vm);
+	if (err && err != -EEXIST && err != -ENOTCONN) {
+		eu_err(d, "error %d on eudebug_vm_create, disconnecting", err);
+		xe_eudebug_disconnect(d, err);
+	}
+
+	xe_eudebug_put(d);
+}
+
+/*
+ * Hook for vm destruction: if the owning process is being debugged,
+ * report the vm's removal to the debugger. Any error disconnects the
+ * debugger.
+ */
+void xe_eudebug_vm_destroy(struct xe_file *xef, struct xe_vm *vm)
+{
+	struct xe_eudebug *d = xe_eudebug_get(xef);
+	int err;
+
+	if (d == NULL)
+		return;
+
+	err = vm_destroy_event(d, xef, vm);
+	if (err != 0) {
+		eu_err(d, "error %d on eudebug_vm_destroy, disconnecting", err);
+		xe_eudebug_disconnect(d, err);
+	}
+
+	xe_eudebug_put(d);
+}
diff --git a/drivers/gpu/drm/xe/xe_eudebug.h b/drivers/gpu/drm/xe/xe_eudebug.h
new file mode 100644
index 000000000000..df577b581364
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_eudebug.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_EUDEBUG_H_
+#define _XE_EUDEBUG_H_
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+struct xe_vm;
+
+/* DRM ioctl handler for XE_EUDEBUG_CONNECT */
+int xe_eudebug_connect_ioctl(struct drm_device *dev,
+			     void *data,
+			     struct drm_file *file);
+
+/* Per-device setup and teardown of the eudebug state */
+void xe_eudebug_init(struct xe_device *xe);
+void xe_eudebug_fini(struct xe_device *xe);
+
+/* Resource lifetime hooks called by the driver to notify a debugger */
+void xe_eudebug_file_open(struct xe_file *xef);
+void xe_eudebug_file_close(struct xe_file *xef);
+
+void xe_eudebug_vm_create(struct xe_file *xef, struct xe_vm *vm);
+void xe_eudebug_vm_destroy(struct xe_file *xef, struct xe_vm *vm);
+
+#endif /* _XE_EUDEBUG_H_ */
diff --git a/drivers/gpu/drm/xe/xe_eudebug_types.h b/drivers/gpu/drm/xe/xe_eudebug_types.h
new file mode 100644
index 000000000000..1086944966cb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_eudebug_types.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_EUDEBUG_TYPES_H_
+#define __XE_EUDEBUG_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/kfifo.h>
+#include <linux/completion.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+#include <linux/rbtree.h>
+#include <linux/rhashtable.h>
+
+#include <uapi/drm/xe_drm.h>
+
+struct xe_device;
+struct task_struct;
+struct xe_eudebug_event;
+
+#define CONFIG_DRM_XE_DEBUGGER_EVENT_QUEUE_SIZE 32
+
+/**
+ * struct xe_eudebug_event - Internal base event struct for eudebug
+ *
+ * Common header of every queued event. Type specific events embed this
+ * struct as their first member named @base.
+ */
+struct xe_eudebug_event {
+ /** @ext: extensions for this event */
+ struct xe_user_extension ext;
+
+ /** @type: message type */
+ u32 type;
+
+ /** @flags: message flags */
+ u32 flags;
+
+ /** @seqno: sequence number for ordering */
+ u64 seqno;
+
+ /** @size: size of this event, including payload */
+ u64 size;
+
+ /** @data: payload bytes */
+ u8 data[];
+} __packed;
+
+/**
+ * struct xe_eudebug_event_open - Internal event for client open/close
+ *
+ * Written to with the uapi struct drm_xe_eudebug_event_client as the
+ * layout reference (see send_open_event()).
+ */
+struct xe_eudebug_event_open {
+ /** @base: base event */
+ struct xe_eudebug_event base;
+
+ /** @client_handle: opaque handle for client */
+ u64 client_handle;
+} __packed;
+
+/**
+ * struct xe_eudebug_event_vm - Internal event for vm open/close
+ *
+ * Written to with the uapi struct drm_xe_eudebug_event_vm as the layout
+ * reference (see send_vm_event()).
+ */
+struct xe_eudebug_event_vm {
+ /** @base: base event */
+ struct xe_eudebug_event base;
+
+ /** @client_handle: client containing the vm open/close */
+ u64 client_handle;
+
+ /** @vm_handle: vm handle it's open/close */
+ u64 vm_handle;
+} __packed;
+
+/**
+ * struct xe_eudebug_handle - eudebug resource handle
+ *
+ * One tracked resource. The same object is stored in both the xarray
+ * (looked up by @id) and the rhashtable (looked up by @key) of its
+ * resource type.
+ */
+struct xe_eudebug_handle {
+ /** @key: key value in rhashtable <key:id> */
+ u64 key;
+
+ /** @id: opaque handle id for xarray <id:key> */
+ int id;
+
+ /** @rh_head: rhashtable head */
+ struct rhash_head rh_head;
+};
+
+/**
+ * struct xe_eudebug_resource - Resource map for one resource
+ *
+ * Two-way mapping between resource keys and debugger visible ids.
+ */
+struct xe_eudebug_resource {
+ /** @xa: xarrays for <id->key> */
+ struct xarray xa;
+
+ /** @rh: rhashtable for <key->id> */
+ struct rhashtable rh;
+};
+
+#define XE_EUDEBUG_RES_TYPE_CLIENT 0
+#define XE_EUDEBUG_RES_TYPE_VM 1
+#define XE_EUDEBUG_RES_TYPE_COUNT (XE_EUDEBUG_RES_TYPE_VM + 1)
+
+/**
+ * struct xe_eudebug_resources - eudebug resources for all types
+ *
+ * Holds one resource map per XE_EUDEBUG_RES_TYPE_* value.
+ */
+struct xe_eudebug_resources {
+ /** @lock: guards access into rt */
+ struct mutex lock;
+
+ /** @rt: resource maps for all types */
+ struct xe_eudebug_resource rt[XE_EUDEBUG_RES_TYPE_COUNT];
+};
+
+/**
+ * struct xe_eudebug - Top level struct for eudebug: the connection
+ *
+ * One instance exists per debugger connection. It is reference counted
+ * through @ref and freed after an RCU grace period.
+ */
+struct xe_eudebug {
+ /** @ref: kref counter for this struct */
+ struct kref ref;
+
+ /** @rcu: rcu_head for rcu destruction */
+ struct rcu_head rcu;
+
+ /** @connection_link: our link into the xe_device:eudebug.list */
+ struct list_head connection_link;
+
+ /** @lock: guards updates of closed and last_error */
+ struct mutex lock;
+
+ /** @last_error: the error that resulted in disconnect */
+ int last_error;
+
+ /** @xe: the parent device we are serving */
+ struct xe_device *xe;
+
+ /** @closed: if debug connection is closed (one way) */
+ bool closed;
+
+ /** @target_task: the task that we are debugging */
+ struct task_struct *target_task;
+
+ /** @res: the resource maps we track for target_task */
+ struct xe_eudebug_resources *res;
+
+ /** @session: session number for this connection (for logs) */
+ u64 session;
+
+ /** @events: kfifo queue of to-be-delivered events */
+ struct {
+ /** @lock: guards access to fifo */
+ spinlock_t lock;
+
+ /** @fifo: queue of events pending */
+ DECLARE_KFIFO(fifo,
+ struct xe_eudebug_event *,
+ CONFIG_DRM_XE_DEBUGGER_EVENT_QUEUE_SIZE);
+
+ /** @write_done: waitqueue for signalling write to fifo */
+ wait_queue_head_t write_done;
+
+ /** @seqno: seqno counter to stamp events for fifo */
+ atomic_long_t seqno;
+ } events;
+
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 272f0f7f24fe..ab672d1eb1a4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -27,6 +27,7 @@
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace.h"
+#include "xe_eudebug.h"
#define TEST_VM_ASYNC_OPS_ERROR
@@ -1905,6 +1906,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif
+ xe_eudebug_vm_create(xef, vm);
+
return 0;
}
@@ -1928,6 +1931,8 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
return -EBUSY;
+ xe_eudebug_vm_destroy(xef, vm);
+
mutex_lock(&xef->vm.lock);
xa_erase(&xef->vm.xa, args->vm_id);
mutex_unlock(&xef->vm.lock);
diff --git a/include/uapi/drm/xe_drm_tmp.h b/include/uapi/drm/xe_drm_tmp.h
new file mode 100644
index 000000000000..9829cd724075
--- /dev/null
+++ b/include/uapi/drm/xe_drm_tmp.h
@@ -0,0 +1,76 @@
+#ifndef _UAPI_XE_DRM_TMP_H_
+#define _UAPI_XE_DRM_TMP_H_
+
+#include "xe_drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_XE_EUDEBUG_CONNECT 0x5f
+
+#define DRM_IOCTL_XE_EUDEBUG_CONNECT DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EUDEBUG_CONNECT, struct drm_xe_eudebug_connect_param)
+
+/**
+ * Do a eudebug event read for a debugger connection.
+ *
+ * The argument is a pointer to a buffer starting with a
+ * struct drm_xe_eudebug_event; the caller sets type to
+ * DRM_XE_EUDEBUG_EVENT_READ and size to the buffer capacity.
+ *
+ * This ioctl is available in debug version 1.
+ *
+ * NOTE(review): defined with _IO() although a struct is copied in both
+ * directions - confirm whether _IOWR() with the struct type is wanted
+ * before this becomes stable uapi.
+ */
+#define DRM_XE_EUDEBUG_IOCTL_READ_EVENT _IO('j', 0x0)
+
+/* XXX: Document events to match their internal counterparts when moved to xe_drm.h */
+/* Base header shared by all eudebug events delivered to user space. */
+struct drm_xe_eudebug_event {
+ struct xe_user_extension ext;
+
+ /* Event type, one of DRM_XE_EUDEBUG_EVENT_* below */
+ __u32 type;
+#define DRM_XE_EUDEBUG_EVENT_NONE 0
+#define DRM_XE_EUDEBUG_EVENT_READ 1
+#define DRM_XE_EUDEBUG_EVENT_OPEN 2
+#define DRM_XE_EUDEBUG_EVENT_VM 3
+#define DRM_XE_EUDEBUG_EVENT_MAX_EVENT DRM_XE_EUDEBUG_EVENT_VM
+
+ /* Bitmask qualifying the event */
+ __u32 flags;
+#define DRM_XE_EUDEBUG_EVENT_CREATE (1 << 0)
+#define DRM_XE_EUDEBUG_EVENT_DESTROY (1 << 1)
+#define DRM_XE_EUDEBUG_EVENT_STATE_CHANGE (1 << 2)
+
+ /* Sequence number stamped by the kernel when the event is created */
+ __u64 seqno;
+ /* Size of the whole event in bytes, including this header */
+ __u64 size;
+} __attribute__((packed));
+
+/* Event of type DRM_XE_EUDEBUG_EVENT_OPEN: a client was created or destroyed. */
+struct drm_xe_eudebug_event_client {
+ struct drm_xe_eudebug_event base; /* .flags = CREATE/DESTROY */
+
+ __u64 client_handle; /* This is unique per debug connection */
+} __attribute__((packed));
+
+/* Event of type DRM_XE_EUDEBUG_EVENT_VM: a vm was created or destroyed. */
+struct drm_xe_eudebug_event_vm {
+ struct drm_xe_eudebug_event base;
+
+ __u64 client_handle; /* handle of the client owning the vm */
+ __u64 vm_handle; /* handle of the vm itself */
+} __attribute__((packed));
+
+/*
+ * Debugger ABI (ioctl and events) Version History:
+ * 0 - No debugger available
+ * 1 - Initial version
+ */
+#define DRM_XE_EUDEBUG_VERSION 1
+
+/* Argument of DRM_IOCTL_XE_EUDEBUG_CONNECT. */
+struct drm_xe_eudebug_connect_param {
+ struct xe_user_extension ext;
+
+ __u64 pid; /* input: Target process ID */
+ __u32 flags; /* input: no flags are defined yet, MBZ */
+
+ __u32 version; /* output: current ABI (ioctl / events) version */
+ __u64 events; /* input: event types to subscribe to */
+ __u64 extensions; /* MBZ */
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_TMP_H_ */
--
2.34.1
More information about the Intel-xe
mailing list