[Intel-xe] [PATCH 2/2] drm/xe: Return correct error code for xe_wait_user_fence_ioctl
Bommu Krishnaiah
krishnaiah.bommu at intel.com
Tue Dec 5 14:37:55 UTC 2023
return correct error code if exec_queue is reset/engine is hung
Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu at intel.com>
Cc: Oak Zeng <oak.zeng at intel.com>
Cc: Kempczynski Zbigniew <Zbigniew.Kempczynski at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 ++
drivers/gpu/drm/xe/xe_execlist.c | 7 +++++++
drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++++++
drivers/gpu/drm/xe/xe_wait_user_fence.c | 13 +++++++++++++
4 files changed, 32 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 5ba47a5cfdbd..84ccf7242247 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -206,6 +206,8 @@ struct xe_exec_queue_ops {
* signalled when this function is called.
*/
void (*resume)(struct xe_exec_queue *q);
+ /** @reset_status: check exec queue reset status */
+ bool (*reset_status)(struct xe_exec_queue *q);
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index e8754adfc52a..dab41e99f3d3 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -446,6 +446,12 @@ static void execlist_exec_queue_resume(struct xe_exec_queue *q)
/* NIY */
}
+static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+ /* NIY */
+ return false;
+}
+
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.init = execlist_exec_queue_init,
.kill = execlist_exec_queue_kill,
@@ -457,6 +463,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.suspend = execlist_exec_queue_suspend,
.suspend_wait = execlist_exec_queue_suspend_wait,
.resume = execlist_exec_queue_resume,
+ .reset_status = execlist_exec_queue_reset_status,
};
int xe_execlist_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 46b132ee1d3a..a42c3574cbec 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -854,6 +854,10 @@ static void simple_error_capture(struct xe_exec_queue *q)
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+
+ /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
+ wake_up_all(&xe->ufence_wq);
if (xe_exec_queue_is_lr(q))
queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
@@ -1394,6 +1398,11 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
guc_exec_queue_add_msg(q, msg, RESUME);
}
+static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+ return exec_queue_reset(q);
+}
+
/*
* All of these functions are an abstraction layer which other parts of XE can
* use to trap into the GuC backend. All of these functions, aside from init,
@@ -1411,6 +1420,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.suspend = guc_exec_queue_suspend,
.suspend_wait = guc_exec_queue_suspend_wait,
.resume = guc_exec_queue_resume,
+ .reset_status = guc_exec_queue_reset_status,
};
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 447e4cfd204c..fe3661b48a8f 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -13,6 +13,7 @@
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_macros.h"
+#include "xe_exec_queue.h"
static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
{
@@ -108,8 +109,10 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
struct drm_xe_wait_user_fence *args = data;
+ struct xe_exec_queue *q = NULL;
u64 addr = args->addr;
int err;
long timeout;
@@ -128,6 +131,10 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, addr & 0x7))
return -EINVAL;
+ q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+ if (XE_IOCTL_DBG(xe, !q))
+ return -ENOENT;
+
timeout = to_jiffies_timeout(xe, args);
start = ktime_get();
@@ -143,6 +150,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
break;
}
+ if (q->ops->reset_status(q)) {
+ drm_info(&xe->drm, "exec gueue reset detected\n");
+ err = -EIO;
+ break;
+ }
+
if (!timeout) {
err = -ETIME;
break;
--
2.25.1
More information about the Intel-xe
mailing list