[PATCH 7/7] drm/i915: Use irq work for coalescing-only dma-fence-work
Thomas Hellström
thomas.hellstrom at linux.intel.com
Wed Oct 6 06:59:48 UTC 2021
We are using a timeline-attached struct dma_fence_work to coalesce
dma-fences on eviction. In this mode we will not have a work callback
attached.
Similar to how the dma-fence-chain and dma-fence-array containers do this,
use irq work to signal the fence when no work callback is attached, which
reduces signalling latency compared to queuing a worker.
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
drivers/gpu/drm/i915/i915_sw_fence_work.c | 36 ++++++++++++++++++-----
drivers/gpu/drm/i915/i915_sw_fence_work.h | 2 ++
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index bb62898752b3..485f930b9f8b 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -13,16 +13,17 @@ void dma_fence_work_timeline_attach(struct dma_fence_work_timeline *tl,
{
struct dma_fence *await;
+ might_sleep();
if (tl->ops->get)
tl->ops->get(tl);
- spin_lock(&tl->lock);
+ spin_lock_irq(&tl->lock);
await = tl->last_fence;
tl->last_fence = dma_fence_get(&f->dma);
f->dma.seqno = tl->seqno++;
f->dma.context = tl->context;
f->tl = tl;
- spin_unlock(&tl->lock);
+ spin_unlock_irq(&tl->lock);
if (await) {
__i915_sw_fence_await_dma_fence(&f->chain, await, tl_cb);
@@ -34,13 +35,14 @@ static void dma_fence_work_timeline_detach(struct dma_fence_work *f)
{
struct dma_fence_work_timeline *tl = f->tl;
bool put = false;
+ unsigned long irq_flags;
- spin_lock(&tl->lock);
+ spin_lock_irqsave(&tl->lock, irq_flags);
if (tl->last_fence == &f->dma) {
put = true;
tl->last_fence = NULL;
}
- spin_unlock(&tl->lock);
+ spin_unlock_irqrestore(&tl->lock, irq_flags);
if (tl->ops->put)
tl->ops->put(tl);
if (put)
@@ -49,8 +51,6 @@ static void dma_fence_work_timeline_detach(struct dma_fence_work *f)
static void dma_fence_work_complete(struct dma_fence_work *f)
{
- dma_fence_signal(&f->dma);
-
if (f->ops->release)
f->ops->release(f);
@@ -60,10 +60,27 @@ static void dma_fence_work_complete(struct dma_fence_work *f)
dma_fence_put(&f->dma);
}
+static void dma_fence_work_irq_work(struct irq_work *irq_work)
+{
+ struct dma_fence_work *f = container_of(irq_work, typeof(*f), irq_work);
+
+ dma_fence_signal(&f->dma);
+ if (f->ops->release)
+ /* Note we take the signaled path in dma_fence_work_work() */
+ queue_work(system_unbound_wq, &f->work);
+ else
+ dma_fence_work_complete(f);
+}
+
static void dma_fence_work_work(struct work_struct *work)
{
struct dma_fence_work *f = container_of(work, typeof(*f), work);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->dma.flags)) {
+ dma_fence_work_complete(f);
+ return;
+ }
+
if (f->ops->work) {
bool cookie = dma_fence_begin_signalling();
@@ -71,6 +88,8 @@ static void dma_fence_work_work(struct work_struct *work)
dma_fence_end_signalling(cookie);
}
+ dma_fence_signal(&f->dma);
+
dma_fence_work_complete(f);
}
@@ -87,8 +106,10 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
dma_fence_get(&f->dma);
if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
dma_fence_work_work(&f->work);
- else
+ else if (f->ops->work)
queue_work(system_unbound_wq, &f->work);
+ else
+ irq_work_queue(&f->irq_work);
break;
case FENCE_FREE:
@@ -140,6 +161,7 @@ void dma_fence_work_init(struct dma_fence_work *f,
dma_fence_init(&f->dma, &fence_ops, &f->lock, 0, 0);
i915_sw_fence_init(&f->chain, fence_notify);
INIT_WORK(&f->work, dma_fence_work_work);
+ init_irq_work(&f->irq_work, dma_fence_work_irq_work);
}
int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h
index 77361666accb..2d17e4a1b5e3 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h
@@ -8,6 +8,7 @@
#define I915_SW_FENCE_WORK_H
#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
@@ -47,6 +48,7 @@ struct dma_fence_work {
struct i915_sw_dma_fence_cb cb;
struct work_struct work;
+ struct irq_work irq_work;
struct dma_fence_work_timeline *tl;
--
2.31.1
More information about the Intel-gfx-trybot
mailing list