[PATCH] nouveau: Add irq waiting as alternative to busywait

Maarten Lankhorst m.b.lankhorst at gmail.com
Fri Jul 13 14:35:53 PDT 2012


A way to trigger an irq will be needed for optimus support, since
cpu-waiting isn't always viable there. This could also be nice for
power saving, since the cpu would no longer have to spin, and
performance might improve slightly on cpu-limited workloads.

Some way to quantify these effects would be nice, even if the
end result would be 'no performance regression'. An earlier
version always emitted an interrupt, resulting in glxgears going
from 8k fps to 7k. However, this is no longer the case, as I'm
now using the kernel submission channel to generate irqs as
needed.

On nv84 I'm using NOTIFY_INTR, but that might have been
removed on fermi, so there I'm instead using the invalid command
0x0058 as a way to signal completion.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>

---
 drivers/gpu/drm/nouveau/nouveau_drv.h   |    2 +
 drivers/gpu/drm/nouveau/nouveau_fence.c |   49 ++++++++++++++++++++++++++++---
 drivers/gpu/drm/nouveau/nouveau_fifo.h  |    1 +
 drivers/gpu/drm/nouveau/nouveau_state.c |    1 +
 drivers/gpu/drm/nouveau/nv04_fifo.c     |   25 ++++++++++++++++
 drivers/gpu/drm/nouveau/nv84_fence.c    |   18 +++++++++--
 drivers/gpu/drm/nouveau/nvc0_fence.c    |   12 ++++++--
 drivers/gpu/drm/nouveau/nvc0_fifo.c     |    3 +-
 drivers/gpu/drm/nouveau/nve0_fifo.c     |   15 +++++++--
 9 files changed, 110 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index f97a1a7..d9d274d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -707,6 +707,7 @@ struct drm_nouveau_private {
 		struct drm_mm heap;
 		struct nouveau_bo *bo;
 	} fence;
+	wait_queue_head_t fence_wq;
 
 	struct {
 		spinlock_t lock;
@@ -1656,6 +1657,7 @@ nv44_graph_class(struct drm_device *dev)
 #define NV84_SUBCHAN_WRCACHE_FLUSH                                   0x00000024
 #define NV10_SUBCHAN_REF_CNT                                         0x00000050
 #define NVSW_SUBCHAN_PAGE_FLIP                                       0x00000054
+#define NVSW_SUBCHAN_FENCE_WAKE                                      0x00000058
 #define NV11_SUBCHAN_DMA_SEMAPHORE                                   0x00000060
 #define NV11_SUBCHAN_SEMAPHORE_OFFSET                                0x00000064
 #define NV11_SUBCHAN_SEMAPHORE_ACQUIRE                               0x00000068
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 3c18049..3ba8dee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -68,7 +68,7 @@ nouveau_fence_update(struct nouveau_channel *chan)
 
 	spin_lock(&fctx->lock);
 	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
-		if (priv->read(chan) < fence->sequence)
+		if (priv->read(chan) - fence->sequence >= 0x80000000U)
 			break;
 
 		if (fence->work)
@@ -111,11 +111,9 @@ nouveau_fence_done(struct nouveau_fence *fence)
 	return !fence->channel;
 }
 
-int
-nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
+static int nouveau_fence_wait_busy(struct nouveau_fence *fence, bool lazy, bool intr)
 {
 	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
-	ktime_t t;
 	int ret = 0;
 
 	while (!nouveau_fence_done(fence)) {
@@ -127,7 +125,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 		__set_current_state(intr ? TASK_INTERRUPTIBLE :
 					   TASK_UNINTERRUPTIBLE);
 		if (lazy) {
-			t = ktime_set(0, sleep_time);
+			ktime_t t = ktime_set(0, sleep_time);
 			schedule_hrtimeout(&t, HRTIMER_MODE_REL);
 			sleep_time *= 2;
 			if (sleep_time > NSEC_PER_MSEC)
@@ -144,6 +142,47 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 	return ret;
 }
 
+static int nouveau_fence_wait_event(struct nouveau_fence *fence, bool intr)
+{
+	struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
+	unsigned long timeout = fence->timeout;
+	int ret = 0;
+	struct nouveau_channel *chan = dev_priv->channel;
+	struct nouveau_channel *prev = fence->channel;
+	struct nouveau_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+
+	if (nouveau_fence_done(fence))
+		return 0;
+
+	if (!timeout)
+		timeout = jiffies + 3 * DRM_HZ;
+
+	if (prev != chan)
+		ret = priv->sync(fence, prev, chan);
+	if (ret)
+		goto busy;
+
+	if (intr)
+		ret = wait_event_interruptible_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
+	else
+		ret = wait_event_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
+
+	return ret;
+
+busy:
+	return nouveau_fence_wait_busy(fence, true, intr);
+}
+
+int
+nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
+{
+	struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
+	if (dev_priv->chipset >= 0x84 && dev_priv->channel && lazy)
+		return nouveau_fence_wait_event(fence, intr);
+	else
+		return nouveau_fence_wait_busy(fence, lazy, intr);
+}
+
 int
 nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h b/drivers/gpu/drm/nouveau/nouveau_fifo.h
index ce99cab..942e211 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fifo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h
@@ -15,6 +15,7 @@ int  nv04_fifo_fini(struct drm_device *, int, bool);
 int  nv04_fifo_init(struct drm_device *, int);
 void nv04_fifo_isr(struct drm_device *);
 void nv04_fifo_destroy(struct drm_device *, int);
+bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid);
 
 void nv50_fifo_playlist_update(struct drm_device *);
 void nv50_fifo_destroy(struct drm_device *, int);
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 19706f0..6cdfacb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -515,6 +515,7 @@ nouveau_card_channel_init(struct drm_device *dev)
 	dev_priv->channel = chan;
 	if (ret)
 		return ret;
+	init_waitqueue_head(&dev_priv->fence_wq);
 	mutex_unlock(&dev_priv->channel->mutex);
 
 	nouveau_bo_move_init(chan);
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index a6295cd..1665a82 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -307,6 +307,26 @@ out:
 	return handled;
 }
 
+bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid)
+{
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_channel *chan = NULL;
+	bool handled = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->channels.lock, flags);
+	if (likely(chid >= 0 && chid < pfifo->channels)) {
+		chan = dev_priv->channel;
+		if (chan->id == chid) {
+			wake_up_all(&dev_priv->fence_wq);
+			handled = true;
+		}
+	}
+	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
+	return handled;
+}
+
 static const char *nv_dma_state_err(u32 state)
 {
 	static const char * const desc[] = {
@@ -448,6 +468,11 @@ nv04_fifo_isr(struct drm_device *dev)
 				status &= ~0x00000010;
 				nv_wr32(dev, 0x002100, 0x00000010);
 			}
+			if (status & 0x80000000) {
+				nouveau_fifo_wakeup(dev, chid);
+				status &= ~0x80000000;
+				nv_wr32(dev, 0x002100, 0x80000000);
+			}
 		}
 
 		if (status) {
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index c2f889b..2b8c3e5 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -42,15 +42,20 @@ static int
 nv84_fence_emit(struct nouveau_fence *fence)
 {
 	struct nouveau_channel *chan = fence->channel;
-	int ret = RING_SPACE(chan, 7);
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	bool intr = dev_priv->channel == chan;
+
+	int ret = RING_SPACE(chan, 7 + intr);
 	if (ret == 0) {
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
 		OUT_RING  (chan, NvSema);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
 		OUT_RING  (chan, upper_32_bits(chan->id * 16));
 		OUT_RING  (chan, lower_32_bits(chan->id * 16));
 		OUT_RING  (chan, fence->sequence);
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+		if (intr)
+			OUT_RING  (chan, 0);
 		FIRE_RING (chan);
 	}
 	return ret;
@@ -61,15 +66,20 @@ static int
 nv84_fence_sync(struct nouveau_fence *fence,
 		struct nouveau_channel *prev, struct nouveau_channel *chan)
 {
-	int ret = RING_SPACE(chan, 7);
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	bool intr = dev_priv->channel == chan;
+
+	int ret = RING_SPACE(chan, 7 + intr);
 	if (ret == 0) {
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
 		OUT_RING  (chan, NvSema);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
 		OUT_RING  (chan, upper_32_bits(prev->id * 16));
 		OUT_RING  (chan, lower_32_bits(prev->id * 16));
 		OUT_RING  (chan, fence->sequence);
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
+		if (intr)
+			OUT_RING  (chan, 0);
 		FIRE_RING (chan);
 	}
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
index 47ab388..25805ef 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fence.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
@@ -45,15 +45,19 @@ nvc0_fence_emit(struct nouveau_fence *fence)
 	struct nouveau_channel *chan = fence->channel;
 	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
 	u64 addr = fctx->vma.offset + chan->id * 16;
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	bool intr = dev_priv->channel == chan;
 	int ret;
 
-	ret = RING_SPACE(chan, 5);
+	ret = RING_SPACE(chan, 5 + intr);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 		OUT_RING  (chan, upper_32_bits(addr));
 		OUT_RING  (chan, lower_32_bits(addr));
 		OUT_RING  (chan, fence->sequence);
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+		if (intr)
+			BEGIN_IMC0(chan, 0, 0x058, 0);
 		FIRE_RING (chan);
 	}
 
@@ -64,11 +68,13 @@ static int
 nvc0_fence_sync(struct nouveau_fence *fence,
 		struct nouveau_channel *prev, struct nouveau_channel *chan)
 {
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
 	u64 addr = fctx->vma.offset + prev->id * 16;
+	bool intr = dev_priv->channel == chan;
 	int ret;
 
-	ret = RING_SPACE(chan, 5);
+	ret = RING_SPACE(chan, 5 + intr);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 		OUT_RING  (chan, upper_32_bits(addr));
@@ -76,6 +82,8 @@ nvc0_fence_sync(struct nouveau_fence *fence,
 		OUT_RING  (chan, fence->sequence);
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
 				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
+		if (intr)
+			BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_FENCE_WAKE, 0);
 		FIRE_RING (chan);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index 7d85553..b3930c7 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -356,7 +356,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
 		if (mthd == 0x0054) {
 			if (!nvc0_fifo_page_flip(dev, chid))
 				show &= ~0x00200000;
-		}
+		} else if (mthd == 0x0058 && nouveau_fifo_wakeup(dev, chid))
+			show &= ~0x00200000;
 	}
 
 	if (show) {
diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c
index 1855ecb..33e41cd 100644
--- a/drivers/gpu/drm/nouveau/nve0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nve0_fifo.c
@@ -303,11 +303,18 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
 	u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f;
 	u32 subc = (addr & 0x00070000);
 	u32 mthd = (addr & 0x00003ffc);
+	u32 show = stat;
 
-	NV_INFO(dev, "PSUBFIFO %d:", unit);
-	nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat);
-	NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
-		unit, chid, subc, mthd, data);
+	if (stat & 0x00200000 && mthd == 0x0058 &&
+	    nouveau_fifo_wakeup(dev, chid))
+		show &= ~0x00200000;
+
+	if (show) {
+		NV_INFO(dev, "PSUBFIFO %d:", unit);
+		nouveau_bitfield_print(nve0_fifo_subfifo_intr, show);
+		NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
+			unit, chid, subc, mthd, data);
+	}
 
 	nv_wr32(dev, 0x0400c0 + (unit * 0x2000), 0x80600008);
 	nv_wr32(dev, 0x040108 + (unit * 0x2000), stat);



More information about the dri-devel mailing list