[PATCH] nouveau: Add irq waiting as alternative to busywait
Maarten Maathuis
madman2003 at gmail.com
Fri Jul 13 15:56:01 PDT 2012
On Fri, Jul 13, 2012 at 11:35 PM, Maarten Lankhorst
<m.b.lankhorst at gmail.com> wrote:
> A way to trigger an irq will be needed for optimus support since
> cpu-waiting isn't always viable there. This could also be nice for
> power saving on since cpu would no longer have to spin, and
> performance might improve slightly on cpu-limited workloads.
>
> Some way to quantify these effects would be nice, even if the
> end result would be 'no performance regression'. An earlier
> version always emitted an interrupt, resulting in glxgears going
> from 8k fps to 7k. However this is no longer the case, as I'm
> using the kernel submission channel for generating irqs as
> needed now.
>
> On nv84 I'm using NOTIFY_INTR, but that might have been
> removed on fermi, so instead I'm using invalid command
> 0x0058 now as a way to signal completion.
Out of curiosity, isn't this like a handcoded version of software
methods? If so, why handcoded? Or are software methods not supported
on NVC0?
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>
> ---
> drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +
> drivers/gpu/drm/nouveau/nouveau_fence.c | 49 ++++++++++++++++++++++++++++---
> drivers/gpu/drm/nouveau/nouveau_fifo.h | 1 +
> drivers/gpu/drm/nouveau/nouveau_state.c | 1 +
> drivers/gpu/drm/nouveau/nv04_fifo.c | 25 ++++++++++++++++
> drivers/gpu/drm/nouveau/nv84_fence.c | 18 +++++++++--
> drivers/gpu/drm/nouveau/nvc0_fence.c | 12 ++++++--
> drivers/gpu/drm/nouveau/nvc0_fifo.c | 3 +-
> drivers/gpu/drm/nouveau/nve0_fifo.c | 15 +++++++--
> 9 files changed, 110 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index f97a1a7..d9d274d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -707,6 +707,7 @@ struct drm_nouveau_private {
> struct drm_mm heap;
> struct nouveau_bo *bo;
> } fence;
> + wait_queue_head_t fence_wq;
>
> struct {
> spinlock_t lock;
> @@ -1656,6 +1657,7 @@ nv44_graph_class(struct drm_device *dev)
> #define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
> #define NV10_SUBCHAN_REF_CNT 0x00000050
> #define NVSW_SUBCHAN_PAGE_FLIP 0x00000054
> +#define NVSW_SUBCHAN_FENCE_WAKE 0x00000058
> #define NV11_SUBCHAN_DMA_SEMAPHORE 0x00000060
> #define NV11_SUBCHAN_SEMAPHORE_OFFSET 0x00000064
> #define NV11_SUBCHAN_SEMAPHORE_ACQUIRE 0x00000068
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 3c18049..3ba8dee 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -68,7 +68,7 @@ nouveau_fence_update(struct nouveau_channel *chan)
>
> spin_lock(&fctx->lock);
> list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
> - if (priv->read(chan) < fence->sequence)
> + if (priv->read(chan) - fence->sequence >= 0x80000000U)
> break;
>
> if (fence->work)
> @@ -111,11 +111,9 @@ nouveau_fence_done(struct nouveau_fence *fence)
> return !fence->channel;
> }
>
> -int
> -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +static int nouveau_fence_wait_busy(struct nouveau_fence *fence, bool lazy, bool intr)
> {
> unsigned long sleep_time = NSEC_PER_MSEC / 1000;
> - ktime_t t;
> int ret = 0;
>
> while (!nouveau_fence_done(fence)) {
> @@ -127,7 +125,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> __set_current_state(intr ? TASK_INTERRUPTIBLE :
> TASK_UNINTERRUPTIBLE);
> if (lazy) {
> - t = ktime_set(0, sleep_time);
> + ktime_t t = ktime_set(0, sleep_time);
> schedule_hrtimeout(&t, HRTIMER_MODE_REL);
> sleep_time *= 2;
> if (sleep_time > NSEC_PER_MSEC)
> @@ -144,6 +142,47 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> return ret;
> }
>
> +static int nouveau_fence_wait_event(struct nouveau_fence *fence, bool intr)
> +{
> + struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
> + unsigned long timeout = fence->timeout;
> + int ret = 0;
> + struct nouveau_channel *chan = dev_priv->channel;
> + struct nouveau_channel *prev = fence->channel;
> + struct nouveau_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
> +
> + if (nouveau_fence_done(fence))
> + return 0;
> +
> + if (!timeout)
> + timeout = jiffies + 3 * DRM_HZ;
> +
> + if (prev != chan)
> + ret = priv->sync(fence, prev, chan);
> + if (ret)
> + goto busy;
> +
> + if (intr)
> + ret = wait_event_interruptible_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
> + else
> + ret = wait_event_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
> +
> + return ret;
> +
> +busy:
> + return nouveau_fence_wait_busy(fence, true, intr);
> +}
> +
> +int
> +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +{
> + struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
> + if (dev_priv->chipset >= 0x84 && dev_priv->channel && lazy)
> + return nouveau_fence_wait_event(fence, intr);
> + else
> + return nouveau_fence_wait_busy(fence, lazy, intr);
> +}
> +
> int
> nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
> {
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> index ce99cab..942e211 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fifo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> @@ -15,6 +15,7 @@ int nv04_fifo_fini(struct drm_device *, int, bool);
> int nv04_fifo_init(struct drm_device *, int);
> void nv04_fifo_isr(struct drm_device *);
> void nv04_fifo_destroy(struct drm_device *, int);
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid);
>
> void nv50_fifo_playlist_update(struct drm_device *);
> void nv50_fifo_destroy(struct drm_device *, int);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
> index 19706f0..6cdfacb 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_state.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_state.c
> @@ -515,6 +515,7 @@ nouveau_card_channel_init(struct drm_device *dev)
> dev_priv->channel = chan;
> if (ret)
> return ret;
> + init_waitqueue_head(&dev_priv->fence_wq);
> mutex_unlock(&dev_priv->channel->mutex);
>
> nouveau_bo_move_init(chan);
> diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
> index a6295cd..1665a82 100644
> --- a/drivers/gpu/drm/nouveau/nv04_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
> @@ -307,6 +307,26 @@ out:
> return handled;
> }
>
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid)
> +{
> + struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + struct nouveau_channel *chan = NULL;
> + bool handled = false;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&dev_priv->channels.lock, flags);
> + if (likely(chid >= 0 && chid < pfifo->channels)) {
> + chan = dev_priv->channel;
> + if (chan->id == chid) {
> + wake_up_all(&dev_priv->fence_wq);
> + handled = true;
> + }
> + }
> + spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
> + return handled;
> +}
> +
> static const char *nv_dma_state_err(u32 state)
> {
> static const char * const desc[] = {
> @@ -448,6 +468,11 @@ nv04_fifo_isr(struct drm_device *dev)
> status &= ~0x00000010;
> nv_wr32(dev, 0x002100, 0x00000010);
> }
> + if (status & 0x80000000) {
> + nouveau_fifo_wakeup(dev, chid);
> + status &= ~0x80000000;
> + nv_wr32(dev, 0x002100, 0x80000000);
> + }
> }
>
> if (status) {
> diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
> index c2f889b..2b8c3e5 100644
> --- a/drivers/gpu/drm/nouveau/nv84_fence.c
> +++ b/drivers/gpu/drm/nouveau/nv84_fence.c
> @@ -42,15 +42,20 @@ static int
> nv84_fence_emit(struct nouveau_fence *fence)
> {
> struct nouveau_channel *chan = fence->channel;
> - int ret = RING_SPACE(chan, 7);
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> +
> + int ret = RING_SPACE(chan, 7 + intr);
> if (ret == 0) {
> BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
> OUT_RING (chan, NvSema);
> - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
> OUT_RING (chan, upper_32_bits(chan->id * 16));
> OUT_RING (chan, lower_32_bits(chan->id * 16));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> + if (intr)
> + OUT_RING (chan, 0);
> FIRE_RING (chan);
> }
> return ret;
> @@ -61,15 +66,20 @@ static int
> nv84_fence_sync(struct nouveau_fence *fence,
> struct nouveau_channel *prev, struct nouveau_channel *chan)
> {
> - int ret = RING_SPACE(chan, 7);
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> +
> + int ret = RING_SPACE(chan, 7 + intr);
> if (ret == 0) {
> BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
> OUT_RING (chan, NvSema);
> - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
> OUT_RING (chan, upper_32_bits(prev->id * 16));
> OUT_RING (chan, lower_32_bits(prev->id * 16));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
> + if (intr)
> + OUT_RING (chan, 0);
> FIRE_RING (chan);
> }
> return ret;
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
> index 47ab388..25805ef 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fence.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
> @@ -45,15 +45,19 @@ nvc0_fence_emit(struct nouveau_fence *fence)
> struct nouveau_channel *chan = fence->channel;
> struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
> u64 addr = fctx->vma.offset + chan->id * 16;
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> int ret;
>
> - ret = RING_SPACE(chan, 5);
> + ret = RING_SPACE(chan, 5 + intr);
> if (ret == 0) {
> BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> OUT_RING (chan, upper_32_bits(addr));
> OUT_RING (chan, lower_32_bits(addr));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> + if (intr)
> + BEGIN_IMC0(chan, 0, 0x058, 0);
> FIRE_RING (chan);
> }
>
> @@ -64,11 +68,13 @@ static int
> nvc0_fence_sync(struct nouveau_fence *fence,
> struct nouveau_channel *prev, struct nouveau_channel *chan)
> {
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
> u64 addr = fctx->vma.offset + prev->id * 16;
> + bool intr = dev_priv->channel == chan;
> int ret;
>
> - ret = RING_SPACE(chan, 5);
> + ret = RING_SPACE(chan, 5 + intr);
> if (ret == 0) {
> BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> OUT_RING (chan, upper_32_bits(addr));
> @@ -76,6 +82,8 @@ nvc0_fence_sync(struct nouveau_fence *fence,
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
> NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
> + if (intr)
> + BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_FENCE_WAKE, 0);
> FIRE_RING (chan);
> }
>
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> index 7d85553..b3930c7 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> @@ -356,7 +356,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
> if (mthd == 0x0054) {
> if (!nvc0_fifo_page_flip(dev, chid))
> show &= ~0x00200000;
> - }
> + } else if (mthd == 0x0058 && nouveau_fifo_wakeup(dev, chid))
> + show &= ~0x00200000;
> }
>
> if (show) {
> diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c
> index 1855ecb..33e41cd 100644
> --- a/drivers/gpu/drm/nouveau/nve0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c
> @@ -303,11 +303,18 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
> u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f;
> u32 subc = (addr & 0x00070000);
> u32 mthd = (addr & 0x00003ffc);
> + u32 show = stat;
>
> - NV_INFO(dev, "PSUBFIFO %d:", unit);
> - nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat);
> - NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
> - unit, chid, subc, mthd, data);
> + if (stat & 0x00200000 && mthd == 0x0058 &&
> + nouveau_fifo_wakeup(dev, chid))
> + show &= ~0x00200000;
> +
> + if (show) {
> + NV_INFO(dev, "PSUBFIFO %d:", unit);
> + nouveau_bitfield_print(nve0_fifo_subfifo_intr, show);
> + NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
> + unit, chid, subc, mthd, data);
> + }
>
> nv_wr32(dev, 0x0400c0 + (unit * 0x2000), 0x80600008);
> nv_wr32(dev, 0x040108 + (unit * 0x2000), stat);
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Far away from the primal instinct, the song seems to fade away, the
river get wider between your thoughts and the things we do and say.
More information about the dri-devel
mailing list