[Nouveau] [PATCH] drm/nouveau: OUT_RINGp - optimize OUT_RING loops

Pekka Paalanen pq at iki.fi
Sun Aug 16 15:20:47 PDT 2009


The introduction of nouveau_bo_wr32() into OUT_RING() by commit
"drm/nouveau: use bo accessors for push buffers" made OUT_RING()
considerably slower.

The 'x11perf -aa10text' benchmark hits OUT_RING() hard, since user
pushbuffers are first copied from user to kernel, and then from kernel
to the real pushbuffer bo using OUT_RING(). The measured speed:
- before nouveau_bo_wr32(): 677k/sec
- after: 475k/sec

This patch implements OUT_RINGp() for copying an arbitrary number of
dwords from an array to the pushbuffer bo. All OUT_RING copy loops are
replaced with calls to OUT_RINGp(). This brings aa10text speed to
785k/sec.

The tests were run on nv28, Athlon64 3000+ (x86_64), with oprofile
running.

Signed-off-by: Pekka Paalanen <pq at iki.fi>
---
 drivers/gpu/drm/nouveau/nouveau_dma.c |   13 +++++++++++++
 drivers/gpu/drm/nouveau/nouveau_dma.h |    3 +++
 drivers/gpu/drm/nouveau/nouveau_gem.c |    3 +--
 drivers/gpu/drm/nouveau/nv04_fbcon.c  |   14 ++++++++------
 drivers/gpu/drm/nouveau/nv50_fbcon.c  |    4 ++--
 5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 183a7d5..0025c3d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -90,6 +90,19 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	return 0;
 }
 
+void
+OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
+{
+	bool is_iomem;
+	u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
+	mem = &mem[chan->dma.cur];
+	if (is_iomem)
+		memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
+	else
+		memcpy(mem, data, nr_dwords * 4);
+	chan->dma.cur += nr_dwords;
+}
+
 static inline bool
 READ_GET(struct nouveau_channel *chan, uint32_t *get)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 072d9b9..cdaa37d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -109,6 +109,9 @@ OUT_RING(struct nouveau_channel *chan, int data)
 	nouveau_bo_wr32(chan->pushbuf_bo, chan->dma.cur++, data);
 }
 
+extern void
+OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords);
+
 static inline void
 BEGIN_RING(struct nouveau_channel *chan, int subc, int mthd, int size)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 4516058..8b2c9e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -528,8 +528,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	for (i = 0; i < req->nr_dwords; i++)
-		OUT_RING (chan, pushbuf[i]);
+	OUT_RINGp(chan, pushbuf, req->nr_dwords);
 
 	ret = nouveau_fence_emit(fence);
 	if (ret) {
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 648b435..14fc87f 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -94,9 +94,12 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct drm_device *dev = par->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = dev_priv->channel;
-	uint32_t fg, bg, mask = ~(~0 >> (32 - info->var.bits_per_pixel));
-	uint32_t dsize, width, *data = (uint32_t *) image->data;
-	int j, k = 0;
+	uint32_t fg;
+	uint32_t bg;
+	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
+	uint32_t dsize;
+	uint32_t width;
+	uint32_t *data = (uint32_t *)image->data;
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
@@ -140,9 +143,8 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		}
 
 		BEGIN_RING(chan, NvSubGdiRect, 0x0c00, iter_len);
-		for (j = iter_len; j--;)
-			OUT_RING(chan, data[k++]);
-
+		OUT_RINGp(chan, data, iter_len);
+		data += iter_len;
 		dsize -= iter_len;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index d7af9ed..d3807e3 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -139,8 +139,8 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		dwords -= push;
 
 		BEGIN_RING(chan, NvSub2D, 0x40000860, push);
-		while (push--)
-			OUT_RING(chan, *data++);
+		OUT_RINGp(chan, data, push);
+		data += push;
 	}
 
 	FIRE_RING (chan);
-- 
1.6.3.3



More information about the Nouveau mailing list