[PATCH] drm/radeon/kms: attempt to avoid copying data twice on coherent cards. (v2)

Dave Airlie airlied at gmail.com
Tue Apr 3 08:23:41 PDT 2012


From: Dave Airlie <airlied at redhat.com>

On coherent systems (not-AGP) the IB should be in cached memory so should
be just as fast, so we can avoid copying to temporary pages and just use it
directly.

provides minor speedups on rv530: gears ~1820->1860, ipers: 29.9->30.6,
but always good to use less CPU if we can.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h     |    1 +
 drivers/gpu/drm/radeon/radeon_cs.c  |   46 ++++++++++++++++++++++++----------
 drivers/gpu/drm/radeon/radeon_drv.c |    4 +++
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 138b952..5331b27 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -94,6 +94,7 @@ extern int radeon_disp_priority;
 extern int radeon_hw_i2c;
 extern int radeon_pcie_gen2;
 extern int radeon_msi;
+extern int radeon_copy1;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 5cac832..1647be2 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,11 +278,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 				  p->chunks[p->chunk_ib_idx].length_dw);
 			return -EINVAL;
 		}
-		p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
-		    p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
-			return -ENOMEM;
+		if ((p->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
+			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
+			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
+				kfree(p->chunks[i].kpage[0]);
+				kfree(p->chunks[i].kpage[1]);
+				return -ENOMEM;
+			}
+		}
 		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
 		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
 		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
@@ -323,8 +328,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 	kfree(parser->relocs_ptr);
 	for (i = 0; i < parser->nchunks; i++) {
 		kfree(parser->chunks[i].kdata);
-		kfree(parser->chunks[i].kpage[0]);
-		kfree(parser->chunks[i].kpage[1]);
+		if ((parser->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
+			kfree(parser->chunks[i].kpage[0]);
+			kfree(parser->chunks[i].kpage[1]);
+		}
 	}
 	kfree(parser->chunks);
 	kfree(parser->chunks_array);
@@ -573,6 +580,8 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
 	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
 	int i;
 	int size = PAGE_SIZE;
+	void *ptr;
+	bool copy1 = (p->rdev->flags & RADEON_IS_AGP || !radeon_copy1) ? false : true;
 
 	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
 		if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
@@ -583,23 +592,32 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
 		}
 	}
 
-	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
-
 	if (pg_idx == ibc->last_page_index) {
 		size = (ibc->length_dw * 4) % PAGE_SIZE;
-			if (size == 0)
-				size = PAGE_SIZE;
+		if (size == 0)
+			size = PAGE_SIZE;
 	}
 
-	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
+	if (!copy1) {
+		new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
+		ptr = ibc->kpage[new_page];
+	} else {
+		new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
+		ptr = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4));
+		ibc->kpage[new_page] = ptr;
+	}
+
+	if (DRM_COPY_FROM_USER(ptr,
 			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
 			       size)) {
 		p->parser_error = -EFAULT;
 		return 0;
 	}
 
-	/* copy to IB here */
-	memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
+	if (!copy1) {
+		/* copy to IB here */
+		memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
+	}
 
 	ibc->last_copied_page = pg_idx;
 	ibc->kpage_idx[new_page] = pg_idx;
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index ef7bb3f..3ea3377 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -128,6 +128,7 @@ int radeon_disp_priority = 0;
 int radeon_hw_i2c = 0;
 int radeon_pcie_gen2 = 0;
 int radeon_msi = -1;
+int radeon_copy1 = 0;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -177,6 +178,9 @@ module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444);
 MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(msi, radeon_msi, int, 0444);
 
+MODULE_PARM_DESC(copy1, "One Copy test");
+module_param_named(copy1, radeon_copy1, int, 0644);
+
 static int radeon_suspend(struct drm_device *dev, pm_message_t state)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-- 
1.7.6.5



More information about the dri-devel mailing list