[PATCH] drm/radeon/kms: attempt to avoid copying data twice on coherent cards. (v2)
Alex Deucher
alexdeucher at gmail.com
Wed Apr 4 06:31:33 PDT 2012
On Tue, Apr 3, 2012 at 11:23 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> On coherent systems (not-AGP) the IB should be in cached memory so should
> be just as fast, so we can avoid copying to temporary pages and just use it
> directly.
>
> provides minor speedups on rv530: gears ~1820->1860, ipers: 29.9->30.6,
> but always good to use less CPU if we can.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/radeon/radeon.h | 1 +
> drivers/gpu/drm/radeon/radeon_cs.c | 46 ++++++++++++++++++++++++----------
> drivers/gpu/drm/radeon/radeon_drv.c | 4 +++
> 3 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 138b952..5331b27 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -94,6 +94,7 @@ extern int radeon_disp_priority;
> extern int radeon_hw_i2c;
> extern int radeon_pcie_gen2;
> extern int radeon_msi;
> +extern int radeon_copy1;
>
> /*
> * Copy from radeon_drv.h so we don't have to include both and have conflicting
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 5cac832..1647be2 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -278,11 +278,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
> p->chunks[p->chunk_ib_idx].length_dw);
> return -EINVAL;
> }
> - p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> - p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> - if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
> - p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
> - return -ENOMEM;
> + if ((p->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
> + p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
> + p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
> + kfree(p->chunks[i].kpage[0]);
> + kfree(p->chunks[i].kpage[1]);
> + return -ENOMEM;
> + }
> + }
> p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
> p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
> p->chunks[p->chunk_ib_idx].last_copied_page = -1;
> @@ -323,8 +328,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
> kfree(parser->relocs_ptr);
> for (i = 0; i < parser->nchunks; i++) {
> kfree(parser->chunks[i].kdata);
> - kfree(parser->chunks[i].kpage[0]);
> - kfree(parser->chunks[i].kpage[1]);
> + if ((parser->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
> + kfree(parser->chunks[i].kpage[0]);
> + kfree(parser->chunks[i].kpage[1]);
> + }
> }
> kfree(parser->chunks);
> kfree(parser->chunks_array);
> @@ -573,6 +580,8 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
> struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
> int i;
> int size = PAGE_SIZE;
> + void *ptr;
> + bool copy1 = (p->rdev->flags & RADEON_IS_AGP || !radeon_copy1) ? false : true;
>
> for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
> if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
> @@ -583,23 +592,32 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
> }
> }
>
> - new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> -
> if (pg_idx == ibc->last_page_index) {
> size = (ibc->length_dw * 4) % PAGE_SIZE;
> - if (size == 0)
> - size = PAGE_SIZE;
> + if (size == 0)
> + size = PAGE_SIZE;
> }
>
> - if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
> + if (!copy1) {
> + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> + ptr = ibc->kpage[new_page];
> + } else {
> + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> + ptr = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4));
> + ibc->kpage[new_page] = ptr;
> + }
> +
> + if (DRM_COPY_FROM_USER(ptr,
> ibc->user_ptr + (pg_idx * PAGE_SIZE),
> size)) {
> p->parser_error = -EFAULT;
> return 0;
> }
>
> - /* copy to IB here */
> - memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
> + if (!copy1) {
> + /* copy to IB here */
> + memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
> + }
>
> ibc->last_copied_page = pg_idx;
> ibc->kpage_idx[new_page] = pg_idx;
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
> index ef7bb3f..3ea3377 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> @@ -128,6 +128,7 @@ int radeon_disp_priority = 0;
> int radeon_hw_i2c = 0;
> int radeon_pcie_gen2 = 0;
> int radeon_msi = -1;
> +int radeon_copy1 = 0;
>
> MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
> module_param_named(no_wb, radeon_no_wb, int, 0444);
> @@ -177,6 +178,9 @@ module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444);
> MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
> module_param_named(msi, radeon_msi, int, 0444);
>
> +MODULE_PARM_DESC(copy1, "One Copy test");
> +module_param_named(copy1, radeon_copy1, int, 0644);
> +
> static int radeon_suspend(struct drm_device *dev, pm_message_t state)
> {
> drm_radeon_private_t *dev_priv = dev->dev_private;
> --
> 1.7.6.5
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
More information about the dri-devel
mailing list