[PATCH] drm/radeon/kms: attempt to avoid copying data twice on coherent cards. (v2)

Alex Deucher alexdeucher at gmail.com
Wed Apr 4 06:31:33 PDT 2012


On Tue, Apr 3, 2012 at 11:23 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> On coherent systems (not-AGP) the IB should be in cached memory so should
> be just as fast, so we can avoid copying to temporary pages and just use it
> directly.
>
> provides minor speedups on rv530: gears ~1820->1860, ipers: 29.9->30.6,
> but always good to use less CPU if we can.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/radeon/radeon.h     |    1 +
>  drivers/gpu/drm/radeon/radeon_cs.c  |   46 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/radeon/radeon_drv.c |    4 +++
>  3 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 138b952..5331b27 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -94,6 +94,7 @@ extern int radeon_disp_priority;
>  extern int radeon_hw_i2c;
>  extern int radeon_pcie_gen2;
>  extern int radeon_msi;
> +extern int radeon_copy1;
>
>  /*
>  * Copy from radeon_drv.h so we don't have to include both and have conflicting
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 5cac832..1647be2 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -278,11 +278,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>                                  p->chunks[p->chunk_ib_idx].length_dw);
>                        return -EINVAL;
>                }
> -               p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> -               p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> -               if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
> -                   p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
> -                       return -ENOMEM;
> +               if ((p->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
> +                       p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +                       p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +                       if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
> +                           p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
> +                               kfree(p->chunks[i].kpage[0]);
> +                               kfree(p->chunks[i].kpage[1]);
> +                               return -ENOMEM;
> +                       }
> +               }
>                p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
>                p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
>                p->chunks[p->chunk_ib_idx].last_copied_page = -1;
> @@ -323,8 +328,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>        kfree(parser->relocs_ptr);
>        for (i = 0; i < parser->nchunks; i++) {
>                kfree(parser->chunks[i].kdata);
> -               kfree(parser->chunks[i].kpage[0]);
> -               kfree(parser->chunks[i].kpage[1]);
> +               if ((parser->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) {
> +                       kfree(parser->chunks[i].kpage[0]);
> +                       kfree(parser->chunks[i].kpage[1]);
> +               }
>        }
>        kfree(parser->chunks);
>        kfree(parser->chunks_array);
> @@ -573,6 +580,8 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
>        struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
>        int i;
>        int size = PAGE_SIZE;
> +       void *ptr;
> +       bool copy1 = (p->rdev->flags & RADEON_IS_AGP || !radeon_copy1) ? false : true;
>
>        for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
>                if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
> @@ -583,23 +592,32 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
>                }
>        }
>
> -       new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> -
>        if (pg_idx == ibc->last_page_index) {
>                size = (ibc->length_dw * 4) % PAGE_SIZE;
> -                       if (size == 0)
> -                               size = PAGE_SIZE;
> +               if (size == 0)
> +                       size = PAGE_SIZE;
>        }
>
> -       if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
> +       if (!copy1) {
> +               new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> +               ptr = ibc->kpage[new_page];
> +       } else {
> +               new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
> +               ptr = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4));
> +               ibc->kpage[new_page] = ptr;
> +       }
> +
> +       if (DRM_COPY_FROM_USER(ptr,
>                               ibc->user_ptr + (pg_idx * PAGE_SIZE),
>                               size)) {
>                p->parser_error = -EFAULT;
>                return 0;
>        }
>
> -       /* copy to IB here */
> -       memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
> +       if (!copy1) {
> +               /* copy to IB here */
> +               memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
> +       }
>
>        ibc->last_copied_page = pg_idx;
>        ibc->kpage_idx[new_page] = pg_idx;
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
> index ef7bb3f..3ea3377 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> @@ -128,6 +128,7 @@ int radeon_disp_priority = 0;
>  int radeon_hw_i2c = 0;
>  int radeon_pcie_gen2 = 0;
>  int radeon_msi = -1;
> +int radeon_copy1 = 0;
>
>  MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
>  module_param_named(no_wb, radeon_no_wb, int, 0444);
> @@ -177,6 +178,9 @@ module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444);
>  MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
>  module_param_named(msi, radeon_msi, int, 0444);
>
> +MODULE_PARM_DESC(copy1, "One Copy test");
> +module_param_named(copy1, radeon_copy1, int, 0644);
> +
>  static int radeon_suspend(struct drm_device *dev, pm_message_t state)
>  {
>        drm_radeon_private_t *dev_priv = dev->dev_private;
> --
> 1.7.6.5
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel


More information about the dri-devel mailing list