[PATCH] drm/radeon: Inline r100_mm_rreg

Ilia Mirkin imirkin at alum.mit.edu
Thu Apr 10 09:19:10 PDT 2014


On Thu, Apr 10, 2014 at 9:08 AM, Lauri Kasanen <cand at gmx.com> wrote:
> This was originally un-inlined by Andi Kleen in 2011 citing size concerns.
> Indeed, inlining it grows radeon.ko by 7%.
>
> However, 2% of cpu is spent in this function. Inlining it gives 1% more fps
> in Urban Terror.
>
> Signed-off-by: Lauri Kasanen <cand at gmx.com>
> ---
>  drivers/gpu/drm/radeon/r100.c   | 18 ------------------
>  drivers/gpu/drm/radeon/radeon.h | 20 ++++++++++++++++++--
>  2 files changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
> index b6c3264..8169e82 100644
> --- a/drivers/gpu/drm/radeon/r100.c
> +++ b/drivers/gpu/drm/radeon/r100.c
> @@ -4086,24 +4086,6 @@ int r100_init(struct radeon_device *rdev)
>         return 0;
>  }
>
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -                     bool always_indirect)
> -{
> -       if (reg < rdev->rmmio_size && !always_indirect)
> -               return readl(((void __iomem *)rdev->rmmio) + reg);
> -       else {
> -               unsigned long flags;
> -               uint32_t ret;
> -
> -               spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -               writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> -               ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> -               spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -
> -               return ret;
> -       }
> -}
> -
>  void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
>                   bool always_indirect)
>  {
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5cf10a7..9231100 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2330,8 +2330,24 @@ int radeon_device_init(struct radeon_device *rdev,
>  void radeon_device_fini(struct radeon_device *rdev);
>  int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
>
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -                     bool always_indirect);
> +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> +                                   bool always_indirect)
> +{
> +       if (reg < rdev->rmmio_size && !always_indirect)
> +               return readl(((void __iomem *)rdev->rmmio) + reg);

Quick thought from someone entirely unfamiliar with the hardware:
perhaps you can get the performance benefit without the size increase
by keeping only the direct readl() fast path inline and moving the
else portion (the spinlock-protected MM_INDEX/MM_DATA access) into a
non-inline function? I'm guessing that most accesses happen in the
"if" branch.

> +       else {
> +               unsigned long flags;
> +               uint32_t ret;
> +
> +               spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +               writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> +               ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +               spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +
> +               return ret;
> +       }
> +}
> +
>  void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
>                   bool always_indirect);
>  u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
> --
> 1.8.3.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel


More information about the dri-devel mailing list