[PATCH] drm/radeon: Inline r100_mm_rreg, v2
Lauri Kasanen
cand at gmx.com
Fri Apr 18 14:11:47 PDT 2014
This was originally un-inlined by Andi Kleen in 2011 citing size concerns.
Indeed, a first attempt at inlining it grew radeon.ko by 7%.
However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps
in Urban Terror.
v2: We know the minimum MMIO size. Adding it to the if allows the compiler to
optimize the branch out, improving both performance and size.
The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense
says perf is now more than 1% better.
Signed-off-by: Lauri Kasanen <cand at gmx.com>
---
drivers/gpu/drm/radeon/r100.c | 18 ------------------
drivers/gpu/drm/radeon/radeon.h | 21 +++++++++++++++++++--
2 files changed, 19 insertions(+), 20 deletions(-)
The _wreg function could be given similar treatment, but as it's nowhere as hot
(0.009% vs 2%), didn't bother.
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index b6c3264..8169e82 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4086,24 +4086,6 @@ int r100_init(struct radeon_device *rdev)
return 0;
}
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
- bool always_indirect)
-{
- if (reg < rdev->rmmio_size && !always_indirect)
- return readl(((void __iomem *)rdev->rmmio) + reg);
- else {
- unsigned long flags;
- uint32_t ret;
-
- spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
- writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
- ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
- spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-
- return ret;
- }
-}
-
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
bool always_indirect)
{
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f21db7a..883276a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2328,8 +2328,25 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
- bool always_indirect);
+static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+ bool always_indirect)
+{
+ /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
+ if ((reg < rdev->rmmio_size || reg < 0x10000) && !always_indirect)
+ return readl(((void __iomem *)rdev->rmmio) + reg);
+ else {
+ unsigned long flags;
+ uint32_t ret;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+
+ return ret;
+ }
+}
+
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
bool always_indirect);
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
--
1.8.3.1
More information about the dri-devel
mailing list